From 5a3fa8d6fb91677b12c3dafec30549bfc021b759 Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Fri, 8 Mar 2024 15:55:10 -0500 Subject: [PATCH 01/10] Updated with patches from antoyo and suggested changes from aclements. --- dwarf/cursor.cc | 6 ++++++ dwarf/dwarf.cc | 19 +++++++++++++++---- dwarf/internal.hh | 1 + dwarf/line.cc | 2 +- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/dwarf/cursor.cc b/dwarf/cursor.cc index 19902da..22b28b1 100644 --- a/dwarf/cursor.cc +++ b/dwarf/cursor.cc @@ -67,6 +67,12 @@ cursor::skip_initial_length() } } +void +cursor::skip_unit_type() +{ + pos += sizeof(sbyte); +} + section_offset cursor::offset() { diff --git a/dwarf/dwarf.cc b/dwarf/dwarf.cc index 2465eef..09d6fb0 100644 --- a/dwarf/dwarf.cc +++ b/dwarf/dwarf.cc @@ -273,12 +273,23 @@ compilation_unit::compilation_unit(const dwarf &file, section_offset offset) cursor sub(subsec); sub.skip_initial_length(); uhalf version = sub.fixed(); - if (version < 2 || version > 4) + (void)version; + if (version > 5) throw format_error("unknown compilation unit version " + std::to_string(version)); // .debug_abbrev-relative offset of this unit's abbrevs - section_offset debug_abbrev_offset = sub.offset(); - ubyte address_size = sub.fixed(); - subsec->addr_size = address_size; + section_offset debug_abbrev_offset; + if(version >= 5) + { + sub.skip_unit_type(); + ubyte address_size = sub.fixed(); + subsec->addr_size = address_size; + debug_abbrev_offset = sub.offset(); + } + else { + debug_abbrev_offset = sub.offset(); + ubyte address_size = sub.fixed(); + subsec->addr_size = address_size; + } m = make_shared(file, offset, subsec, debug_abbrev_offset, sub.get_section_offset()); diff --git a/dwarf/internal.hh b/dwarf/internal.hh index 7b89896..42679ca 100644 --- a/dwarf/internal.hh +++ b/dwarf/internal.hh @@ -178,6 +178,7 @@ struct cursor } void skip_initial_length(); + void skip_unit_type(); void skip_form(DW_FORM form); cursor &operator+=(section_offset offset) diff --git 
a/dwarf/line.cc b/dwarf/line.cc index be766d3..d3cbc4e 100644 --- a/dwarf/line.cc +++ b/dwarf/line.cc @@ -84,7 +84,7 @@ line_table::line_table(const shared_ptr
&sec, section_offset offset, m->program_offset = cur.get_section_offset() + header_length; m->minimum_instruction_length = cur.fixed(); m->maximum_operations_per_instruction = 1; - if (version == 4) + if (version >= 4) m->maximum_operations_per_instruction = cur.fixed(); if (m->maximum_operations_per_instruction == 0) throw format_error("maximum_operations_per_instruction cannot" From 76862fe577ec9fb60cb03737197a391e0da8f5bc Mon Sep 17 00:00:00 2001 From: Noah Lev Date: Fri, 8 Mar 2024 16:00:18 -0500 Subject: [PATCH 02/10] Remove incorrect use of const Found by compiling with clang++ instead of g++. --- elf/data.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elf/data.hh b/elf/data.hh index ed5c7a1..4a60944 100644 --- a/elf/data.hh +++ b/elf/data.hh @@ -553,7 +553,7 @@ struct Sym return (stb)(info >> 4); } - void set_binding(stb v) const + void set_binding(stb v) { info = (info & 0xF) | ((unsigned char)v << 4); } From 892389e23abce95cea9c5d40957d52d2ba8bb985 Mon Sep 17 00:00:00 2001 From: Noah Lev Date: Wed, 16 Oct 2024 19:11:52 -0400 Subject: [PATCH 03/10] Guard usage of `std::string::front` for empty string This code uses `front()` to get the underlying string buffer. However, when libstdc++ assertions (`_GLIBCXX_ASSERTIONS`) are enabled, this causes an assert failure if the string is empty. Since we have no need to perform the memmove if the string is empty, we can fix the crash by simply guarding with the condition `size > 0`. Note that this assert failure only occurred with libstdc++ assertions enabled. Because Arch apparently enables them by default (while other distros don't), it initially appeared to be an Arch-specific problem. However, it's just that it only surfaced on Arch, while having the potential for issues on other platforms. 
--- dwarf/cursor.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dwarf/cursor.cc b/dwarf/cursor.cc index 22b28b1..d2df380 100644 --- a/dwarf/cursor.cc +++ b/dwarf/cursor.cc @@ -92,7 +92,9 @@ cursor::string(std::string &out) size_t size; const char *p = this->cstr(&size); out.resize(size); - memmove(&out.front(), p, size); + if (size > 0) { + memmove(&out.front(), p, size); + } } const char * From 8bd19d1e6906e3d27b71bd0687b68b0d252afddc Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Thu, 1 Jan 2026 17:55:40 -0500 Subject: [PATCH 04/10] Updated for DWARF5. --- Makefile | 14 -- dwarf/Makefile | 75 ---------- dwarf/abbrev.cc | 41 +++++- dwarf/cursor.cc | 22 +++ dwarf/data.hh | 35 +++++ dwarf/die.cc | 4 +- dwarf/dwarf++.hh | 14 +- dwarf/dwarf.cc | 2 +- dwarf/elf.cc | 1 + dwarf/internal.hh | 5 +- dwarf/line.cc | 349 ++++++++++++++++++++++++++++++++++++++++------ dwarf/value.cc | 25 +++- elf/Makefile | 73 ---------- examples/Makefile | 44 ------ 14 files changed, 438 insertions(+), 266 deletions(-) delete mode 100644 Makefile delete mode 100644 dwarf/Makefile delete mode 100644 elf/Makefile delete mode 100644 examples/Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index 30c8a14..0000000 --- a/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -all: - $(MAKE) -C elf - $(MAKE) -C dwarf - -install: - $(MAKE) -C elf install - $(MAKE) -C dwarf install - -clean: - $(MAKE) -C elf clean - $(MAKE) -C dwarf clean - -check: - cd test && ./test.sh diff --git a/dwarf/Makefile b/dwarf/Makefile deleted file mode 100644 index 2d47003..0000000 --- a/dwarf/Makefile +++ /dev/null @@ -1,75 +0,0 @@ -# Changed when ABI backwards compatibility is broken. -# Typically uses the major version. 
-SONAME = 0 - -CXXFLAGS+=-g -O2 -Werror -override CXXFLAGS+=-std=c++0x -Wall -fPIC - -all: libdwarf++.a libdwarf++.so.$(SONAME) libdwarf++.so libdwarf++.pc - -SRCS := dwarf.cc cursor.cc die.cc value.cc abbrev.cc \ - expr.cc rangelist.cc line.cc attrs.cc \ - die_str_map.cc elf.cc to_string.cc -HDRS := dwarf++.hh data.hh internal.hh small_vector.hh ../elf/to_hex.hh -CLEAN := - -libdwarf++.a: $(SRCS:.cc=.o) - ar rcs $@ $^ -CLEAN += libdwarf++.a $(SRCS:.cc=.o) - -$(SRCS:.cc=.o): $(HDRS) - -to_string.cc: ../elf/enum-print.py dwarf++.hh data.hh Makefile - @echo "// Automatically generated by make at $$(date)" > to_string.cc - @echo "// DO NOT EDIT" >> to_string.cc - @echo >> to_string.cc - @echo '#include "internal.hh"' >> to_string.cc - @echo >> to_string.cc - @echo 'DWARFPP_BEGIN_NAMESPACE' >> to_string.cc - @echo >> to_string.cc - python3 ../elf/enum-print.py < dwarf++.hh >> to_string.cc - python3 ../elf/enum-print.py -s _ -u --hex -x hi_user -x lo_user < data.hh >> to_string.cc - @echo 'DWARFPP_END_NAMESPACE' >> to_string.cc -CLEAN += to_string.cc - -libdwarf++.so.$(SONAME): $(SRCS:.cc=.o) - $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ -CLEAN += libdwarf++.so.* - -libdwarf++.so: - ln -s $@.$(SONAME) $@ -CLEAN += libdwarf++.so - -# Create pkg-config for local library and headers. This will be -# transformed in to the correct global pkg-config by install. 
-libdwarf++.pc: always - @(VER=$$(git describe --match 'v*' | sed -e s/^v//); \ - echo "libdir=$$PWD"; \ - echo "includedir=$$PWD"; \ - echo ""; \ - echo "Name: libdwarf++"; \ - echo "Description: C++11 DWARF library"; \ - echo "Version: $$VER"; \ - echo "Requires: libelf++ = $$VER"; \ - echo "Libs: -L\$${libdir} -ldwarf++"; \ - echo "Cflags: -I\$${includedir}") > $@ -CLEAN += libdwarf++.pc - -.PHONY: always - -PREFIX?=/usr/local - -install: libdwarf++.a libdwarf++.so.$(SONAME) libdwarf++.so libdwarf++.pc - install -d $(DESTDIR)$(PREFIX)/lib/pkgconfig - install -t $(DESTDIR)$(PREFIX)/lib libdwarf++.a - install -t $(DESTDIR)$(PREFIX)/lib libdwarf++.so.$(SONAME) - install -t $(DESTDIR)$(PREFIX)/lib libdwarf++.so - install -d $(DESTDIR)$(PREFIX)/include/libelfin/dwarf - install -t $(DESTDIR)$(PREFIX)/include/libelfin/dwarf data.hh dwarf++.hh small_vector.hh - sed 's,^libdir=.*,libdir=$(PREFIX)/lib,;s,^includedir=.*,includedir=$(PREFIX)/include,' libdwarf++.pc \ - > $(DESTDIR)$(PREFIX)/lib/pkgconfig/libdwarf++.pc - -clean: - rm -f $(CLEAN) - -.DELETE_ON_ERROR: diff --git a/dwarf/abbrev.cc b/dwarf/abbrev.cc index f77dc0c..25c69aa 100644 --- a/dwarf/abbrev.cc +++ b/dwarf/abbrev.cc @@ -98,12 +98,37 @@ resolve_type(DW_AT name, DW_FORM form) case DW_FORM::ref_addr: case DW_FORM::ref_sig8: case DW_FORM::ref_udata: + case DW_FORM::ref_sup4: + case DW_FORM::ref_sup8: return value::type::reference; case DW_FORM::string: case DW_FORM::strp: + case DW_FORM::line_strp: + case DW_FORM::strp_sup: + case DW_FORM::strx: + case DW_FORM::strx1: + case DW_FORM::strx2: + case DW_FORM::strx3: + case DW_FORM::strx4: return value::type::string; + case DW_FORM::addrx: + case DW_FORM::addrx1: + case DW_FORM::addrx2: + case DW_FORM::addrx3: + case DW_FORM::addrx4: + return value::type::address; + + case DW_FORM::implicit_const: + return value::type::constant; + + case DW_FORM::loclistx: + return value::type::loclist; + + case DW_FORM::rnglistx: + return value::type::rangelist; + case 
DW_FORM::indirect: // There's nothing meaningful we can do return value::type::invalid; @@ -138,15 +163,18 @@ resolve_type(DW_AT name, DW_FORM form) return value::type::invalid; default: - throw format_error("DW_FORM_sec_offset not expected for attribute " + - to_string(name)); + // DWARF 5 has many new attributes using sec_offset + // (str_offsets_base, addr_base, rnglists_base, loclists_base, etc.) + // Just treat them as invalid for now to allow skipping + return value::type::invalid; } } throw format_error("unknown attribute form " + to_string(form)); } -attribute_spec::attribute_spec(DW_AT name, DW_FORM form) - : name(name), form(form), type(resolve_type(name, form)) +attribute_spec::attribute_spec(DW_AT name, DW_FORM form, int64_t implicit_const) + : name(name), form(form), type(resolve_type(name, form)), + implicit_const(implicit_const) { } @@ -167,7 +195,10 @@ abbrev_entry::read(cursor *cur) DW_FORM form = (DW_FORM)cur->uleb128(); if (name == (DW_AT)0 && form == (DW_FORM)0) break; - attributes.push_back(attribute_spec(name, form)); + int64_t implicit_const = 0; + if (form == DW_FORM::implicit_const) + implicit_const = cur->sleb128(); + attributes.push_back(attribute_spec(name, form, implicit_const)); } attributes.shrink_to_fit(); return true; diff --git a/dwarf/cursor.cc b/dwarf/cursor.cc index d2df380..ea59641 100644 --- a/dwarf/cursor.cc +++ b/dwarf/cursor.cc @@ -125,6 +125,8 @@ cursor::skip_form(DW_FORM form) case DW_FORM::sec_offset: case DW_FORM::ref_addr: case DW_FORM::strp: + case DW_FORM::line_strp: + case DW_FORM::strp_sup: switch (sec->fmt) { case format::dwarf32: pos += 4; @@ -158,29 +160,49 @@ cursor::skip_form(DW_FORM form) // fixed-length forms case DW_FORM::flag_present: + case DW_FORM::implicit_const: break; case DW_FORM::flag: case DW_FORM::data1: case DW_FORM::ref1: + case DW_FORM::strx1: + case DW_FORM::addrx1: pos += 1; break; case DW_FORM::data2: case DW_FORM::ref2: + case DW_FORM::strx2: + case DW_FORM::addrx2: pos += 2; break; + case 
DW_FORM::strx3: + case DW_FORM::addrx3: + pos += 3; + break; case DW_FORM::data4: case DW_FORM::ref4: + case DW_FORM::ref_sup4: + case DW_FORM::strx4: + case DW_FORM::addrx4: pos += 4; break; case DW_FORM::data8: case DW_FORM::ref_sig8: + case DW_FORM::ref_sup8: pos += 8; break; + case DW_FORM::data16: + pos += 16; + break; // variable-length forms case DW_FORM::sdata: case DW_FORM::udata: case DW_FORM::ref_udata: + case DW_FORM::strx: + case DW_FORM::addrx: + case DW_FORM::loclistx: + case DW_FORM::rnglistx: while (pos < sec->end && (*(uint8_t*)pos & 0x80)) pos++; pos++; diff --git a/dwarf/data.hh b/dwarf/data.hh index e6002b7..5ba1345 100644 --- a/dwarf/data.hh +++ b/dwarf/data.hh @@ -250,7 +250,27 @@ enum class DW_FORM sec_offset = 0x17, // lineptr, loclistptr, macptr, rangelistptr exprloc = 0x18, // exprloc flag_present = 0x19, // flag + + // DWARF 5 + strx = 0x1a, // string index in .debug_str_offsets + addrx = 0x1b, // address index in .debug_addr + ref_sup4 = 0x1c, // reference + strp_sup = 0x1d, // string + data16 = 0x1e, // constant + line_strp = 0x1f, // string ref_sig8 = 0x20, // reference + implicit_const = 0x21, // constant encoded in abbrev + loclistx = 0x22, // location list index + rnglistx = 0x23, // range list index + ref_sup8 = 0x24, // reference + strx1 = 0x25, // 1-byte string index + strx2 = 0x26, // 2-byte string index + strx3 = 0x27, // 3-byte string index + strx4 = 0x28, // 4-byte string index + addrx1 = 0x29, // 1-byte address index + addrx2 = 0x2a, // 2-byte address index + addrx3 = 0x2b, // 3-byte address index + addrx4 = 0x2c, // 4-byte address index }; std::string @@ -560,6 +580,21 @@ enum class DW_LNE std::string to_string(DW_LNE v); +// Line number content types (DWARF5 section 7.22 table 7.30) +enum class DW_LNCT +{ + path = 0x01, + directory_index = 0x02, + timestamp = 0x03, + size = 0x04, + MD5 = 0x05, + lo_user = 0x2000, + hi_user = 0x3fff, +}; + +std::string +to_string(DW_LNCT v); + DWARFPP_END_NAMESPACE #endif diff --git 
a/dwarf/die.cc b/dwarf/die.cc index a87c018..cd3f390 100644 --- a/dwarf/die.cc +++ b/dwarf/die.cc @@ -74,7 +74,7 @@ die::operator[](DW_AT attr) const int i = 0; for (auto &a : abbrev->attributes) { if (a.name == attr) - return value(cu, a.name, a.form, a.type, attrs[i]); + return value(cu, a, attrs[i]); i++; } } @@ -174,7 +174,7 @@ die::attributes() const // custom iterator. int i = 0; for (auto &a : abbrev->attributes) { - res.push_back(make_pair(a.name, value(cu, a.name, a.form, a.type, attrs[i]))); + res.push_back(make_pair(a.name, value(cu, a, attrs[i]))); i++; } return res; diff --git a/dwarf/dwarf++.hh b/dwarf/dwarf++.hh index a53f87e..a0b0816 100644 --- a/dwarf/dwarf++.hh +++ b/dwarf/dwarf++.hh @@ -38,6 +38,7 @@ class line_table; // Internal type forward-declarations struct section; struct abbrev_entry; +struct attribute_spec; struct cursor; // XXX Audit for binary-compatibility @@ -85,6 +86,7 @@ enum class section_type frame, info, line, + line_str, loc, macinfo, pubnames, @@ -567,7 +569,8 @@ public: /** * Construct a value with type `type::invalid`. */ - value() : cu(nullptr), typ(type::invalid) { } + value() : cu(nullptr), form(DW_FORM::addr), typ(type::invalid), + offset(0), has_implicit_const(false), implicit_const(0) { } value(const value &o) = default; value(value &&o) = default; @@ -705,7 +708,7 @@ private: friend class die; value(const unit *cu, - DW_AT name, DW_FORM form, type typ, section_offset offset); + const attribute_spec &spec, section_offset offset); void resolve_indirect(DW_AT name); @@ -713,6 +716,8 @@ private: DW_FORM form; type typ; section_offset offset; + bool has_implicit_const; + int64_t implicit_const; }; std::string @@ -1079,7 +1084,7 @@ public: */ line_table(const std::shared_ptr
&sec, section_offset offset, unsigned cu_addr_size, const std::string &cu_comp_dir, - const std::string &cu_name); + const std::string &cu_name, const dwarf *dw = nullptr); /** * Construct an invalid, empty line table. @@ -1122,6 +1127,7 @@ public: */ iterator end() const; + /** * Return an iterator to the line table entry containing addr * (roughly, the entry with the highest address less than or @@ -1264,7 +1270,7 @@ public: * for all fields. is_stmt has no default value, so the * caller must provide it. */ - void reset(bool is_stmt); + void reset(bool is_stmt, unsigned default_file_index); /** * Return a descriptive string of the form diff --git a/dwarf/dwarf.cc b/dwarf/dwarf.cc index 09d6fb0..d763295 100644 --- a/dwarf/dwarf.cc +++ b/dwarf/dwarf.cc @@ -314,7 +314,7 @@ compilation_unit::get_line_table() const m->lt = line_table(sec, d[DW_AT::stmt_list].as_sec_offset(), m->subsec->addr_size, comp_dir, - at_name(d)); + at_name(d), &m->file); } done: return m->lt; diff --git a/dwarf/elf.cc b/dwarf/elf.cc index baf8e67..11add00 100644 --- a/dwarf/elf.cc +++ b/dwarf/elf.cc @@ -20,6 +20,7 @@ static const struct {".debug_frame", section_type::frame}, {".debug_info", section_type::info}, {".debug_line", section_type::line}, + {".debug_line_str", section_type::line_str}, {".debug_loc", section_type::loc}, {".debug_macinfo", section_type::macinfo}, {".debug_pubnames", section_type::pubnames}, diff --git a/dwarf/internal.hh b/dwarf/internal.hh index 42679ca..ac30c34 100644 --- a/dwarf/internal.hh +++ b/dwarf/internal.hh @@ -230,7 +230,10 @@ struct attribute_spec // Computed information value::type type; - attribute_spec(DW_AT name, DW_FORM form); + // For DW_FORM_implicit_const, stores the SLEB128 constant. 
+ int64_t implicit_const; + + attribute_spec(DW_AT name, DW_FORM form, int64_t implicit_const = 0); }; typedef std::uint64_t abbrev_code; diff --git a/dwarf/line.cc b/dwarf/line.cc index d3cbc4e..e66cf1c 100644 --- a/dwarf/line.cc +++ b/dwarf/line.cc @@ -24,9 +24,19 @@ static const int opcode_lengths[] = { struct line_table::impl { + struct entry_format { + DW_LNCT content; + DW_FORM form; + }; + shared_ptr
sec; + const dwarf *dw; + shared_ptr
line_str_sec; + shared_ptr
str_sec; + string comp_dir; // Header information + uhalf version; section_offset program_offset; ubyte minimum_instruction_length; ubyte maximum_operations_per_instruction; @@ -34,9 +44,11 @@ struct line_table::impl sbyte line_base; ubyte line_range; ubyte opcode_base; + unsigned file_index_base; vector standard_opcode_lengths; vector include_directories; vector file_names; + vector file_entry_formats; // The offset in sec following the last read file name entry. // File name entries can appear both in the line table header @@ -49,42 +61,63 @@ struct line_table::impl // know we've gathered all file names. bool file_names_complete; - impl() : last_file_name_end(0), file_names_complete(false) {}; + impl() : dw(nullptr), version(0), file_index_base(1), + last_file_name_end(0), + file_names_complete(false) {}; bool read_file_entry(cursor *cur, bool in_header); + void add_include_directory(const string &dir); + void add_file_entry(string file_name, uint64_t dir_index, + uint64_t mtime, uint64_t length); + vector read_entry_formats(cursor *cur); + void read_v5_directory_table(cursor *cur); + void read_v5_file_table(cursor *cur); + void read_file_entry_v5(cursor *cur); + string read_form_string(cursor *cur, DW_FORM form); + uint64_t read_form_unsigned(cursor *cur, DW_FORM form); + string read_string_from_section(section_type type, section_offset off); }; line_table::line_table(const shared_ptr
&sec, section_offset offset, unsigned cu_addr_size, const string &cu_comp_dir, - const string &cu_name) + const string &cu_name, const dwarf *dw) : m(make_shared()) { - // XXX DWARF2 and 3 give a weird specification for DW_AT_comp_dir + m->dw = dw; + // XXX DWARF2 and 3 give a weird specification for DW_AT_comp_dir string comp_dir, abs_path; if (cu_comp_dir.empty() || cu_comp_dir.back() == '/') comp_dir = cu_comp_dir; else comp_dir = cu_comp_dir + '/'; + m->comp_dir = comp_dir; // Read the line table header (DWARF2 section 6.2.4, DWARF3 - // section 6.2.4, DWARF4 section 6.2.3) + // section 6.2.4, DWARF4 section 6.2.3, DWARF5 section 6.2.4) cursor cur(sec, offset); m->sec = cur.subsection(); cur = cursor(m->sec); cur.skip_initial_length(); - m->sec->addr_size = cu_addr_size; // Basic header information - uhalf version = cur.fixed(); - if (version < 2 || version > 4) + m->version = cur.fixed(); + if (m->version < 2 || m->version > 5) throw format_error("unknown line number table version " + - std::to_string(version)); + std::to_string(m->version)); + if (m->version >= 5) { + m->sec->addr_size = cur.fixed(); + ubyte segment_selector_size = cur.fixed(); + (void)segment_selector_size; + } else { + m->sec->addr_size = cu_addr_size; + } + m->file_index_base = (m->version >= 5) ? 0 : 1; section_length header_length = cur.offset(); m->program_offset = cur.get_section_offset() + header_length; m->minimum_instruction_length = cur.fixed(); m->maximum_operations_per_instruction = 1; - if (version >= 4) + if (m->version >= 4) m->maximum_operations_per_instruction = cur.fixed(); if (m->maximum_operations_per_instruction == 0) throw format_error("maximum_operations_per_instruction cannot" @@ -95,7 +128,7 @@ line_table::line_table(const shared_ptr
&sec, section_offset offset, if (m->line_range == 0) throw format_error("line_range cannot be 0 in line number table"); m->opcode_base = cur.fixed(); - + static_assert(sizeof(opcode_lengths) / sizeof(opcode_lengths[0]) == 13, "opcode_lengths table has wrong length"); @@ -118,31 +151,45 @@ line_table::line_table(const shared_ptr
&sec, section_offset offset, } // Include directories list - string incdir; - // Include directory 0 is implicitly the compilation unit - // current directory - m->include_directories.push_back(comp_dir); - while (true) { - cur.string(incdir); - if (incdir.empty()) - break; - if (incdir.back() != '/') - incdir += '/'; - if (incdir[0] == '/') - m->include_directories.push_back(move(incdir)); - else - m->include_directories.push_back(comp_dir + incdir); + m->include_directories.clear(); + if (m->version < 5) + m->include_directories.push_back(m->comp_dir); + if (m->version >= 5) { + m->read_v5_directory_table(&cur); + } else { + string incdir; + while (true) { + cur.string(incdir); + if (incdir.empty()) + break; + if (incdir.back() != '/') + incdir += '/'; + if (incdir[0] == '/') + m->include_directories.push_back(move(incdir)); + else + m->include_directories.push_back(comp_dir + incdir); + } } // File name list string file_name; - // File name 0 is implicitly the compilation unit file name. - // cu_name can be relative to comp_dir or absolute. - if (!cu_name.empty() && cu_name[0] == '/') - m->file_names.emplace_back(cu_name); - else - m->file_names.emplace_back(comp_dir + cu_name); - while (m->read_file_entry(&cur, true)); + if (m->version >= 5) { + m->read_v5_file_table(&cur); + if (m->file_names.empty()) { + if (!cu_name.empty() && cu_name[0] == '/') + m->file_names.emplace_back(cu_name); + else + m->file_names.emplace_back(comp_dir + cu_name); + } + } else { + // File name 0 is implicitly the compilation unit file name. + // cu_name can be relative to comp_dir or absolute. 
+ if (!cu_name.empty() && cu_name[0] == '/') + m->file_names.emplace_back(cu_name); + else + m->file_names.emplace_back(comp_dir + cu_name); + while (m->read_file_entry(&cur, true)); + } } line_table::iterator @@ -204,6 +251,11 @@ line_table::impl::read_file_entry(cursor *cur, bool in_header) { assert(cur->sec == sec); + if (version >= 5) { + read_file_entry_v5(cur); + return true; + } + string file_name; cur->string(file_name); if (in_header && file_name.empty()) @@ -217,17 +269,227 @@ line_table::impl::read_file_entry(cursor *cur, bool in_header) return true; last_file_name_end = cur->get_section_offset(); - if (file_name[0] == '/') + if (file_name.empty()) + return false; + + add_file_entry(move(file_name), dir_index, mtime, length); + + return true; +} + +void +line_table::impl::add_include_directory(const string &dir) +{ + string resolved = dir; + if (!resolved.empty() && resolved.back() != '/') + resolved += '/'; + if (!resolved.empty() && resolved[0] != '/' && !comp_dir.empty()) + resolved = comp_dir + resolved; + if (resolved.empty()) + resolved = comp_dir; + include_directories.push_back(move(resolved)); +} + +void +line_table::impl::add_file_entry(string file_name, uint64_t dir_index, + uint64_t mtime, uint64_t length) +{ + if (file_name.empty()) + throw format_error("file entry missing file name"); + if (file_name[0] == '/') { file_names.emplace_back(move(file_name), mtime, length); - else if (dir_index < include_directories.size()) - file_names.emplace_back( - include_directories[dir_index] + file_name, - mtime, length); - else + return; + } + + const string *base = nullptr; + if (dir_index < include_directories.size()) + base = &include_directories[dir_index]; + else if (dir_index == 0 && version < 5 && !comp_dir.empty()) + base = &comp_dir; + if (!base) throw format_error("file name directory index out of range: " + std::to_string(dir_index)); + file_names.emplace_back(*base + file_name, mtime, length); +} - return true; +vector 
+line_table::impl::read_entry_formats(cursor *cur) +{ + vector formats; + uint64_t count = cur->uleb128(); + formats.reserve(count); + for (uint64_t i = 0; i < count; ++i) { + entry_format fmt; + fmt.content = (DW_LNCT)cur->uleb128(); + fmt.form = (DW_FORM)cur->uleb128(); + formats.push_back(fmt); + } + return formats; +} + +void +line_table::impl::read_v5_directory_table(cursor *cur) +{ + auto formats = read_entry_formats(cur); + uint64_t count = cur->uleb128(); + for (uint64_t i = 0; i < count; ++i) { + string path; + for (auto &fmt : formats) { + switch (fmt.content) { + case DW_LNCT::path: + path = read_form_string(cur, fmt.form); + break; + default: + cur->skip_form(fmt.form); + break; + } + } + add_include_directory(path); + } +} + +void +line_table::impl::read_v5_file_table(cursor *cur) +{ + file_entry_formats = read_entry_formats(cur); + uint64_t count = cur->uleb128(); + for (uint64_t i = 0; i < count; ++i) { + string file_name; + uint64_t dir_index = 0; + uint64_t mtime = 0; + uint64_t length = 0; + for (auto &fmt : file_entry_formats) { + switch (fmt.content) { + case DW_LNCT::path: + file_name = read_form_string(cur, fmt.form); + break; + case DW_LNCT::directory_index: + dir_index = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::timestamp: + mtime = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::size: + length = read_form_unsigned(cur, fmt.form); + break; + default: + cur->skip_form(fmt.form); + break; + } + } + if (!file_name.empty()) + add_file_entry(move(file_name), dir_index, mtime, length); + } +} + +void +line_table::impl::read_file_entry_v5(cursor *cur) +{ + if (file_entry_formats.empty()) + throw format_error("line table missing file name entry formats"); + + string file_name; + uint64_t dir_index = 0; + uint64_t mtime = 0; + uint64_t length = 0; + for (auto &fmt : file_entry_formats) { + switch (fmt.content) { + case DW_LNCT::path: + file_name = read_form_string(cur, fmt.form); + break; + case DW_LNCT::directory_index: 
+ dir_index = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::timestamp: + mtime = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::size: + length = read_form_unsigned(cur, fmt.form); + break; + default: + cur->skip_form(fmt.form); + break; + } + } + + section_offset entry_end = cur->get_section_offset(); + if (entry_end <= last_file_name_end) + return; + last_file_name_end = entry_end; + + if (!file_name.empty()) + add_file_entry(move(file_name), dir_index, mtime, length); +} + +string +line_table::impl::read_form_string(cursor *cur, DW_FORM form) +{ + switch (form) { + case DW_FORM::string: { + string res; + cur->string(res); + return res; + } + case DW_FORM::line_strp: + return read_string_from_section(section_type::line_str, + cur->offset()); + case DW_FORM::strp: + return read_string_from_section(section_type::str, + cur->offset()); + default: + throw format_error("unsupported string form in line table: " + + to_string(form)); + } +} + +uint64_t +line_table::impl::read_form_unsigned(cursor *cur, DW_FORM form) +{ + switch (form) { + case DW_FORM::data1: + return cur->fixed(); + case DW_FORM::data2: + return cur->fixed(); + case DW_FORM::data4: + return cur->fixed(); + case DW_FORM::data8: + return cur->fixed(); + case DW_FORM::udata: + return cur->uleb128(); + case DW_FORM::sdata: + return (uint64_t)cur->sleb128(); + default: + throw format_error("unsupported numeric form in line table: " + + to_string(form)); + } +} + +string +line_table::impl::read_string_from_section(section_type type, + section_offset off) +{ + shared_ptr
*cache = nullptr; + switch (type) { + case section_type::line_str: + cache = &line_str_sec; + break; + case section_type::str: + cache = &str_sec; + break; + default: + throw format_error("unsupported string section"); + } + + if (!cache->get()) { + if (!dw) + throw format_error("line table requires DWARF context to read strings"); + *cache = dw->get_section(type); + } + + cursor scur(*cache, off); + string res; + scur.string(res); + return res; } line_table::file::file(string path, uint64_t mtime, uint64_t length) @@ -236,11 +498,12 @@ line_table::file::file(string path, uint64_t mtime, uint64_t length) } void -line_table::entry::reset(bool is_stmt) +line_table::entry::reset(bool is_stmt, unsigned default_file_index) { address = op_index = 0; file = nullptr; - file_index = line = 1; + file_index = default_file_index; + line = 1; column = 0; this->is_stmt = is_stmt; basic_block = end_sequence = prologue_end = epilogue_begin = false; @@ -263,7 +526,7 @@ line_table::iterator::iterator(const line_table *table, section_offset pos) : table(table), pos(pos) { if (table) { - regs.reset(table->m->default_is_stmt); + regs.reset(table->m->default_is_stmt, table->m->file_index_base); ++(*this); } } @@ -403,8 +666,8 @@ line_table::iterator::step(cursor *cur) case DW_LNE::end_sequence: regs.end_sequence = true; entry = regs; - regs.reset(m->default_is_stmt); - break; + regs.reset(m->default_is_stmt, m->file_index_base); + break; case DW_LNE::set_address: regs.address = cur->address(); regs.op_index = 0; diff --git a/dwarf/value.cc b/dwarf/value.cc index 2ab6431..8694dfd 100644 --- a/dwarf/value.cc +++ b/dwarf/value.cc @@ -11,10 +11,15 @@ using namespace std; DWARFPP_BEGIN_NAMESPACE value::value(const unit *cu, - DW_AT name, DW_FORM form, type typ, section_offset offset) - : cu(cu), form(form), typ(typ), offset(offset) { + const attribute_spec &spec, section_offset offset) + : cu(cu), + form(spec.form), + typ(spec.type), + offset(offset), + has_implicit_const(spec.form == 
DW_FORM::implicit_const), + implicit_const(spec.implicit_const) { if (form == DW_FORM::indirect) - resolve_indirect(name); + resolve_indirect(spec.name); } section_offset @@ -76,6 +81,8 @@ value::as_uconstant() const return cur.fixed(); case DW_FORM::udata: return cur.uleb128(); + case DW_FORM::implicit_const: + return static_cast(implicit_const); default: throw value_type_mismatch("cannot read " + to_string(typ) + " as uconstant"); } @@ -96,6 +103,8 @@ value::as_sconstant() const return cur.fixed(); case DW_FORM::sdata: return cur.sleb128(); + case DW_FORM::implicit_const: + return implicit_const; default: throw value_type_mismatch("cannot read " + to_string(typ) + " as sconstant"); } @@ -245,6 +254,11 @@ value::as_cstr(size_t *size_out) const cursor scur(cu->get_dwarf().get_section(section_type::str), off); return scur.cstr(size_out); } + case DW_FORM::line_strp: { + section_offset off = cur.offset(); + cursor scur(cu->get_dwarf().get_section(section_type::line_str), off); + return scur.cstr(size_out); + } default: throw value_type_mismatch("cannot read " + to_string(typ) + " as string"); } @@ -279,7 +293,10 @@ value::resolve_indirect(DW_AT name) do { form = (DW_FORM)c.uleb128(); } while (form == DW_FORM::indirect); - typ = attribute_spec(name, form).type; + attribute_spec spec(name, form); + typ = spec.type; + has_implicit_const = (form == DW_FORM::implicit_const); + implicit_const = spec.implicit_const; offset = c.get_section_offset(); } diff --git a/elf/Makefile b/elf/Makefile deleted file mode 100644 index f598328..0000000 --- a/elf/Makefile +++ /dev/null @@ -1,73 +0,0 @@ -# Changed when ABI backwards compatibility is broken. -# Typically uses the major version. 
-SONAME = 0 - -CXXFLAGS+=-g -O2 -Werror -override CXXFLAGS+=-std=c++0x -Wall -fPIC - -all: libelf++.a libelf++.so libelf++.so.$(SONAME) libelf++.pc - -SRCS := elf.cc mmap_loader.cc to_string.cc -HDRS := elf++.hh data.hh common.hh to_hex.hh -CLEAN := - -libelf++.a: $(SRCS:.cc=.o) - ar rcs $@ $^ -CLEAN += libelf++.a $(SRCS:.cc=.o) - -$(SRCS:.cc=.o): $(HDRS) - -to_string.cc: enum-print.py data.hh Makefile - @echo "// Automatically generated by make at $$(date)" > to_string.cc - @echo "// DO NOT EDIT" >> to_string.cc - @echo >> to_string.cc - @echo '#include "data.hh"' >> to_string.cc - @echo '#include "to_hex.hh"' >> to_string.cc - @echo >> to_string.cc - @echo 'ELFPP_BEGIN_NAMESPACE' >> to_string.cc - @echo >> to_string.cc - python3 enum-print.py -u --hex --no-type --mask shf --mask pf \ - -x loos -x hios -x loproc -x hiproc < data.hh >> to_string.cc - @echo 'ELFPP_END_NAMESPACE' >> to_string.cc -CLEAN += to_string.cc - -libelf++.so.$(SONAME): $(SRCS:.cc=.o) - $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ -CLEAN += libelf++.so.* - -libelf++.so: - ln -s $@.$(SONAME) $@ -CLEAN += libelf++.so - -# Create pkg-config for local library and headers. This will be -# transformed in to the correct global pkg-config by install. 
-libelf++.pc: always - @(VER=$$(git describe --match 'v*' | sed -e s/^v//); \ - echo "libdir=$$PWD"; \ - echo "includedir=$$PWD"; \ - echo ""; \ - echo "Name: libelf++"; \ - echo "Description: C++11 ELF library"; \ - echo "Version: $$VER"; \ - echo "Libs: -L\$${libdir} -lelf++"; \ - echo "Cflags: -I\$${includedir}") > $@ -CLEAN += libelf++.pc - -.PHONY: always - -PREFIX?=/usr/local - -install: libelf++.a libelf++.so libelf++.so.$(SONAME) libelf++.pc - install -d $(DESTDIR)$(PREFIX)/lib/pkgconfig - install -t $(DESTDIR)$(PREFIX)/lib libelf++.a - install -t $(DESTDIR)$(PREFIX)/lib libelf++.so.$(SONAME) - install -t $(DESTDIR)$(PREFIX)/lib libelf++.so - install -d $(DESTDIR)$(PREFIX)/include/libelfin/elf - install -t $(DESTDIR)$(PREFIX)/include/libelfin/elf common.hh data.hh elf++.hh - sed 's,^libdir=.*,libdir=$(PREFIX)/lib,;s,^includedir=.*,includedir=$(PREFIX)/include,' libelf++.pc \ - > $(DESTDIR)$(PREFIX)/lib/pkgconfig/libelf++.pc - -clean: - rm -f $(CLEAN) - -.DELETE_ON_ERROR: diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index 4fa66af..0000000 --- a/examples/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -CXXFLAGS+=-g -O2 -Werror -override CXXFLAGS+=-std=c++0x -Wall - -CLEAN := - -all: dump-sections dump-segments dump-syms dump-tree dump-lines find-pc - -# Find libs -export PKG_CONFIG_PATH=../elf:../dwarf -CPPFLAGS+=$$(pkg-config --cflags libelf++ libdwarf++) -# Statically link against our libs to keep the example binaries simple -# and dependencies correct. 
-LIBS=../dwarf/libdwarf++.a ../elf/libelf++.a - -# Dependencies -CPPFLAGS+=-MD -MP -MF .$@.d --include .*.d - -dump-sections: dump-sections.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-sections dump-sections.o - -dump-segments: dump-segments.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-segments dump-segments.o - -dump-syms: dump-syms.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-syms dump-syms.o - -dump-tree: dump-tree.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-tree dump-tree.o - -dump-lines: dump-lines.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-lines dump-lines.o - -find-pc: find-pc.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += find-pc find-pc.o - -clean: - rm -f $(CLEAN) .*.d From adcf6c91cddd473ef7814342b7b9ade1be11e071 Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Thu, 1 Jan 2026 18:05:35 -0500 Subject: [PATCH 05/10] Added. --- CMakeLists.txt | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b07c819 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,42 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file LICENSE.rst or https://cmake.org/licensing for details. + +cmake_minimum_required(VERSION 4.2.0) + +# Reject any attempt to use a toolchain file. We must not use one because +# we could be downloading it here. If the CMAKE_TOOLCHAIN_FILE environment +# variable is set, the cache variable will have been initialized from it. +unset(CMAKE_TOOLCHAIN_FILE CACHE) +unset(ENV{CMAKE_TOOLCHAIN_FILE}) + +# We name the project and the target for the ExternalProject_Add() call +# to something that will highlight to the user what we are working on if +# something goes wrong and an error message is produced. 
+ +project(libelfin-populate NONE) + + +# Pass through things we've already detected in the main project to avoid +# paying the cost of redetecting them again in ExternalProject_Add() +set(GIT_EXECUTABLE [==[/opt/homebrew/bin/git]==]) +set(Git_VERSION [==[2.52.0]==]) +set_property(GLOBAL PROPERTY _CMAKE_FindGit_GIT_EXECUTABLE_VERSION + [==[/opt/homebrew/bin/git;2.52.0]==] +) + + +include(ExternalProject) +ExternalProject_Add(libelfin-populate + "UPDATE_DISCONNECTED" "False" "GIT_REPOSITORY" "https://github.com/plasma-umass/libelfin.git" "EXTERNALPROJECT_INTERNAL_ARGUMENT_SEPARATOR" "GIT_TAG" "8bd19d1e4bb19ec1046d44e1ac1b3bb72b91d0c5" "GIT_SHALLOW" "TRUE" + SOURCE_DIR "/Users/emery/git/coz-portage/_deps/libelfin-src" + BINARY_DIR "/Users/emery/git/coz-portage/_deps/libelfin-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + USES_TERMINAL_DOWNLOAD YES + USES_TERMINAL_UPDATE YES + USES_TERMINAL_PATCH YES +) + + From 8779775beea1ee5444ba53d14e354dbcf42a811d Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Thu, 1 Jan 2026 19:04:20 -0500 Subject: [PATCH 06/10] Add pre-built to_string.cc files for CMake integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These files are normally generated by enum-print.py during Makefile builds. Including them in the repo enables CMake-based builds (like FetchContent) to work without running the Python generator. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- dwarf/.gitignore | 1 - dwarf/to_string.cc | 576 +++++++++++++++++++++++++++++++++++++++++++++ elf/.gitignore | 1 - elf/to_string.cc | 160 +++++++++++++ 4 files changed, 736 insertions(+), 2 deletions(-) create mode 100644 dwarf/to_string.cc create mode 100644 elf/to_string.cc diff --git a/dwarf/.gitignore b/dwarf/.gitignore index 1049e91..3154398 100644 --- a/dwarf/.gitignore +++ b/dwarf/.gitignore @@ -1,5 +1,4 @@ *.o -to_string.cc libdwarf++.a libdwarf++.so libdwarf++.so.* diff --git a/dwarf/to_string.cc b/dwarf/to_string.cc new file mode 100644 index 0000000..c873bd7 --- /dev/null +++ b/dwarf/to_string.cc @@ -0,0 +1,576 @@ +// Automatically generated by make at Sat Nov 29 03:46:33 PM EST 2025 +// DO NOT EDIT + +#include "internal.hh" + +DWARFPP_BEGIN_NAMESPACE + +std::string +to_string(section_type v) +{ + switch (v) { + case section_type::abbrev: return "section_type::abbrev"; + case section_type::aranges: return "section_type::aranges"; + case section_type::frame: return "section_type::frame"; + case section_type::info: return "section_type::info"; + case section_type::line: return "section_type::line"; + case section_type::line_str: return "section_type::line_str"; + case section_type::loc: return "section_type::loc"; + case section_type::macinfo: return "section_type::macinfo"; + case section_type::pubnames: return "section_type::pubnames"; + case section_type::pubtypes: return "section_type::pubtypes"; + case section_type::ranges: return "section_type::ranges"; + case section_type::str: return "section_type::str"; + case section_type::types: return "section_type::types"; + } + return "(section_type)" + std::to_string((int)v); +} + +std::string +to_string(value::type v) +{ + switch (v) { + case value::type::invalid: return "value::type::invalid"; + case value::type::address: return "value::type::address"; + case value::type::block: return "value::type::block"; + 
case value::type::constant: return "value::type::constant"; + case value::type::uconstant: return "value::type::uconstant"; + case value::type::sconstant: return "value::type::sconstant"; + case value::type::exprloc: return "value::type::exprloc"; + case value::type::flag: return "value::type::flag"; + case value::type::line: return "value::type::line"; + case value::type::loclist: return "value::type::loclist"; + case value::type::mac: return "value::type::mac"; + case value::type::rangelist: return "value::type::rangelist"; + case value::type::reference: return "value::type::reference"; + case value::type::string: return "value::type::string"; + } + return "(value::type)" + std::to_string((int)v); +} + +std::string +to_string(expr_result::type v) +{ + switch (v) { + case expr_result::type::address: return "expr_result::type::address"; + case expr_result::type::reg: return "expr_result::type::reg"; + case expr_result::type::literal: return "expr_result::type::literal"; + case expr_result::type::implicit: return "expr_result::type::implicit"; + case expr_result::type::empty: return "expr_result::type::empty"; + } + return "(expr_result::type)" + std::to_string((int)v); +} + +std::string +to_string(DW_TAG v) +{ + switch (v) { + case DW_TAG::array_type: return "DW_TAG_array_type"; + case DW_TAG::class_type: return "DW_TAG_class_type"; + case DW_TAG::entry_point: return "DW_TAG_entry_point"; + case DW_TAG::enumeration_type: return "DW_TAG_enumeration_type"; + case DW_TAG::formal_parameter: return "DW_TAG_formal_parameter"; + case DW_TAG::imported_declaration: return "DW_TAG_imported_declaration"; + case DW_TAG::label: return "DW_TAG_label"; + case DW_TAG::lexical_block: return "DW_TAG_lexical_block"; + case DW_TAG::member: return "DW_TAG_member"; + case DW_TAG::pointer_type: return "DW_TAG_pointer_type"; + case DW_TAG::reference_type: return "DW_TAG_reference_type"; + case DW_TAG::compile_unit: return "DW_TAG_compile_unit"; + case DW_TAG::string_type: return 
"DW_TAG_string_type"; + case DW_TAG::structure_type: return "DW_TAG_structure_type"; + case DW_TAG::subroutine_type: return "DW_TAG_subroutine_type"; + case DW_TAG::typedef_: return "DW_TAG_typedef"; + case DW_TAG::union_type: return "DW_TAG_union_type"; + case DW_TAG::unspecified_parameters: return "DW_TAG_unspecified_parameters"; + case DW_TAG::variant: return "DW_TAG_variant"; + case DW_TAG::common_block: return "DW_TAG_common_block"; + case DW_TAG::common_inclusion: return "DW_TAG_common_inclusion"; + case DW_TAG::inheritance: return "DW_TAG_inheritance"; + case DW_TAG::inlined_subroutine: return "DW_TAG_inlined_subroutine"; + case DW_TAG::module: return "DW_TAG_module"; + case DW_TAG::ptr_to_member_type: return "DW_TAG_ptr_to_member_type"; + case DW_TAG::set_type: return "DW_TAG_set_type"; + case DW_TAG::subrange_type: return "DW_TAG_subrange_type"; + case DW_TAG::with_stmt: return "DW_TAG_with_stmt"; + case DW_TAG::access_declaration: return "DW_TAG_access_declaration"; + case DW_TAG::base_type: return "DW_TAG_base_type"; + case DW_TAG::catch_block: return "DW_TAG_catch_block"; + case DW_TAG::const_type: return "DW_TAG_const_type"; + case DW_TAG::constant: return "DW_TAG_constant"; + case DW_TAG::enumerator: return "DW_TAG_enumerator"; + case DW_TAG::file_type: return "DW_TAG_file_type"; + case DW_TAG::friend_: return "DW_TAG_friend"; + case DW_TAG::namelist: return "DW_TAG_namelist"; + case DW_TAG::namelist_item: return "DW_TAG_namelist_item"; + case DW_TAG::packed_type: return "DW_TAG_packed_type"; + case DW_TAG::subprogram: return "DW_TAG_subprogram"; + case DW_TAG::template_type_parameter: return "DW_TAG_template_type_parameter"; + case DW_TAG::template_value_parameter: return "DW_TAG_template_value_parameter"; + case DW_TAG::thrown_type: return "DW_TAG_thrown_type"; + case DW_TAG::try_block: return "DW_TAG_try_block"; + case DW_TAG::variant_part: return "DW_TAG_variant_part"; + case DW_TAG::variable: return "DW_TAG_variable"; + case 
DW_TAG::volatile_type: return "DW_TAG_volatile_type"; + case DW_TAG::dwarf_procedure: return "DW_TAG_dwarf_procedure"; + case DW_TAG::restrict_type: return "DW_TAG_restrict_type"; + case DW_TAG::interface_type: return "DW_TAG_interface_type"; + case DW_TAG::namespace_: return "DW_TAG_namespace"; + case DW_TAG::imported_module: return "DW_TAG_imported_module"; + case DW_TAG::unspecified_type: return "DW_TAG_unspecified_type"; + case DW_TAG::partial_unit: return "DW_TAG_partial_unit"; + case DW_TAG::imported_unit: return "DW_TAG_imported_unit"; + case DW_TAG::condition: return "DW_TAG_condition"; + case DW_TAG::shared_type: return "DW_TAG_shared_type"; + case DW_TAG::type_unit: return "DW_TAG_type_unit"; + case DW_TAG::rvalue_reference_type: return "DW_TAG_rvalue_reference_type"; + case DW_TAG::template_alias: return "DW_TAG_template_alias"; + case DW_TAG::lo_user: break; + case DW_TAG::hi_user: break; + } + return "(DW_TAG)0x" + to_hex((int)v); +} + +std::string +to_string(DW_CHILDREN v) +{ + switch (v) { + case DW_CHILDREN::no: return "DW_CHILDREN_no"; + case DW_CHILDREN::yes: return "DW_CHILDREN_yes"; + } + return "(DW_CHILDREN)0x" + to_hex((int)v); +} + +std::string +to_string(DW_AT v) +{ + switch (v) { + case DW_AT::sibling: return "DW_AT_sibling"; + case DW_AT::location: return "DW_AT_location"; + case DW_AT::name: return "DW_AT_name"; + case DW_AT::ordering: return "DW_AT_ordering"; + case DW_AT::byte_size: return "DW_AT_byte_size"; + case DW_AT::bit_offset: return "DW_AT_bit_offset"; + case DW_AT::bit_size: return "DW_AT_bit_size"; + case DW_AT::stmt_list: return "DW_AT_stmt_list"; + case DW_AT::low_pc: return "DW_AT_low_pc"; + case DW_AT::high_pc: return "DW_AT_high_pc"; + case DW_AT::language: return "DW_AT_language"; + case DW_AT::discr: return "DW_AT_discr"; + case DW_AT::discr_value: return "DW_AT_discr_value"; + case DW_AT::visibility: return "DW_AT_visibility"; + case DW_AT::import: return "DW_AT_import"; + case DW_AT::string_length: return 
"DW_AT_string_length"; + case DW_AT::common_reference: return "DW_AT_common_reference"; + case DW_AT::comp_dir: return "DW_AT_comp_dir"; + case DW_AT::const_value: return "DW_AT_const_value"; + case DW_AT::containing_type: return "DW_AT_containing_type"; + case DW_AT::default_value: return "DW_AT_default_value"; + case DW_AT::inline_: return "DW_AT_inline"; + case DW_AT::is_optional: return "DW_AT_is_optional"; + case DW_AT::lower_bound: return "DW_AT_lower_bound"; + case DW_AT::producer: return "DW_AT_producer"; + case DW_AT::prototyped: return "DW_AT_prototyped"; + case DW_AT::return_addr: return "DW_AT_return_addr"; + case DW_AT::start_scope: return "DW_AT_start_scope"; + case DW_AT::bit_stride: return "DW_AT_bit_stride"; + case DW_AT::upper_bound: return "DW_AT_upper_bound"; + case DW_AT::abstract_origin: return "DW_AT_abstract_origin"; + case DW_AT::accessibility: return "DW_AT_accessibility"; + case DW_AT::address_class: return "DW_AT_address_class"; + case DW_AT::artificial: return "DW_AT_artificial"; + case DW_AT::base_types: return "DW_AT_base_types"; + case DW_AT::calling_convention: return "DW_AT_calling_convention"; + case DW_AT::count: return "DW_AT_count"; + case DW_AT::data_member_location: return "DW_AT_data_member_location"; + case DW_AT::decl_column: return "DW_AT_decl_column"; + case DW_AT::decl_file: return "DW_AT_decl_file"; + case DW_AT::decl_line: return "DW_AT_decl_line"; + case DW_AT::declaration: return "DW_AT_declaration"; + case DW_AT::discr_list: return "DW_AT_discr_list"; + case DW_AT::encoding: return "DW_AT_encoding"; + case DW_AT::external: return "DW_AT_external"; + case DW_AT::frame_base: return "DW_AT_frame_base"; + case DW_AT::friend_: return "DW_AT_friend"; + case DW_AT::identifier_case: return "DW_AT_identifier_case"; + case DW_AT::macro_info: return "DW_AT_macro_info"; + case DW_AT::namelist_item: return "DW_AT_namelist_item"; + case DW_AT::priority: return "DW_AT_priority"; + case DW_AT::segment: return "DW_AT_segment"; + 
case DW_AT::specification: return "DW_AT_specification"; + case DW_AT::static_link: return "DW_AT_static_link"; + case DW_AT::type: return "DW_AT_type"; + case DW_AT::use_location: return "DW_AT_use_location"; + case DW_AT::variable_parameter: return "DW_AT_variable_parameter"; + case DW_AT::virtuality: return "DW_AT_virtuality"; + case DW_AT::vtable_elem_location: return "DW_AT_vtable_elem_location"; + case DW_AT::allocated: return "DW_AT_allocated"; + case DW_AT::associated: return "DW_AT_associated"; + case DW_AT::data_location: return "DW_AT_data_location"; + case DW_AT::byte_stride: return "DW_AT_byte_stride"; + case DW_AT::entry_pc: return "DW_AT_entry_pc"; + case DW_AT::use_UTF8: return "DW_AT_use_UTF8"; + case DW_AT::extension: return "DW_AT_extension"; + case DW_AT::ranges: return "DW_AT_ranges"; + case DW_AT::trampoline: return "DW_AT_trampoline"; + case DW_AT::call_column: return "DW_AT_call_column"; + case DW_AT::call_file: return "DW_AT_call_file"; + case DW_AT::call_line: return "DW_AT_call_line"; + case DW_AT::description: return "DW_AT_description"; + case DW_AT::binary_scale: return "DW_AT_binary_scale"; + case DW_AT::decimal_scale: return "DW_AT_decimal_scale"; + case DW_AT::small: return "DW_AT_small"; + case DW_AT::decimal_sign: return "DW_AT_decimal_sign"; + case DW_AT::digit_count: return "DW_AT_digit_count"; + case DW_AT::picture_string: return "DW_AT_picture_string"; + case DW_AT::mutable_: return "DW_AT_mutable"; + case DW_AT::threads_scaled: return "DW_AT_threads_scaled"; + case DW_AT::explicit_: return "DW_AT_explicit"; + case DW_AT::object_pointer: return "DW_AT_object_pointer"; + case DW_AT::endianity: return "DW_AT_endianity"; + case DW_AT::elemental: return "DW_AT_elemental"; + case DW_AT::pure: return "DW_AT_pure"; + case DW_AT::recursive: return "DW_AT_recursive"; + case DW_AT::signature: return "DW_AT_signature"; + case DW_AT::main_subprogram: return "DW_AT_main_subprogram"; + case DW_AT::data_bit_offset: return 
"DW_AT_data_bit_offset"; + case DW_AT::const_expr: return "DW_AT_const_expr"; + case DW_AT::enum_class: return "DW_AT_enum_class"; + case DW_AT::linkage_name: return "DW_AT_linkage_name"; + case DW_AT::lo_user: break; + case DW_AT::hi_user: break; + } + return "(DW_AT)0x" + to_hex((int)v); +} + +std::string +to_string(DW_FORM v) +{ + switch (v) { + case DW_FORM::addr: return "DW_FORM_addr"; + case DW_FORM::block2: return "DW_FORM_block2"; + case DW_FORM::block4: return "DW_FORM_block4"; + case DW_FORM::data2: return "DW_FORM_data2"; + case DW_FORM::data4: return "DW_FORM_data4"; + case DW_FORM::data8: return "DW_FORM_data8"; + case DW_FORM::string: return "DW_FORM_string"; + case DW_FORM::block: return "DW_FORM_block"; + case DW_FORM::block1: return "DW_FORM_block1"; + case DW_FORM::data1: return "DW_FORM_data1"; + case DW_FORM::flag: return "DW_FORM_flag"; + case DW_FORM::sdata: return "DW_FORM_sdata"; + case DW_FORM::strp: return "DW_FORM_strp"; + case DW_FORM::udata: return "DW_FORM_udata"; + case DW_FORM::ref_addr: return "DW_FORM_ref_addr"; + case DW_FORM::ref1: return "DW_FORM_ref1"; + case DW_FORM::ref2: return "DW_FORM_ref2"; + case DW_FORM::ref4: return "DW_FORM_ref4"; + case DW_FORM::ref8: return "DW_FORM_ref8"; + case DW_FORM::ref_udata: return "DW_FORM_ref_udata"; + case DW_FORM::indirect: return "DW_FORM_indirect"; + case DW_FORM::sec_offset: return "DW_FORM_sec_offset"; + case DW_FORM::exprloc: return "DW_FORM_exprloc"; + case DW_FORM::flag_present: return "DW_FORM_flag_present"; + case DW_FORM::line_strp: return "DW_FORM_line_strp"; + case DW_FORM::implicit_const: return "DW_FORM_implicit_const"; + case DW_FORM::ref_sig8: return "DW_FORM_ref_sig8"; + } + return "(DW_FORM)0x" + to_hex((int)v); +} + +std::string +to_string(DW_OP v) +{ + switch (v) { + case DW_OP::addr: return "DW_OP_addr"; + case DW_OP::deref: return "DW_OP_deref"; + case DW_OP::const1u: return "DW_OP_const1u"; + case DW_OP::const1s: return "DW_OP_const1s"; + case DW_OP::const2u: 
return "DW_OP_const2u"; + case DW_OP::const2s: return "DW_OP_const2s"; + case DW_OP::const4u: return "DW_OP_const4u"; + case DW_OP::const4s: return "DW_OP_const4s"; + case DW_OP::const8u: return "DW_OP_const8u"; + case DW_OP::const8s: return "DW_OP_const8s"; + case DW_OP::constu: return "DW_OP_constu"; + case DW_OP::consts: return "DW_OP_consts"; + case DW_OP::dup: return "DW_OP_dup"; + case DW_OP::drop: return "DW_OP_drop"; + case DW_OP::over: return "DW_OP_over"; + case DW_OP::pick: return "DW_OP_pick"; + case DW_OP::swap: return "DW_OP_swap"; + case DW_OP::rot: return "DW_OP_rot"; + case DW_OP::xderef: return "DW_OP_xderef"; + case DW_OP::abs: return "DW_OP_abs"; + case DW_OP::and_: return "DW_OP_and"; + case DW_OP::div: return "DW_OP_div"; + case DW_OP::minus: return "DW_OP_minus"; + case DW_OP::mod: return "DW_OP_mod"; + case DW_OP::mul: return "DW_OP_mul"; + case DW_OP::neg: return "DW_OP_neg"; + case DW_OP::not_: return "DW_OP_not"; + case DW_OP::or_: return "DW_OP_or"; + case DW_OP::plus: return "DW_OP_plus"; + case DW_OP::plus_uconst: return "DW_OP_plus_uconst"; + case DW_OP::shl: return "DW_OP_shl"; + case DW_OP::shr: return "DW_OP_shr"; + case DW_OP::shra: return "DW_OP_shra"; + case DW_OP::xor_: return "DW_OP_xor"; + case DW_OP::skip: return "DW_OP_skip"; + case DW_OP::bra: return "DW_OP_bra"; + case DW_OP::eq: return "DW_OP_eq"; + case DW_OP::ge: return "DW_OP_ge"; + case DW_OP::gt: return "DW_OP_gt"; + case DW_OP::le: return "DW_OP_le"; + case DW_OP::lt: return "DW_OP_lt"; + case DW_OP::ne: return "DW_OP_ne"; + case DW_OP::lit0: return "DW_OP_lit0"; + case DW_OP::lit31: return "DW_OP_lit31"; + case DW_OP::reg0: return "DW_OP_reg0"; + case DW_OP::reg31: return "DW_OP_reg31"; + case DW_OP::breg0: return "DW_OP_breg0"; + case DW_OP::breg31: return "DW_OP_breg31"; + case DW_OP::regx: return "DW_OP_regx"; + case DW_OP::fbreg: return "DW_OP_fbreg"; + case DW_OP::bregx: return "DW_OP_bregx"; + case DW_OP::piece: return "DW_OP_piece"; + case 
DW_OP::deref_size: return "DW_OP_deref_size"; + case DW_OP::xderef_size: return "DW_OP_xderef_size"; + case DW_OP::nop: return "DW_OP_nop"; + case DW_OP::push_object_address: return "DW_OP_push_object_address"; + case DW_OP::call2: return "DW_OP_call2"; + case DW_OP::call4: return "DW_OP_call4"; + case DW_OP::call_ref: return "DW_OP_call_ref"; + case DW_OP::form_tls_address: return "DW_OP_form_tls_address"; + case DW_OP::call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP::bit_piece: return "DW_OP_bit_piece"; + case DW_OP::implicit_value: return "DW_OP_implicit_value"; + case DW_OP::stack_value: return "DW_OP_stack_value"; + case DW_OP::lo_user: break; + case DW_OP::hi_user: break; + } + return "(DW_OP)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ATE v) +{ + switch (v) { + case DW_ATE::address: return "DW_ATE_address"; + case DW_ATE::boolean: return "DW_ATE_boolean"; + case DW_ATE::complex_float: return "DW_ATE_complex_float"; + case DW_ATE::float_: return "DW_ATE_float"; + case DW_ATE::signed_: return "DW_ATE_signed"; + case DW_ATE::signed_char: return "DW_ATE_signed_char"; + case DW_ATE::unsigned_: return "DW_ATE_unsigned"; + case DW_ATE::unsigned_char: return "DW_ATE_unsigned_char"; + case DW_ATE::imaginary_float: return "DW_ATE_imaginary_float"; + case DW_ATE::packed_decimal: return "DW_ATE_packed_decimal"; + case DW_ATE::numeric_string: return "DW_ATE_numeric_string"; + case DW_ATE::edited: return "DW_ATE_edited"; + case DW_ATE::signed_fixed: return "DW_ATE_signed_fixed"; + case DW_ATE::unsigned_fixed: return "DW_ATE_unsigned_fixed"; + case DW_ATE::decimal_float: return "DW_ATE_decimal_float"; + case DW_ATE::UTF: return "DW_ATE_UTF"; + case DW_ATE::lo_user: break; + case DW_ATE::hi_user: break; + } + return "(DW_ATE)0x" + to_hex((int)v); +} + +std::string +to_string(DW_DS v) +{ + switch (v) { + case DW_DS::unsigned_: return "DW_DS_unsigned"; + case DW_DS::leading_overpunch: return "DW_DS_leading_overpunch"; + case DW_DS::trailing_overpunch: 
return "DW_DS_trailing_overpunch"; + case DW_DS::leading_separate: return "DW_DS_leading_separate"; + case DW_DS::trailing_separate: return "DW_DS_trailing_separate"; + } + return "(DW_DS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_END v) +{ + switch (v) { + case DW_END::default_: return "DW_END_default"; + case DW_END::big: return "DW_END_big"; + case DW_END::little: return "DW_END_little"; + case DW_END::lo_user: break; + case DW_END::hi_user: break; + } + return "(DW_END)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ACCESS v) +{ + switch (v) { + case DW_ACCESS::public_: return "DW_ACCESS_public"; + case DW_ACCESS::protected_: return "DW_ACCESS_protected"; + case DW_ACCESS::private_: return "DW_ACCESS_private"; + } + return "(DW_ACCESS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_VIS v) +{ + switch (v) { + case DW_VIS::local: return "DW_VIS_local"; + case DW_VIS::exported: return "DW_VIS_exported"; + case DW_VIS::qualified: return "DW_VIS_qualified"; + } + return "(DW_VIS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_VIRTUALITY v) +{ + switch (v) { + case DW_VIRTUALITY::none: return "DW_VIRTUALITY_none"; + case DW_VIRTUALITY::virtual_: return "DW_VIRTUALITY_virtual"; + case DW_VIRTUALITY::pure_virtual: return "DW_VIRTUALITY_pure_virtual"; + } + return "(DW_VIRTUALITY)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LANG v) +{ + switch (v) { + case DW_LANG::C89: return "DW_LANG_C89"; + case DW_LANG::C: return "DW_LANG_C"; + case DW_LANG::Ada83: return "DW_LANG_Ada83"; + case DW_LANG::C_plus_plus: return "DW_LANG_C_plus_plus"; + case DW_LANG::Cobol74: return "DW_LANG_Cobol74"; + case DW_LANG::Cobol85: return "DW_LANG_Cobol85"; + case DW_LANG::Fortran77: return "DW_LANG_Fortran77"; + case DW_LANG::Fortran90: return "DW_LANG_Fortran90"; + case DW_LANG::Pascal83: return "DW_LANG_Pascal83"; + case DW_LANG::Modula2: return "DW_LANG_Modula2"; + case DW_LANG::Java: return "DW_LANG_Java"; + case DW_LANG::C99: return "DW_LANG_C99"; + 
case DW_LANG::Ada95: return "DW_LANG_Ada95"; + case DW_LANG::Fortran95: return "DW_LANG_Fortran95"; + case DW_LANG::PLI: return "DW_LANG_PLI"; + case DW_LANG::ObjC: return "DW_LANG_ObjC"; + case DW_LANG::ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus"; + case DW_LANG::UPC: return "DW_LANG_UPC"; + case DW_LANG::D: return "DW_LANG_D"; + case DW_LANG::Python: return "DW_LANG_Python"; + case DW_LANG::lo_user: break; + case DW_LANG::hi_user: break; + } + return "(DW_LANG)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ID v) +{ + switch (v) { + case DW_ID::case_sensitive: return "DW_ID_case_sensitive"; + case DW_ID::up_case: return "DW_ID_up_case"; + case DW_ID::down_case: return "DW_ID_down_case"; + case DW_ID::case_insensitive: return "DW_ID_case_insensitive"; + } + return "(DW_ID)0x" + to_hex((int)v); +} + +std::string +to_string(DW_CC v) +{ + switch (v) { + case DW_CC::normal: return "DW_CC_normal"; + case DW_CC::program: return "DW_CC_program"; + case DW_CC::nocall: return "DW_CC_nocall"; + case DW_CC::lo_user: break; + case DW_CC::hi_user: break; + } + return "(DW_CC)0x" + to_hex((int)v); +} + +std::string +to_string(DW_INL v) +{ + switch (v) { + case DW_INL::not_inlined: return "DW_INL_not_inlined"; + case DW_INL::inlined: return "DW_INL_inlined"; + case DW_INL::declared_not_inlined: return "DW_INL_declared_not_inlined"; + case DW_INL::declared_inlined: return "DW_INL_declared_inlined"; + } + return "(DW_INL)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ORD v) +{ + switch (v) { + case DW_ORD::row_major: return "DW_ORD_row_major"; + case DW_ORD::col_major: return "DW_ORD_col_major"; + } + return "(DW_ORD)0x" + to_hex((int)v); +} + +std::string +to_string(DW_DSC v) +{ + switch (v) { + case DW_DSC::label: return "DW_DSC_label"; + case DW_DSC::range: return "DW_DSC_range"; + } + return "(DW_DSC)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LNS v) +{ + switch (v) { + case DW_LNS::copy: return "DW_LNS_copy"; + case DW_LNS::advance_pc: return 
"DW_LNS_advance_pc"; + case DW_LNS::advance_line: return "DW_LNS_advance_line"; + case DW_LNS::set_file: return "DW_LNS_set_file"; + case DW_LNS::set_column: return "DW_LNS_set_column"; + case DW_LNS::negate_stmt: return "DW_LNS_negate_stmt"; + case DW_LNS::set_basic_block: return "DW_LNS_set_basic_block"; + case DW_LNS::const_add_pc: return "DW_LNS_const_add_pc"; + case DW_LNS::fixed_advance_pc: return "DW_LNS_fixed_advance_pc"; + case DW_LNS::set_prologue_end: return "DW_LNS_set_prologue_end"; + case DW_LNS::set_epilogue_begin: return "DW_LNS_set_epilogue_begin"; + case DW_LNS::set_isa: return "DW_LNS_set_isa"; + } + return "(DW_LNS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LNE v) +{ + switch (v) { + case DW_LNE::end_sequence: return "DW_LNE_end_sequence"; + case DW_LNE::set_address: return "DW_LNE_set_address"; + case DW_LNE::define_file: return "DW_LNE_define_file"; + case DW_LNE::set_discriminator: return "DW_LNE_set_discriminator"; + case DW_LNE::lo_user: break; + case DW_LNE::hi_user: break; + } + return "(DW_LNE)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LNCT v) +{ + switch (v) { + case DW_LNCT::path: return "DW_LNCT_path"; + case DW_LNCT::directory_index: return "DW_LNCT_directory_index"; + case DW_LNCT::timestamp: return "DW_LNCT_timestamp"; + case DW_LNCT::size: return "DW_LNCT_size"; + case DW_LNCT::MD5: return "DW_LNCT_MD5"; + case DW_LNCT::lo_user: break; + case DW_LNCT::hi_user: break; + } + return "(DW_LNCT)0x" + to_hex((int)v); +} + +DWARFPP_END_NAMESPACE diff --git a/elf/.gitignore b/elf/.gitignore index 0166efc..5120c10 100644 --- a/elf/.gitignore +++ b/elf/.gitignore @@ -1,5 +1,4 @@ *.o -to_string.cc libelf++.a libelf++.so libelf++.so.* diff --git a/elf/to_string.cc b/elf/to_string.cc new file mode 100644 index 0000000..3293a5b --- /dev/null +++ b/elf/to_string.cc @@ -0,0 +1,160 @@ +// Automatically generated by make at Sat Nov 29 03:46:27 PM EST 2025 +// DO NOT EDIT + +#include "data.hh" +#include "to_hex.hh" + 
+ELFPP_BEGIN_NAMESPACE + +std::string +to_string(elfclass v) +{ + switch (v) { + case elfclass::_32: return "32"; + case elfclass::_64: return "64"; + } + return "(elfclass)0x" + to_hex((int)v); +} + +std::string +to_string(elfdata v) +{ + switch (v) { + case elfdata::lsb: return "lsb"; + case elfdata::msb: return "msb"; + } + return "(elfdata)0x" + to_hex((int)v); +} + +std::string +to_string(elfosabi v) +{ + switch (v) { + case elfosabi::sysv: return "sysv"; + case elfosabi::hpux: return "hpux"; + case elfosabi::standalone: return "standalone"; + } + return "(elfosabi)0x" + to_hex((int)v); +} + +std::string +to_string(et v) +{ + switch (v) { + case et::none: return "none"; + case et::rel: return "rel"; + case et::exec: return "exec"; + case et::dyn: return "dyn"; + case et::core: return "core"; + case et::loos: break; + case et::hios: break; + case et::loproc: break; + case et::hiproc: break; + } + return "(et)0x" + to_hex((int)v); +} + +std::string +to_string(sht v) +{ + switch (v) { + case sht::null: return "null"; + case sht::progbits: return "progbits"; + case sht::symtab: return "symtab"; + case sht::strtab: return "strtab"; + case sht::rela: return "rela"; + case sht::hash: return "hash"; + case sht::dynamic: return "dynamic"; + case sht::note: return "note"; + case sht::nobits: return "nobits"; + case sht::rel: return "rel"; + case sht::shlib: return "shlib"; + case sht::dynsym: return "dynsym"; + case sht::loos: break; + case sht::hios: break; + case sht::loproc: break; + case sht::hiproc: break; + } + return "(sht)0x" + to_hex((int)v); +} + +std::string +to_string(shf v) +{ + std::string res; + if ((v & shf::write) == shf::write) { res += "write|"; v &= ~shf::write; } + if ((v & shf::alloc) == shf::alloc) { res += "alloc|"; v &= ~shf::alloc; } + if ((v & shf::execinstr) == shf::execinstr) { res += "execinstr|"; v &= ~shf::execinstr; } + if ((v & shf::maskos) == shf::maskos) { res += "maskos|"; v &= ~shf::maskos; } + if ((v & shf::maskproc) == 
shf::maskproc) { res += "maskproc|"; v &= ~shf::maskproc; } + if (res.empty() || v != (shf)0) res += "(shf)0x" + to_hex((int)v); + else res.pop_back(); + return res; +} + +std::string +to_string(pt v) +{ + switch (v) { + case pt::null: return "null"; + case pt::load: return "load"; + case pt::dynamic: return "dynamic"; + case pt::interp: return "interp"; + case pt::note: return "note"; + case pt::shlib: return "shlib"; + case pt::phdr: return "phdr"; + case pt::loos: break; + case pt::hios: break; + case pt::loproc: break; + case pt::hiproc: break; + } + return "(pt)0x" + to_hex((int)v); +} + +std::string +to_string(pf v) +{ + std::string res; + if ((v & pf::x) == pf::x) { res += "x|"; v &= ~pf::x; } + if ((v & pf::w) == pf::w) { res += "w|"; v &= ~pf::w; } + if ((v & pf::r) == pf::r) { res += "r|"; v &= ~pf::r; } + if ((v & pf::maskos) == pf::maskos) { res += "maskos|"; v &= ~pf::maskos; } + if ((v & pf::maskproc) == pf::maskproc) { res += "maskproc|"; v &= ~pf::maskproc; } + if (res.empty() || v != (pf)0) res += "(pf)0x" + to_hex((int)v); + else res.pop_back(); + return res; +} + +std::string +to_string(stb v) +{ + switch (v) { + case stb::local: return "local"; + case stb::global: return "global"; + case stb::weak: return "weak"; + case stb::loos: break; + case stb::hios: break; + case stb::loproc: break; + case stb::hiproc: break; + } + return "(stb)0x" + to_hex((int)v); +} + +std::string +to_string(stt v) +{ + switch (v) { + case stt::notype: return "notype"; + case stt::object: return "object"; + case stt::func: return "func"; + case stt::section: return "section"; + case stt::file: return "file"; + case stt::loos: break; + case stt::hios: break; + case stt::loproc: break; + case stt::hiproc: break; + } + return "(stt)0x" + to_hex((int)v); +} + +ELFPP_END_NAMESPACE From ca22688e97b05bfceaf0f8daa32634a243277880 Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Thu, 1 Jan 2026 20:47:13 -0500 Subject: [PATCH 07/10] Added more section coverage. 
--- dwarf/dwarf++.hh | 1 + dwarf/elf.cc | 5 +++-- dwarf/to_string.cc | 1 + dwarf/value.cc | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/dwarf/dwarf++.hh b/dwarf/dwarf++.hh index a0b0816..01db15f 100644 --- a/dwarf/dwarf++.hh +++ b/dwarf/dwarf++.hh @@ -93,6 +93,7 @@ enum class section_type pubtypes, ranges, str, + str_offsets, types, }; diff --git a/dwarf/elf.cc b/dwarf/elf.cc index 11add00..5fd7a20 100644 --- a/dwarf/elf.cc +++ b/dwarf/elf.cc @@ -26,8 +26,9 @@ static const struct {".debug_pubnames", section_type::pubnames}, {".debug_pubtypes", section_type::pubtypes}, {".debug_ranges", section_type::ranges}, - {".debug_str", section_type::str}, - {".debug_types", section_type::types}, + {".debug_str", section_type::str}, + {".debug_str_offsets", section_type::str_offsets}, + {".debug_types", section_type::types}, }; bool diff --git a/dwarf/to_string.cc b/dwarf/to_string.cc index c873bd7..57784f5 100644 --- a/dwarf/to_string.cc +++ b/dwarf/to_string.cc @@ -21,6 +21,7 @@ to_string(section_type v) case section_type::pubtypes: return "section_type::pubtypes"; case section_type::ranges: return "section_type::ranges"; case section_type::str: return "section_type::str"; + case section_type::str_offsets: return "section_type::str_offsets"; case section_type::types: return "section_type::types"; } return "(section_type)" + std::to_string((int)v); diff --git a/dwarf/value.cc b/dwarf/value.cc index 8694dfd..95e7708 100644 --- a/dwarf/value.cc +++ b/dwarf/value.cc @@ -259,6 +259,46 @@ value::as_cstr(size_t *size_out) const cursor scur(cu->get_dwarf().get_section(section_type::line_str), off); return scur.cstr(size_out); } + case DW_FORM::strx: + case DW_FORM::strx1: + case DW_FORM::strx2: + case DW_FORM::strx3: + case DW_FORM::strx4: { + // DWARF 5: Read string index, look up in .debug_str_offsets, then read from .debug_str + uint64_t index; + switch (form) { + case DW_FORM::strx: + index = cur.uleb128(); + break; 
+ case DW_FORM::strx1: + index = cur.fixed(); + break; + case DW_FORM::strx2: + index = cur.fixed(); + break; + case DW_FORM::strx3: + index = cur.fixed() | (cur.fixed() << 8); + break; + case DW_FORM::strx4: + index = cur.fixed(); + break; + default: + index = 0; + break; + } + // Get str_offsets_base from CU root DIE's DW_AT_str_offsets_base + // For now, we use a simplified approach: read from start of section + header + // DWARF 5 .debug_str_offsets has a header (length + version + padding) + // We skip the 8-byte header (4-byte length + 2-byte version + 2-byte padding for 32-bit DWARF) + auto str_offsets_sec = cu->get_dwarf().get_section(section_type::str_offsets); + section_offset header_size = 8; // Simplified: assume 32-bit DWARF + unsigned offset_size = (str_offsets_sec->addr_size == 8) ? 8 : 4; + cursor offsets_cur(str_offsets_sec, + header_size + index * offset_size); + section_offset str_off = offsets_cur.offset(); + cursor scur(cu->get_dwarf().get_section(section_type::str), str_off); + return scur.cstr(size_out); + } default: throw value_type_mismatch("cannot read " + to_string(typ) + " as string"); } From f00401b1141e3cc5fd0d8632ad84abb396c2bb2d Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Fri, 2 Jan 2026 13:29:53 -0500 Subject: [PATCH 08/10] Updated. 
--- dwarf/dwarf++.hh | 3 +++ dwarf/dwarf.cc | 40 +++++++++++++++++++++++++------ dwarf/elf.cc | 25 +++++++++++--------- dwarf/to_string.cc | 3 +++ dwarf/value.cc | 59 ++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 108 insertions(+), 22 deletions(-) diff --git a/dwarf/dwarf++.hh b/dwarf/dwarf++.hh index 01db15f..590dac2 100644 --- a/dwarf/dwarf++.hh +++ b/dwarf/dwarf++.hh @@ -82,16 +82,19 @@ public: enum class section_type { abbrev, + addr, // DWARF 5: .debug_addr aranges, frame, info, line, line_str, loc, + loclists, // DWARF 5: .debug_loclists macinfo, pubnames, pubtypes, ranges, + rnglists, // DWARF 5: .debug_rnglists str, str_offsets, types, diff --git a/dwarf/dwarf.cc b/dwarf/dwarf.cc index d763295..de0fdde 100644 --- a/dwarf/dwarf.cc +++ b/dwarf/dwarf.cc @@ -42,24 +42,46 @@ dwarf::dwarf(const std::shared_ptr &l) throw format_error("required .debug_info section missing"); m->sec_info = make_shared
(section_type::info, data, size, byte_order::lsb); - // Sniff the endianness from the version field of the first - // CU. This is always a small but non-zero integer. + // Sniff the format (DWARF32/64), endianness, and address size from the first CU header cursor endcur(m->sec_info); - // Skip length. + // Skip length and detect format section_length length = endcur.fixed(); - if (length == 0xffffffff) + format detected_fmt = format::dwarf32; + if (length == 0xffffffff) { + detected_fmt = format::dwarf64; endcur.fixed(); + } // Get version in both little and big endian. uhalf version = endcur.fixed(); uhalf versionbe = (version >> 8) | ((version & 0xFF) << 8); + byte_order detected_ord = byte_order::lsb; if (versionbe < version) { - m->sec_info = make_shared
(section_type::info, data, size, byte_order::msb); + detected_ord = byte_order::msb; } + // For DWARF 5, header is: version(2) + unit_type(1) + address_size(1) + ... + // For DWARF 2-4, header is: version(2) + abbrev_offset(4/8) + address_size(1) + unsigned detected_addr_size = 8; // Default to 8 for 64-bit systems + if (version >= 5) { + // Skip unit_type (1 byte) + endcur.fixed(); + detected_addr_size = endcur.fixed(); + } else { + // Skip abbrev_offset (4 or 8 bytes depending on format) + if (detected_fmt == format::dwarf64) + endcur.fixed(); + else + endcur.fixed(); + detected_addr_size = endcur.fixed(); + } + + // Recreate sec_info with the detected format, byte order, and address size + m->sec_info = make_shared
(section_type::info, data, size, detected_ord, detected_fmt, detected_addr_size); + data = l->load(section_type::abbrev, &size); if (!data) throw format_error("required .debug_abbrev section missing"); - m->sec_abbrev = make_shared
(section_type::abbrev, data, size, m->sec_info->ord); + m->sec_abbrev = make_shared
(section_type::abbrev, data, size, m->sec_info->ord, m->sec_info->fmt); // Get compilation units. Everything derives from these, so // there's no point in doing it lazily. @@ -122,7 +144,11 @@ dwarf::get_section(section_type type) const if (!data) throw format_error(std::string(elf::section_type_to_name(type)) + " section missing"); - m->sections[type] = std::make_shared
(section_type::str, data, size, m->sec_info->ord); + // Use the correct section type, format, and inherit address size from sec_info + m->sections[type] = std::make_shared
(type, data, size, + m->sec_info->ord, + m->sec_info->fmt, + m->sec_info->addr_size); return m->sections[type]; } diff --git a/dwarf/elf.cc b/dwarf/elf.cc index 5fd7a20..bac17d9 100644 --- a/dwarf/elf.cc +++ b/dwarf/elf.cc @@ -15,17 +15,20 @@ static const struct const char *name; section_type type; } sections[] = { - {".debug_abbrev", section_type::abbrev}, - {".debug_aranges", section_type::aranges}, - {".debug_frame", section_type::frame}, - {".debug_info", section_type::info}, - {".debug_line", section_type::line}, - {".debug_line_str", section_type::line_str}, - {".debug_loc", section_type::loc}, - {".debug_macinfo", section_type::macinfo}, - {".debug_pubnames", section_type::pubnames}, - {".debug_pubtypes", section_type::pubtypes}, - {".debug_ranges", section_type::ranges}, + {".debug_abbrev", section_type::abbrev}, + {".debug_addr", section_type::addr}, + {".debug_aranges", section_type::aranges}, + {".debug_frame", section_type::frame}, + {".debug_info", section_type::info}, + {".debug_line", section_type::line}, + {".debug_line_str", section_type::line_str}, + {".debug_loc", section_type::loc}, + {".debug_loclists", section_type::loclists}, + {".debug_macinfo", section_type::macinfo}, + {".debug_pubnames", section_type::pubnames}, + {".debug_pubtypes", section_type::pubtypes}, + {".debug_ranges", section_type::ranges}, + {".debug_rnglists", section_type::rnglists}, {".debug_str", section_type::str}, {".debug_str_offsets", section_type::str_offsets}, {".debug_types", section_type::types}, diff --git a/dwarf/to_string.cc b/dwarf/to_string.cc index 57784f5..5fdf315 100644 --- a/dwarf/to_string.cc +++ b/dwarf/to_string.cc @@ -10,16 +10,19 @@ to_string(section_type v) { switch (v) { case section_type::abbrev: return "section_type::abbrev"; + case section_type::addr: return "section_type::addr"; case section_type::aranges: return "section_type::aranges"; case section_type::frame: return "section_type::frame"; case section_type::info: return "section_type::info"; 
case section_type::line: return "section_type::line"; case section_type::line_str: return "section_type::line_str"; case section_type::loc: return "section_type::loc"; + case section_type::loclists: return "section_type::loclists"; case section_type::macinfo: return "section_type::macinfo"; case section_type::pubnames: return "section_type::pubnames"; case section_type::pubtypes: return "section_type::pubtypes"; case section_type::ranges: return "section_type::ranges"; + case section_type::rnglists: return "section_type::rnglists"; case section_type::str: return "section_type::str"; case section_type::str_offsets: return "section_type::str_offsets"; case section_type::types: return "section_type::types"; diff --git a/dwarf/value.cc b/dwarf/value.cc index 95e7708..ffd01cc 100644 --- a/dwarf/value.cc +++ b/dwarf/value.cc @@ -31,11 +31,62 @@ value::get_section_offset() const taddr value::as_address() const { - if (form != DW_FORM::addr) - throw value_type_mismatch("cannot read " + to_string(typ) + " as address"); - cursor cur(cu->data(), offset); - return cur.address(); + + switch (form) { + case DW_FORM::addr: + return cur.address(); + + case DW_FORM::addrx: + case DW_FORM::addrx1: + case DW_FORM::addrx2: + case DW_FORM::addrx3: + case DW_FORM::addrx4: { + // DWARF 5: Read address index, look up in .debug_addr + uint64_t index; + switch (form) { + case DW_FORM::addrx: + index = cur.uleb128(); + break; + case DW_FORM::addrx1: + index = cur.fixed(); + break; + case DW_FORM::addrx2: + index = cur.fixed(); + break; + case DW_FORM::addrx3: + index = cur.fixed() | (cur.fixed() << 8); + break; + case DW_FORM::addrx4: + index = cur.fixed(); + break; + default: + index = 0; + break; + } + + // Get addr_base from CU root DIE's DW_AT_addr_base + // For now, use a simplified approach: skip the 8-byte header + // DWARF 5 .debug_addr has a header (length + version + addr_size + segment_selector_size) + auto addr_sec = cu->get_dwarf().get_section(section_type::addr); + auto cusec = 
cu->data(); + unsigned addr_size = cusec->addr_size; + + // The .debug_addr header is: + // - 4/12 bytes: unit length (4 for 32-bit DWARF, 12 for 64-bit) + // - 2 bytes: version + // - 1 byte: address_size + // - 1 byte: segment_selector_size + // For 32-bit DWARF, total header is 8 bytes + section_offset header_size = 8; + + cursor addr_cur(addr_sec, header_size + index * addr_size); + return addr_cur.address(); + } + + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as address"); + } } const void * From 1c038e9e04b0685d83d3ad69e8010ba0df043155 Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Fri, 2 Jan 2026 13:47:05 -0500 Subject: [PATCH 09/10] Revert "Merge pull request #3 from plasma-umass/more_dwarf5" This reverts commit b88a0c6de38fa1ff92f5874905a9206fb0157000, reversing changes made to ca22688e97b05bfceaf0f8daa32634a243277880. --- dwarf/dwarf++.hh | 3 --- dwarf/dwarf.cc | 40 ++++++------------------------- dwarf/elf.cc | 25 +++++++++----------- dwarf/to_string.cc | 3 --- dwarf/value.cc | 59 ++++------------------------------------------ 5 files changed, 22 insertions(+), 108 deletions(-) diff --git a/dwarf/dwarf++.hh b/dwarf/dwarf++.hh index 590dac2..01db15f 100644 --- a/dwarf/dwarf++.hh +++ b/dwarf/dwarf++.hh @@ -82,19 +82,16 @@ public: enum class section_type { abbrev, - addr, // DWARF 5: .debug_addr aranges, frame, info, line, line_str, loc, - loclists, // DWARF 5: .debug_loclists macinfo, pubnames, pubtypes, ranges, - rnglists, // DWARF 5: .debug_rnglists str, str_offsets, types, diff --git a/dwarf/dwarf.cc b/dwarf/dwarf.cc index de0fdde..d763295 100644 --- a/dwarf/dwarf.cc +++ b/dwarf/dwarf.cc @@ -42,46 +42,24 @@ dwarf::dwarf(const std::shared_ptr &l) throw format_error("required .debug_info section missing"); m->sec_info = make_shared
(section_type::info, data, size, byte_order::lsb); - // Sniff the format (DWARF32/64), endianness, and address size from the first CU header + // Sniff the endianness from the version field of the first + // CU. This is always a small but non-zero integer. cursor endcur(m->sec_info); - // Skip length and detect format + // Skip length. section_length length = endcur.fixed(); - format detected_fmt = format::dwarf32; - if (length == 0xffffffff) { - detected_fmt = format::dwarf64; + if (length == 0xffffffff) endcur.fixed(); - } // Get version in both little and big endian. uhalf version = endcur.fixed(); uhalf versionbe = (version >> 8) | ((version & 0xFF) << 8); - byte_order detected_ord = byte_order::lsb; if (versionbe < version) { - detected_ord = byte_order::msb; + m->sec_info = make_shared
(section_type::info, data, size, byte_order::msb); } - // For DWARF 5, header is: version(2) + unit_type(1) + address_size(1) + ... - // For DWARF 2-4, header is: version(2) + abbrev_offset(4/8) + address_size(1) - unsigned detected_addr_size = 8; // Default to 8 for 64-bit systems - if (version >= 5) { - // Skip unit_type (1 byte) - endcur.fixed(); - detected_addr_size = endcur.fixed(); - } else { - // Skip abbrev_offset (4 or 8 bytes depending on format) - if (detected_fmt == format::dwarf64) - endcur.fixed(); - else - endcur.fixed(); - detected_addr_size = endcur.fixed(); - } - - // Recreate sec_info with the detected format, byte order, and address size - m->sec_info = make_shared
(section_type::info, data, size, detected_ord, detected_fmt, detected_addr_size); - data = l->load(section_type::abbrev, &size); if (!data) throw format_error("required .debug_abbrev section missing"); - m->sec_abbrev = make_shared
(section_type::abbrev, data, size, m->sec_info->ord, m->sec_info->fmt); + m->sec_abbrev = make_shared
(section_type::abbrev, data, size, m->sec_info->ord); // Get compilation units. Everything derives from these, so // there's no point in doing it lazily. @@ -144,11 +122,7 @@ dwarf::get_section(section_type type) const if (!data) throw format_error(std::string(elf::section_type_to_name(type)) + " section missing"); - // Use the correct section type, format, and inherit address size from sec_info - m->sections[type] = std::make_shared
(type, data, size, - m->sec_info->ord, - m->sec_info->fmt, - m->sec_info->addr_size); + m->sections[type] = std::make_shared
(section_type::str, data, size, m->sec_info->ord); return m->sections[type]; } diff --git a/dwarf/elf.cc b/dwarf/elf.cc index bac17d9..5fd7a20 100644 --- a/dwarf/elf.cc +++ b/dwarf/elf.cc @@ -15,20 +15,17 @@ static const struct const char *name; section_type type; } sections[] = { - {".debug_abbrev", section_type::abbrev}, - {".debug_addr", section_type::addr}, - {".debug_aranges", section_type::aranges}, - {".debug_frame", section_type::frame}, - {".debug_info", section_type::info}, - {".debug_line", section_type::line}, - {".debug_line_str", section_type::line_str}, - {".debug_loc", section_type::loc}, - {".debug_loclists", section_type::loclists}, - {".debug_macinfo", section_type::macinfo}, - {".debug_pubnames", section_type::pubnames}, - {".debug_pubtypes", section_type::pubtypes}, - {".debug_ranges", section_type::ranges}, - {".debug_rnglists", section_type::rnglists}, + {".debug_abbrev", section_type::abbrev}, + {".debug_aranges", section_type::aranges}, + {".debug_frame", section_type::frame}, + {".debug_info", section_type::info}, + {".debug_line", section_type::line}, + {".debug_line_str", section_type::line_str}, + {".debug_loc", section_type::loc}, + {".debug_macinfo", section_type::macinfo}, + {".debug_pubnames", section_type::pubnames}, + {".debug_pubtypes", section_type::pubtypes}, + {".debug_ranges", section_type::ranges}, {".debug_str", section_type::str}, {".debug_str_offsets", section_type::str_offsets}, {".debug_types", section_type::types}, diff --git a/dwarf/to_string.cc b/dwarf/to_string.cc index 5fdf315..57784f5 100644 --- a/dwarf/to_string.cc +++ b/dwarf/to_string.cc @@ -10,19 +10,16 @@ to_string(section_type v) { switch (v) { case section_type::abbrev: return "section_type::abbrev"; - case section_type::addr: return "section_type::addr"; case section_type::aranges: return "section_type::aranges"; case section_type::frame: return "section_type::frame"; case section_type::info: return "section_type::info"; case section_type::line: return 
"section_type::line"; case section_type::line_str: return "section_type::line_str"; case section_type::loc: return "section_type::loc"; - case section_type::loclists: return "section_type::loclists"; case section_type::macinfo: return "section_type::macinfo"; case section_type::pubnames: return "section_type::pubnames"; case section_type::pubtypes: return "section_type::pubtypes"; case section_type::ranges: return "section_type::ranges"; - case section_type::rnglists: return "section_type::rnglists"; case section_type::str: return "section_type::str"; case section_type::str_offsets: return "section_type::str_offsets"; case section_type::types: return "section_type::types"; diff --git a/dwarf/value.cc b/dwarf/value.cc index ffd01cc..95e7708 100644 --- a/dwarf/value.cc +++ b/dwarf/value.cc @@ -31,62 +31,11 @@ value::get_section_offset() const taddr value::as_address() const { - cursor cur(cu->data(), offset); - - switch (form) { - case DW_FORM::addr: - return cur.address(); - - case DW_FORM::addrx: - case DW_FORM::addrx1: - case DW_FORM::addrx2: - case DW_FORM::addrx3: - case DW_FORM::addrx4: { - // DWARF 5: Read address index, look up in .debug_addr - uint64_t index; - switch (form) { - case DW_FORM::addrx: - index = cur.uleb128(); - break; - case DW_FORM::addrx1: - index = cur.fixed(); - break; - case DW_FORM::addrx2: - index = cur.fixed(); - break; - case DW_FORM::addrx3: - index = cur.fixed() | (cur.fixed() << 8); - break; - case DW_FORM::addrx4: - index = cur.fixed(); - break; - default: - index = 0; - break; - } - - // Get addr_base from CU root DIE's DW_AT_addr_base - // For now, use a simplified approach: skip the 8-byte header - // DWARF 5 .debug_addr has a header (length + version + addr_size + segment_selector_size) - auto addr_sec = cu->get_dwarf().get_section(section_type::addr); - auto cusec = cu->data(); - unsigned addr_size = cusec->addr_size; - - // The .debug_addr header is: - // - 4/12 bytes: unit length (4 for 32-bit DWARF, 12 for 64-bit) - // - 2 
bytes: version - // - 1 byte: address_size - // - 1 byte: segment_selector_size - // For 32-bit DWARF, total header is 8 bytes - section_offset header_size = 8; - - cursor addr_cur(addr_sec, header_size + index * addr_size); - return addr_cur.address(); - } - - default: + if (form != DW_FORM::addr) throw value_type_mismatch("cannot read " + to_string(typ) + " as address"); - } + + cursor cur(cu->data(), offset); + return cur.address(); } const void * From 0f1143af4f0a6d939a07c9a5219e67b299abb17b Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Mon, 12 Jan 2026 11:48:24 -0600 Subject: [PATCH 10/10] More Dwarf v5 support. --- dwarf/data.hh | 16 ++++++ dwarf/dwarf++.hh | 16 ++++-- dwarf/dwarf.cc | 33 ++++++++++- dwarf/elf.cc | 24 ++++---- dwarf/rangelist.cc | 135 ++++++++++++++++++++++++++++++++++----------- dwarf/to_string.cc | 17 ++++++ dwarf/value.cc | 108 +++++++++++++++++++++++++++++++++--- 7 files changed, 293 insertions(+), 56 deletions(-) diff --git a/dwarf/data.hh b/dwarf/data.hh index 5ba1345..cd32770 100644 --- a/dwarf/data.hh +++ b/dwarf/data.hh @@ -595,6 +595,22 @@ enum class DW_LNCT std::string to_string(DW_LNCT v); +// Range list entry encodings (DWARF5 section 7.25) +enum class DW_RLE : ubyte +{ + end_of_list = 0x00, + base_addressx = 0x01, + startx_endx = 0x02, + startx_length = 0x03, + offset_pair = 0x04, + base_address = 0x05, + start_end = 0x06, + start_length = 0x07, +}; + +std::string +to_string(DW_RLE v); + DWARFPP_END_NAMESPACE #endif diff --git a/dwarf/dwarf++.hh b/dwarf/dwarf++.hh index 01db15f..0d282ef 100644 --- a/dwarf/dwarf++.hh +++ b/dwarf/dwarf++.hh @@ -82,6 +82,7 @@ public: enum class section_type { abbrev, + addr, // DWARF 5 .debug_addr aranges, frame, info, @@ -92,6 +93,7 @@ enum class section_type pubnames, pubtypes, ranges, + rnglists, // DWARF 5 .debug_rnglists str, str_offsets, types, @@ -920,10 +922,11 @@ public: * the associated compilation unit. 
cu_low_pc is the * DW_AT::low_pc attribute of the compilation unit containing * the referring DIE or 0 (this is used as the base address of - * the range list). + * the range list). is_dwarf5 indicates whether this uses + * DWARF 5 format (DW_RLE_* encodings). */ rangelist(const std::shared_ptr
&sec, section_offset off, - unsigned cu_addr_size, taddr cu_low_pc); + unsigned cu_addr_size, taddr cu_low_pc, bool is_dwarf5 = false); /** * Construct a range list from a sequence of {low, high} @@ -972,6 +975,7 @@ private: std::vector synthetic; std::shared_ptr
sec; taddr base_addr; + bool is_dwarf5; }; /** @@ -1000,14 +1004,15 @@ public: /** * \internal Construct an end iterator. */ - iterator() : sec(nullptr), base_addr(0), pos(0) { } + iterator() : sec(nullptr), base_addr(0), pos(0), is_dwarf5(false) { } /** * \internal Construct an iterator that reads rangelist data * from the beginning of the given section and starts with the - * given base address. + * given base address. is_dwarf5 indicates whether to use + * DWARF 5 format parsing (DW_RLE_* encodings). */ - iterator(const std::shared_ptr
&sec, taddr base_addr); + iterator(const std::shared_ptr
&sec, taddr base_addr, bool is_dwarf5 = false); /** Copy constructor */ iterator(const iterator &o) = default; @@ -1056,6 +1061,7 @@ private: taddr base_addr; section_offset pos; rangelist::entry entry; + bool is_dwarf5; }; ////////////////////////////////////////////////////////////////// diff --git a/dwarf/dwarf.cc b/dwarf/dwarf.cc index d763295..2ca56c6 100644 --- a/dwarf/dwarf.cc +++ b/dwarf/dwarf.cc @@ -122,7 +122,38 @@ dwarf::get_section(section_type type) const if (!data) throw format_error(std::string(elf::section_type_to_name(type)) + " section missing"); - m->sections[type] = std::make_shared
(section_type::str, data, size, m->sec_info->ord); + + // Determine format for auxiliary sections. + // DWARF 5 sections like str_offsets and addr have headers with initial length. + // Simple sections like str and line_str are just raw string data. + format fmt = format::unknown; + if ((type == section_type::str_offsets || type == section_type::addr) && size >= 4) { + // .debug_str_offsets and .debug_addr have headers starting with initial length + uint32_t initial_length = *reinterpret_cast(data); + if (initial_length == 0xffffffff) { + fmt = format::dwarf64; + } else { + fmt = format::dwarf32; + } + } else if (type == section_type::str || type == section_type::line_str) { + // String sections don't need format - they're just null-terminated strings. + // Use dwarf32 as default since cursor operations don't use format for strings. + fmt = format::dwarf32; + } else { + // For other sections, try to detect format from initial length if present + if (size >= 4) { + uint32_t initial_length = *reinterpret_cast(data); + if (initial_length == 0xffffffff) { + fmt = format::dwarf64; + } else if (initial_length < 0xfffffff0) { + fmt = format::dwarf32; + } + // If initial_length is a reserved value, leave format unknown + } + } + + m->sections[type] = std::make_shared
(type, data, size, + m->sec_info->ord, fmt); return m->sections[type]; } diff --git a/dwarf/elf.cc b/dwarf/elf.cc index 5fd7a20..1ed0134 100644 --- a/dwarf/elf.cc +++ b/dwarf/elf.cc @@ -15,17 +15,19 @@ static const struct const char *name; section_type type; } sections[] = { - {".debug_abbrev", section_type::abbrev}, - {".debug_aranges", section_type::aranges}, - {".debug_frame", section_type::frame}, - {".debug_info", section_type::info}, - {".debug_line", section_type::line}, - {".debug_line_str", section_type::line_str}, - {".debug_loc", section_type::loc}, - {".debug_macinfo", section_type::macinfo}, - {".debug_pubnames", section_type::pubnames}, - {".debug_pubtypes", section_type::pubtypes}, - {".debug_ranges", section_type::ranges}, + {".debug_abbrev", section_type::abbrev}, + {".debug_addr", section_type::addr}, + {".debug_aranges", section_type::aranges}, + {".debug_frame", section_type::frame}, + {".debug_info", section_type::info}, + {".debug_line", section_type::line}, + {".debug_line_str", section_type::line_str}, + {".debug_loc", section_type::loc}, + {".debug_macinfo", section_type::macinfo}, + {".debug_pubnames", section_type::pubnames}, + {".debug_pubtypes", section_type::pubtypes}, + {".debug_ranges", section_type::ranges}, + {".debug_rnglists", section_type::rnglists}, {".debug_str", section_type::str}, {".debug_str_offsets", section_type::str_offsets}, {".debug_types", section_type::types}, diff --git a/dwarf/rangelist.cc b/dwarf/rangelist.cc index f9eb8e9..9853b1f 100644 --- a/dwarf/rangelist.cc +++ b/dwarf/rangelist.cc @@ -9,13 +9,15 @@ using namespace std; DWARFPP_BEGIN_NAMESPACE rangelist::rangelist(const std::shared_ptr
&sec, section_offset off, - unsigned cu_addr_size, taddr cu_low_pc) + unsigned cu_addr_size, taddr cu_low_pc, bool is_dwarf5) : sec(sec->slice(off, ~0, format::unknown, cu_addr_size)), - base_addr(cu_low_pc) + base_addr(cu_low_pc), + is_dwarf5(is_dwarf5) { } rangelist::rangelist(const initializer_list > &ranges) + : is_dwarf5(false) { synthetic.reserve(ranges.size() * 2 + 2); for (auto &range : ranges) { @@ -37,7 +39,7 @@ rangelist::iterator rangelist::begin() const { if (sec) - return iterator(sec, base_addr); + return iterator(sec, base_addr, is_dwarf5); return end(); } @@ -56,8 +58,8 @@ rangelist::contains(taddr addr) const return false; } -rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_addr) - : sec(sec), base_addr(base_addr), pos(0) +rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_addr, bool is_dwarf5) + : sec(sec), base_addr(base_addr), pos(0), is_dwarf5(is_dwarf5) { // Read in the first entry ++(*this); @@ -66,34 +68,103 @@ rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_ad rangelist::iterator & rangelist::iterator::operator++() { - // DWARF4 section 2.17.3 - taddr largest_offset = ~(taddr)0; - if (sec->addr_size < sizeof(taddr)) - largest_offset += 1 << (8 * sec->addr_size); - - // Read in entries until we reach a regular entry of an - // end-of-list. Note that pos points to the beginning of the - // entry *following* the current entry, so that's where we - // start. cursor cur(sec, pos); - while (true) { - entry.low = cur.address(); - entry.high = cur.address(); - - if (entry.low == 0 && entry.high == 0) { - // End of list - sec.reset(); - pos = 0; - break; - } else if (entry.low == largest_offset) { - // Base address change - base_addr = entry.high; - } else { - // Regular entry. Adjust by base address. - entry.low += base_addr; - entry.high += base_addr; - pos = cur.get_section_offset(); - break; + + if (is_dwarf5) { + // DWARF 5 range list entries (Section 2.17.3) + while (true) { + if (cur.end()) { + sec.reset(); + pos = 0; + return *this; + } + + DW_RLE rle = (DW_RLE)cur.fixed(); + + switch (rle) { + case DW_RLE::end_of_list: + sec.reset(); + pos = 0; + return *this; + + case DW_RLE::base_addressx: + // Index into .debug_addr - for now, skip this + cur.uleb128(); + break; + + case DW_RLE::startx_endx: + // Both start and end are indices into .debug_addr + cur.uleb128(); + cur.uleb128(); + // Skip for now - would need .debug_addr lookup + break; + + case DW_RLE::startx_length: + // Start is index, length is ULEB128 + cur.uleb128(); + cur.uleb128(); + // Skip for now - would need .debug_addr lookup + break; + + case DW_RLE::offset_pair: + // Two ULEB128 offsets from base address + entry.low = base_addr + cur.uleb128(); + entry.high = base_addr + cur.uleb128(); + pos = cur.get_section_offset(); + return *this; + + case DW_RLE::base_address: + // New base address (full address) + base_addr = cur.address(); + break; + + case DW_RLE::start_end: + // Two full addresses + entry.low = cur.address(); + entry.high = 
cur.address(); + pos = cur.get_section_offset(); + return *this; + + case DW_RLE::start_length: + // Full address + ULEB128 length + entry.low = cur.address(); + entry.high = entry.low + cur.uleb128(); + pos = cur.get_section_offset(); + return *this; + + default: + throw format_error("unknown DW_RLE encoding " + to_string(rle)); + } + } + } else { + // DWARF 4 section 2.17.3 + taddr largest_offset = ~(taddr)0; + if (sec->addr_size < sizeof(taddr)) + largest_offset += 1 << (8 * sec->addr_size); + + // Read in entries until we reach a regular entry or an + // end-of-list. Note that pos points to the beginning of the + // entry *following* the current entry, so that's where we + // start. + while (true) { + entry.low = cur.address(); + entry.high = cur.address(); + + if (entry.low == 0 && entry.high == 0) { + // End of list + sec.reset(); + pos = 0; + break; + } else if (entry.low == largest_offset) { + // Base address change + base_addr = entry.high; + } else { + // Regular entry. Adjust by base address. 
+ entry.low += base_addr; + entry.high += base_addr; + pos = cur.get_section_offset(); + break; + } } }
diff --git a/dwarf/to_string.cc b/dwarf/to_string.cc
index 57784f5..cd9c6c3 100644
--- a/dwarf/to_string.cc
+++ b/dwarf/to_string.cc
@@ -10,6 +10,7 @@ to_string(section_type v)
 {
         switch (v) {
         case section_type::abbrev: return "section_type::abbrev";
+        case section_type::addr: return "section_type::addr";
         case section_type::aranges: return "section_type::aranges";
         case section_type::frame: return "section_type::frame";
         case section_type::info: return "section_type::info";
@@ -574,4 +575,20 @@ to_string(DW_LNCT v)
         return "(DW_LNCT)0x" + to_hex((int)v);
 }
 
+std::string
+to_string(DW_RLE v)
+{
+        switch (v) {
+        case DW_RLE::end_of_list: return "DW_RLE_end_of_list";
+        case DW_RLE::base_addressx: return "DW_RLE_base_addressx";
+        case DW_RLE::startx_endx: return "DW_RLE_startx_endx";
+        case DW_RLE::startx_length: return "DW_RLE_startx_length";
+        case DW_RLE::offset_pair: return "DW_RLE_offset_pair";
+        case DW_RLE::base_address: return "DW_RLE_base_address";
+        case DW_RLE::start_end: return "DW_RLE_start_end";
+        case DW_RLE::start_length: return "DW_RLE_start_length";
+        }
+        return "(DW_RLE)0x" + to_hex((int)v);
+}
+
 DWARFPP_END_NAMESPACE
diff --git a/dwarf/value.cc b/dwarf/value.cc
index 95e7708..5693026 100644
--- a/dwarf/value.cc
+++ b/dwarf/value.cc
@@ -31,11 +31,56 @@ value::get_section_offset() const
 taddr
 value::as_address() const
 {
-        if (form != DW_FORM::addr)
+        // Resolve this attribute to a target address. DW_FORM_addr holds the
+        // address inline; the DWARF 5 addrx forms hold an index into the
+        // .debug_addr section.
+        cursor cur(cu->data(), offset);
+
+        if (form == DW_FORM::addr) {
+                return cur.address();
+        }
+
+        // DWARF 5 address index forms
+        uint64_t index;
+        switch (form) {
+        case DW_FORM::addrx:
+                index = cur.uleb128();
+                break;
+        case DW_FORM::addrx1:
+                index = cur.fixed();
+                break;
+        case DW_FORM::addrx2:
+                index = cur.fixed();
+                break;
+        case DW_FORM::addrx3:
+                // Two statements so the two reads happen in a defined order;
+                // the operands of a single '|' are not sequenced in C++.
+                index = cur.fixed();
+                index |= cur.fixed() << 8;
+                break;
+        case DW_FORM::addrx4:
+                index = cur.fixed();
+                break;
+        default:
                 throw value_type_mismatch("cannot read " + to_string(typ) + " as address");
+        }
 
-        cursor cur(cu->data(), offset);
-        return cur.address();
+        // Look up address in .debug_addr section
+        // DWARF 5 .debug_addr has a header: length (4 or 12 bytes), version (2), addr_size (1), segment_selector_size (1)
+        // NOTE(review): the table is assumed to start at section offset 0;
+        // the unit's DW_AT_addr_base is not consulted here.
+        auto addr_sec = cu->get_dwarf().get_section(section_type::addr);
+        section_offset header_size = 8; // Simplified: assume 32-bit DWARF (4 + 2 + 1 + 1)
+        unsigned addr_size = cu->data()->addr_size;
+        cursor addr_cur(addr_sec, header_size + index * addr_size);
+        // Read address directly using the CU's addr_size (not the section's)
+        if (addr_size == 4) {
+                return addr_cur.fixed();
+        } else if (addr_size == 8) {
+                return addr_cur.fixed();
+        } else {
+                throw format_error("unsupported address size " + std::to_string(addr_size));
+        }
 }
 
 const void *
@@ -154,17 +199,82 @@ value::as_flag() const
 rangelist
 value::as_rangelist() const
 {
-        section_offset off = as_sec_offset();
-
+        // Build the range list for this attribute: DW_FORM_rnglistx goes
+        // through the .debug_rnglists offsets table; any other form is read
+        // as a direct offset into .debug_ranges.
+
         // The compilation unit may not have a base address. In this
         // case, the first entry in the range list must be a base
         // address entry, but we'll just assume 0 for the initial base
         // address.
         die cudie = cu->root();
         taddr cu_low_pc = cudie.has(DW_AT::low_pc) ? at_low_pc(cudie) : 0;
-        auto sec = cu->get_dwarf().get_section(section_type::ranges);
         auto cusec = cu->data();
-        return rangelist(sec, off, cusec->addr_size, cu_low_pc);
+
+        // DWARF 5 uses rnglistx form with .debug_rnglists section
+        if (form == DW_FORM::rnglistx) {
+                cursor cur(cu->data(), offset);
+                uint64_t index = cur.uleb128();
+
+                // Get .debug_rnglists section
+                auto rnglists_sec = cu->get_dwarf().get_section(section_type::rnglists);
+
+                // Parse the rnglists header to find the offsets table
+                // Header format: unit_length (4/12), version (2), addr_size (1),
+                // segment_selector_size (1), offset_entry_count (4)
+                cursor hdr(rnglists_sec, (section_offset)0);
+
+                // Read unit length to determine format
+                uint32_t unit_length32 = hdr.fixed();
+                format fmt;
+                section_offset header_size;
+                if (unit_length32 == 0xffffffff) {
+                        // 64-bit DWARF
+                        hdr.fixed(); // actual length
+                        fmt = format::dwarf64;
+                        header_size = 20; // 12 + 2 + 1 + 1 + 4
+                } else {
+                        fmt = format::dwarf32;
+                        header_size = 12; // 4 + 2 + 1 + 1 + 4
+                }
+
+                uint16_t version = hdr.fixed();
+                (void)version; // Should be 5
+                uint8_t addr_size = hdr.fixed();
+                (void)addr_size;
+                uint8_t segment_selector_size = hdr.fixed();
+                (void)segment_selector_size;
+                uint32_t offset_entry_count = hdr.fixed();
+
+                if (index >= offset_entry_count) {
+                        throw format_error("rnglistx index out of bounds");
+                }
+
+                // Read the offset from the offsets table
+                section_offset offset_size = (fmt == format::dwarf64) ? 8 : 4;
+                cursor offsets_cur(rnglists_sec, header_size + index * offset_size);
+                section_offset range_offset;
+                if (fmt == format::dwarf64) {
+                        range_offset = offsets_cur.fixed();
+                } else {
+                        range_offset = offsets_cur.fixed();
+                }
+
+                // Per DWARF 5 section 7.28, each entry of the offsets table is
+                // measured from the beginning of the offsets table itself, which
+                // starts immediately after the header (not after the table).
+                // NOTE(review): the header is read at section offset 0; the
+                // unit's DW_AT_rnglists_base is not consulted here, so only the
+                // first contribution in .debug_rnglists is addressed.
+                section_offset absolute_offset = header_size + range_offset;
+
+                return rangelist(rnglists_sec, absolute_offset, cusec->addr_size, cu_low_pc, true);
+        }
+
+        // DWARF 4 and earlier: direct offset into .debug_ranges
+        section_offset off = as_sec_offset();
+        auto sec = cu->get_dwarf().get_section(section_type::ranges);
+        return rangelist(sec, off, cusec->addr_size, cu_low_pc, false);
 }
 
 die