From d96062a90d6727d80ac2252e7e1e6b6d02eb962f Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 22 Aug 2025 14:41:57 -0700 Subject: [PATCH 01/10] fix dwarf rebasing and mark extern inlines --- cle/backends/elf/elf.py | 16 ++++++++++------ cle/backends/elf/subprogram.py | 11 +++++++++++ cle/backends/inlined_function.py | 4 ++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 011acd4d..bb670372 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -454,6 +454,9 @@ def rebase(self, new_base): super().rebase(new_base) self.addr_to_line = SortedDict((addr + delta, value) for addr, value in self.addr_to_line.items()) + self.functions_debug_info = {addr + delta: value for addr, value in self.functions_debug_info.items()} + for f in self.functions_debug_info.values(): + f.rebase(delta) # # Private Methods @@ -698,9 +701,8 @@ def _load_line_info(self, dwarf): self.addr_to_line[relocated_addr].add((str(filename), line.state.line)) @staticmethod - def _load_ranges_form_die(die: DIE, aranges) -> list[tuple[int, int]] | None: + def _load_ranges_form_die(die: DIE, aranges, base_addr: int = 0) -> list[tuple[int, int]] | None: if aranges is not None and "DW_AT_ranges" in die.attributes: - base_addr = 0 result = [] for entry in aranges.get_range_list_at_offset(die.attributes["DW_AT_ranges"].value, die.cu): if isinstance(entry, BaseAddressEntry): @@ -711,7 +713,7 @@ def _load_ranges_form_die(die: DIE, aranges) -> list[tuple[int, int]] | None: return None @staticmethod - def _load_low_high_pc_form_die(die: DIE) -> tuple[int | None, int | None]: + def _load_low_high_pc_form_die(die: DIE, base_addr: int = 0) -> tuple[int | None, int | None]: """ Load low and high pc from a DIE. @@ -723,7 +725,7 @@ def _load_low_high_pc_form_die(die: DIE) -> tuple[int | None, int | None]: lowpc = die.attributes["DW_AT_low_pc"].value if "DW_AT_high_pc" not in die.attributes: - return lowpc, None + return lowpc + base_addr, None # DWARF v4 in section 2.17 describes how to interpret the # DW_AT_high_pc attribute based on the class of its form. @@ -739,7 +741,7 @@ def _load_low_high_pc_form_die(die: DIE) -> tuple[int | None, int | None]: else: log.warning("Error: invalid DW_AT_high_pc class:%s", highpc_attr_class) return lowpc, None - return lowpc, highpc + return lowpc + base_addr, highpc + base_addr def _load_dies(self, dwarf: DWARFInfo): """ @@ -886,12 +888,14 @@ def _load_die_lex_block( subr.ranges.append((low_pc, high_pc)) elif "DW_AT_ranges" in sub_die.attributes: aranges = dwarf.range_lists() - ranges = self._load_ranges_form_die(sub_die, aranges) + ranges = self._load_ranges_form_die(sub_die, aranges, subprogram.low_pc) if ranges is not None: subr.ranges = ranges if "DW_AT_abstract_origin" in sub_die.attributes: origin = sub_die.get_DIE_from_attribute("DW_AT_abstract_origin") subr.name = self._dwarf_get_name_with_namespace(origin) + if 'DW_AT_external' in origin.attributes: + subr.extern = origin.attributes['DW_AT_external'].value subprogram.inlined_functions.append(subr) return block diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index 570c3db0..35107791 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -35,6 +35,12 @@ def __init__(self, low_pc: int | None, high_pc: int | None, ranges: list[tuple[i self.high_pc = high_pc self.child_blocks: list[LexicalBlock] = [] + def rebase(self, delta: int): + self.low_pc += delta + self.high_pc += delta + for blk in self.child_blocks: + blk.rebase(delta) + class Subprogram(LexicalBlock): """ @@ -60,3 +66,8 @@ def __init__( self.name = name self.local_variables: list[Variable] = [] self.inlined_functions: list[InlinedFunction] = [] + + def rebase(self, delta: int): + super().rebase(delta) + for inl in self.inlined_functions: + inl.rebase(delta) diff --git a/cle/backends/inlined_function.py b/cle/backends/inlined_function.py index f4dd86b1..4d520618 100644 --- a/cle/backends/inlined_function.py +++ b/cle/backends/inlined_function.py @@ -11,6 +11,7 @@ class InlinedFunction: name: str | None = None ranges: list[tuple[int, int]] = field(default_factory=list) + extern: bool = False @property def low_pc(self): @@ -19,3 +20,6 @@ def low_pc(self): @property def high_pc(self): return max(x for _, x in self.ranges) + + def rebase(self, delta: int): + self.ranges = [(lo + delta, hi + delta) for lo, hi in self.ranges] From 570211cde3c3cbe3cc13c7ab814d6ab3000da7b9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Aug 2025 21:43:39 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cle/backends/elf/elf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index bb670372..f046fc9b 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -894,8 +894,8 @@ def _load_die_lex_block( if "DW_AT_abstract_origin" in sub_die.attributes: origin = sub_die.get_DIE_from_attribute("DW_AT_abstract_origin") subr.name = self._dwarf_get_name_with_namespace(origin) - if 'DW_AT_external' in origin.attributes: - subr.extern = origin.attributes['DW_AT_external'].value + if "DW_AT_external" in origin.attributes: + subr.extern = origin.attributes["DW_AT_external"].value subprogram.inlined_functions.append(subr) return block From 7917daac51494af9dec0cdeea72406c25a435b11 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 22 Aug 2025 15:30:47 -0700 Subject: [PATCH 03/10] ELF: Add source file and line tracking for subprograms --- cle/backends/elf/elf.py | 26 ++++++++++++++++++++++---- cle/backends/elf/subprogram.py | 19 ++++++++++++++++--- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index f046fc9b..dbb86f75 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -849,14 +849,32 @@ def _load_die_lex_block( subprogram, namespace: list[str] | None = None, ) -> LexicalBlock | None: + if "DW_AT_abstract_origin" in die.attributes: + origin = cu.get_DIE_from_refaddr(cu.cu_offset + die.attributes["DW_AT_abstract_origin"].value) + else: + origin = None + if "DW_AT_name" in die.attributes: name = "::".join((namespace or []) + [die.attributes["DW_AT_name"].value.decode("utf-8")]) - elif "DW_AT_abstract_origin" in die.attributes: - origin = die.get_DIE_from_attribute("DW_AT_abstract_origin") + elif origin is not None: name = self._dwarf_get_name_with_namespace(origin) else: name = None + if "DW_AT_decl_file" in die.attributes: + filename = die.attributes["DW_AT_decl_file"].value + elif origin is not None: + filename = origin.attributes["DW_AT_decl_file"].value + else: + filename = None + + if "DW_AT_decl_line" in die.attributes: + line = die.attributes["DW_AT_decl_line"].value + elif origin is not None: + line = origin.attributes["DW_AT_decl_line"].value + else: + line = None + low_pc, high_pc = self._load_low_high_pc_form_die(die) ranges = None if low_pc is None or high_pc is None: @@ -865,9 +883,9 @@ def _load_die_lex_block( return None if subprogram is None: - subprogram = block = Subprogram(name, low_pc, high_pc, ranges) + subprogram = block = Subprogram(name, low_pc, high_pc, ranges, filename, line) else: - block = LexicalBlock(low_pc, high_pc, ranges) + block = LexicalBlock(low_pc, high_pc, ranges, filename, line) for sub_die in cu.iter_DIE_children(die): if sub_die.tag in ["DW_TAG_variable", "DW_TAG_formal_parameter"]: diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index 35107791..b7fd5213 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -22,7 +22,14 @@ class LexicalBlock: :type child_blocks: List[LexicalBlock] """ - def __init__(self, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None) -> None: + def __init__( + self, + low_pc: int | None, + high_pc: int | None, + ranges: list[tuple[int, int]] | None = None, + source_file: str | None = None, + source_line: int | None = None, + ) -> None: self.ranges = ranges if low_pc is None and high_pc is None: @@ -58,11 +65,17 @@ class Subprogram(LexicalBlock): """ def __init__( - self, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None + self, + name: str | None, + low_pc: int | None, + high_pc: int | None, + ranges: list[tuple[int, int]] | None = None, + source_file: str | None = None, + source_line: int | None = None, ) -> None: # pass self as the super_block of this subprogram self.subprogram = self - super().__init__(low_pc, high_pc, ranges) + super().__init__(low_pc, high_pc, ranges, source_file, source_line) self.name = name self.local_variables: list[Variable] = [] self.inlined_functions: list[InlinedFunction] = [] From c94438c58c0119ed14f079758bf13ab322164f13 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 22 Aug 2025 15:32:38 -0700 Subject: [PATCH 04/10] lmao oops --- cle/backends/elf/subprogram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index b7fd5213..215d2972 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -31,6 +31,8 @@ def __init__( source_line: int | None = None, ) -> None: self.ranges = ranges + self.source_file = source_file + self.source_line = source_line if low_pc is None and high_pc is None: if ranges is not None: From c2fc4ecc76cca9f9e5a9041e04184b3dccf6f7cc Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 4 Sep 2025 11:28:00 -0700 Subject: [PATCH 05/10] augh --- cle/backends/elf/elf.py | 6 ++++-- cle/backends/inlined_function.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index dbb86f75..5cd3c50a 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -863,14 +863,14 @@ def _load_die_lex_block( if "DW_AT_decl_file" in die.attributes: filename = die.attributes["DW_AT_decl_file"].value - elif origin is not None: + elif origin is not None and "DW_AT_decl_file" in origin.attributes: filename = origin.attributes["DW_AT_decl_file"].value else: filename = None if "DW_AT_decl_line" in die.attributes: line = die.attributes["DW_AT_decl_line"].value - elif origin is not None: + elif origin is not None and "DW_AT_decl_line" in origin.attributes: line = origin.attributes["DW_AT_decl_line"].value else: line = None @@ -902,6 +902,8 @@ def _load_die_lex_block( elif sub_die.tag == "DW_TAG_inlined_subroutine": subr = InlinedFunction() low_pc, high_pc = self._load_low_high_pc_form_die(sub_die) + if "DW_AT_entry_pc" in sub_die.attributes: + subr.entry = sub_die.attributes["DW_AT_entry_pc"].value if low_pc is not None and high_pc is not None: subr.ranges.append((low_pc, high_pc)) elif "DW_AT_ranges" in sub_die.attributes: diff --git a/cle/backends/inlined_function.py b/cle/backends/inlined_function.py index 4d520618..dec19563 100644 --- a/cle/backends/inlined_function.py +++ b/cle/backends/inlined_function.py @@ -12,6 +12,7 @@ class InlinedFunction: name: str | None = None ranges: list[tuple[int, int]] = field(default_factory=list) extern: bool = False + entry: int | None = None @property def low_pc(self): From 807e96ad2f2eb47a9abfd371e03be1cb30e9c94a Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 4 Sep 2025 11:29:24 -0700 Subject: [PATCH 06/10] more dwarf precision --- cle/backends/elf/elf.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 5cd3c50a..fc07e4aa 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -862,9 +862,21 @@ def _load_die_lex_block( name = None if "DW_AT_decl_file" in die.attributes: - filename = die.attributes["DW_AT_decl_file"].value + filename_idx = die.attributes["DW_AT_decl_file"].value elif origin is not None and "DW_AT_decl_file" in origin.attributes: - filename = origin.attributes["DW_AT_decl_file"].value + filename_idx = origin.attributes["DW_AT_decl_file"].value + else: + filename_idx = None + + if filename_idx is not None: + debug_line = dwarf.line_program_for_CU(cu) + assert debug_line is not None + basename = debug_line.header.file_names[filename_idx] + basename_str = basename.DW_LNCT_path.decode(errors="replace") + dirname_idx = basename.DW_LNCT_directory_index + dirname = debug_line.header.directories[dirname_idx] + dirname_str = dirname.DW_LNCT_path.decode(errors="replace") + filename = f"{dirname_str}/{basename_str}" else: filename = None From 38e43373daac03315eef848188a565768d66ec3b Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Tue, 21 Oct 2025 08:09:25 -0700 Subject: [PATCH 07/10] some nonsense idk --- cle/backends/elf/elf.py | 8 +++++++- cle/backends/elf/subprogram.py | 5 ++++- cle/backends/inlined_function.py | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index fc07e4aa..bb184f1b 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -829,7 +829,7 @@ def _load_die_namespace( if sub_prog is not None: assert isinstance(sub_prog, Subprogram) cu_.functions[sub_prog.low_pc] = sub_prog - self.functions_debug_info[sub_prog.low_pc] = sub_prog + self.functions_debug_info[sub_prog.ranges[0][0]] = sub_prog elif die_child.tag == "DW_TAG_namespace": if "DW_AT_name" in die_child.attributes: new_namespace = namespace + [die_child.attributes["DW_AT_name"].value.decode("utf-8")] @@ -928,6 +928,12 @@ def _load_die_lex_block( subr.name = self._dwarf_get_name_with_namespace(origin) if "DW_AT_external" in origin.attributes: subr.extern = origin.attributes["DW_AT_external"].value + nargs = 0 + for arg_die in origin.iter_children(): + if arg_die.tag == "DW_TAG_formal_parameter": + nargs += 1 + subr.nargs = nargs + subprogram.inlined_functions.append(subr) return block diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index 215d2972..61750290 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -30,7 +30,6 @@ def __init__( source_file: str | None = None, source_line: int | None = None, ) -> None: - self.ranges = ranges self.source_file = source_file self.source_line = source_line @@ -40,6 +39,9 @@ def __init__( high_pc = max(x for _, x in ranges) if low_pc is None or high_pc is None: raise ValueError("Must provide low_pc/high_pc or ranges") + if ranges is None: + ranges = [(low_pc, high_pc)] + self.ranges = ranges self.low_pc = low_pc self.high_pc = high_pc self.child_blocks: list[LexicalBlock] = [] @@ -47,6 +49,7 @@ def __init__( def rebase(self, delta: int): self.low_pc += delta self.high_pc += delta + self.ranges = [(lo + delta, hi + delta) for lo, hi in self.ranges] for blk in self.child_blocks: blk.rebase(delta) diff --git a/cle/backends/inlined_function.py b/cle/backends/inlined_function.py index dec19563..af585683 100644 --- a/cle/backends/inlined_function.py +++ b/cle/backends/inlined_function.py @@ -3,7 +3,7 @@ from dataclasses import dataclass, field -@dataclass +@dataclass(eq=False) class InlinedFunction: """ A representation of a piece of a function which is inlined from another function. @@ -13,6 +13,7 @@ class InlinedFunction: ranges: list[tuple[int, int]] = field(default_factory=list) extern: bool = False entry: int | None = None + nargs: int | None = None @property def low_pc(self): From d3dc17045c23cb069219a34b3a3f7509e7d02b58 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Tue, 21 Oct 2025 19:20:25 -0700 Subject: [PATCH 08/10] apparently there is an is_absolute property or somesuch --- cle/backends/elf/elf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index bb184f1b..8128b43e 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -708,7 +708,10 @@ def _load_ranges_form_die(die: DIE, aranges, base_addr: int = 0) -> list[tuple[i if isinstance(entry, BaseAddressEntry): base_addr = entry.base_address elif isinstance(entry, RangeEntry): - result.append((base_addr + entry.begin_offset, base_addr + entry.end_offset)) + if entry.is_absolute: + result.append((entry.begin_offset, entry.end_offset)) + else: + result.append((base_addr + entry.begin_offset, base_addr + entry.end_offset)) return result return None From b9edc7d299e0c9583c4862c4b2965e05e183f9a9 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 13 Nov 2025 17:42:36 -0700 Subject: [PATCH 09/10] yet more dwarf fixes --- cle/backends/elf/elf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 8128b43e..4a06ff73 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -701,7 +701,7 @@ def _load_line_info(self, dwarf): self.addr_to_line[relocated_addr].add((str(filename), line.state.line)) @staticmethod - def _load_ranges_form_die(die: DIE, aranges, base_addr: int = 0) -> list[tuple[int, int]] | None: + def _load_ranges_form_die(die: DIE, aranges, base_addr: int | None = None) -> list[tuple[int, int]] | None: if aranges is not None and "DW_AT_ranges" in die.attributes: result = [] for entry in aranges.get_range_list_at_offset(die.attributes["DW_AT_ranges"].value, die.cu): @@ -711,6 +711,8 @@ def _load_ranges_form_die(die: DIE, aranges, base_addr: int = 0) -> list[tuple[i if entry.is_absolute: result.append((entry.begin_offset, entry.end_offset)) else: + if base_addr is None: + base_addr = die.cu.get_top_DIE().attributes["DW_AT_low_pc"].value result.append((base_addr + entry.begin_offset, base_addr + entry.end_offset)) return result return None @@ -923,7 +925,7 @@ def _load_die_lex_block( subr.ranges.append((low_pc, high_pc)) elif "DW_AT_ranges" in sub_die.attributes: aranges = dwarf.range_lists() - ranges = self._load_ranges_form_die(sub_die, aranges, subprogram.low_pc) + ranges = self._load_ranges_form_die(sub_die, aranges) if ranges is not None: subr.ranges = ranges if "DW_AT_abstract_origin" in sub_die.attributes: From b87e321d3223fbf261a4e672dfe51444c44c033f Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 19 Dec 2025 13:19:26 -0700 Subject: [PATCH 10/10] fix edge cases with dwarf filenames, scan inlined functions as lex blocks, mark dwoffset of InlinedFunctions --- cle/backends/elf/elf.py | 24 +++++++++++++++++------- cle/backends/inlined_function.py | 1 + 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 4a06ff73..0160ee24 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -876,12 +876,16 @@ def _load_die_lex_block( if filename_idx is not None: debug_line = dwarf.line_program_for_CU(cu) assert debug_line is not None - basename = debug_line.header.file_names[filename_idx] - basename_str = basename.DW_LNCT_path.decode(errors="replace") - dirname_idx = basename.DW_LNCT_directory_index - dirname = debug_line.header.directories[dirname_idx] - dirname_str = dirname.DW_LNCT_path.decode(errors="replace") - filename = f"{dirname_str}/{basename_str}" + if debug_line.header.file_names is None: + assert filename_idx == 1 + filename = file_path + else: + basename = debug_line.header.file_names[filename_idx] + basename_str = basename.DW_LNCT_path.decode(errors="replace") + dirname_idx = basename.DW_LNCT_directory_index + dirname = debug_line.header.directories[dirname_idx] + dirname_str = dirname.DW_LNCT_path.decode(errors="replace") + filename = f"{dirname_str}/{basename_str}" else: filename = None @@ -917,7 +921,7 @@ def _load_die_lex_block( if sub_block is not None: block.child_blocks.append(sub_block) elif sub_die.tag == "DW_TAG_inlined_subroutine": - subr = InlinedFunction() + subr = InlinedFunction(sub_die.offset) low_pc, high_pc = self._load_low_high_pc_form_die(sub_die) if "DW_AT_entry_pc" in sub_die.attributes: subr.entry = sub_die.attributes["DW_AT_entry_pc"].value @@ -941,6 +945,12 @@ def _load_die_lex_block( subprogram.inlined_functions.append(subr) + sub_block = self._load_die_lex_block( + sub_die, dwarf, aranges, expr_parser, type_list, cu, file_path, subprogram, namespace + ) + if sub_block is not None: + block.child_blocks.append(sub_block) + return block @staticmethod diff --git a/cle/backends/inlined_function.py b/cle/backends/inlined_function.py index af585683..4345c5aa 100644 --- a/cle/backends/inlined_function.py +++ b/cle/backends/inlined_function.py @@ -9,6 +9,7 @@ class InlinedFunction: A representation of a piece of a function which is inlined from another function. """ + dwoffset: int name: str | None = None ranges: list[tuple[int, int]] = field(default_factory=list) extern: bool = False