From 1e9491e9fd7ca109e8e8d62c2c63837b05f18744 Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Sun, 8 Jun 2025 02:43:53 +0200
Subject: [PATCH 01/11] Added failing test for issue #348

---
 tests/test_line_profiler.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py
index 954c77b1..40ec3199 100644
--- a/tests/test_line_profiler.py
+++ b/tests/test_line_profiler.py
@@ -781,3 +781,37 @@ def sum_n_cb(n):
     assert t2['sum_n'][2][1] == n
     assert t1['sum_n_sq'][2][1] == n
     assert t2['sum_n_cb'][2][1] == n
+
+
+def test_duplicate_code_objects():
+    """
+    Test that results are correctly aggregated between duplicate code
+    objects.
+    """
+    code = textwrap.dedent("""
+    @profile
+    def func(n):
+        x = 0
+        for n in range(1, n + 1):
+            x += n
+        return x
+    """).strip('\n')
+    profile = LineProfiler()
+    # Create and call the function once
+    namespace_1 = {'profile': profile}
+    exec(code, namespace_1)
+    assert 'func' in namespace_1
+    assert len(profile.functions) == 1
+    assert namespace_1['func'].__wrapped__ in profile.functions
+    assert namespace_1['func'](10) == 10 * 11 // 2
+    # Do it again
+    namespace_2 = {'profile': profile}
+    exec(code, namespace_2)
+    assert 'func' in namespace_2
+    assert len(profile.functions) == 2
+    assert namespace_2['func'].__wrapped__ in profile.functions
+    assert namespace_2['func'](20) == 20 * 21 // 2
+    # Check that data from both calls are aggregated
+    # (Entries are represented as tuples `(lineno, nhits, time)`)
+    entries, = profile.get_stats().timings.values()
+    assert entries[-2][1] == 10 + 20

From e5afa6300f9dedd10e5a00ae098b0c643149b84d Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Sun, 8 Jun 2025 03:30:14 +0200
Subject: [PATCH 02/11] Refactored `.get_stats()` to fix the bug

line_profiler/_line_profiler.pyx::LineProfiler.get_stats()
    Updated/simplified implementation to aggregate profiling data
    between code objects with duplicate hashes
---
 line_profiler/_line_profiler.pyx | 36 +++++++++++++-------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index 15510317..db2ff852 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -530,35 +530,29 @@ cdef class LineProfiler:
         """
         cdef dict cmap = self._c_code_map
 
-        stats = {}
+        all_entries = {}
         for code in self.code_hash_map:
             entries = []
             for entry in self.code_hash_map[code]:
-                entries += list(cmap[entry].values())
+                entries.extend(cmap[entry].values())
             key = label(code)
 
-            # Merge duplicate line numbers, which occur for branch entrypoints like `if`
-            nhits_by_lineno = {}
-            total_time_by_lineno = {}
+            # Merge duplicate line numbers, which occur for branch
+            # entrypoints like `if`
+            entries_by_lineno = all_entries.setdefault(key, {})
 
             for line_dict in entries:
                  _, lineno, total_time, nhits = line_dict.values()
-                 nhits_by_lineno[lineno] = nhits_by_lineno.setdefault(lineno, 0) + nhits
-                 total_time_by_lineno[lineno] = total_time_by_lineno.setdefault(lineno, 0) + total_time
-
-            entries = [(lineno, nhits, total_time_by_lineno[lineno]) for lineno, nhits in nhits_by_lineno.items()]
-            entries.sort()
-
-            # NOTE: v4.x may produce more than one entry per line. For example:
-            #   1:  for x in range(10):
-            #   2:      pass
-            #  will produce a 1-hit entry on line 1, and 10-hit entries on lines 1 and 2
-            #  This doesn't affect `print_stats`, because it uses the last entry for a given line (line number is
-            #  used a dict key so earlier entries are overwritten), but to keep compatability with other tools,
-            #  let's only keep the last entry for each line
-            # Remove all but the last entry for each line
-            entries = list({e[0]: e for e in entries}.values())
-            stats[key] = entries
+                 orig_nhits, orig_total_time = entries_by_lineno.get(
+                     lineno, (0, 0))
+                 entries_by_lineno[lineno] = (orig_nhits + nhits,
+                                              orig_total_time + total_time)
+
+        # Aggregate the timing data
+        stats = {
+            key: sorted((line, nhits, time)
+                        for line, (nhits, time) in entries_by_lineno.items())
+            for key, entries_by_lineno in all_entries.items()}
         return LineStats(stats, self.timer_unit)
 
 

From 9bcc961481611b0f1aa8853de4b1df1cf4bbd4cf Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Sun, 8 Jun 2025 04:04:48 +0200
Subject: [PATCH 03/11] CHANGELOG entry

---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2abb86ed..effad2e6 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -25,6 +25,7 @@ Changes
     like class methods and properties
   * ``LineProfiler`` can now be used as a class decorator
 * FIX: Fixed line tracing for Cython code; superseded use of the legacy tracing system with ``sys.monitoring``
+* ENH: Fixed edge case where :py:meth:`LineProfiler.get_stats()` neglects data from duplicate code objects (#348)
 
 4.2.0
 ~~~~~

From c8d895b03bf6523c1fe258eb0f347d80ba574afe Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Tue, 10 Jun 2025 08:01:30 +0200
Subject: [PATCH 04/11] Consistent byte-code padding between instances

line_profiler/_line_profiler.pyx::LineProfiler
    _all_instances_by_bcodes
        New private class attribute for keeping track of:
        - Instances profiling the same bytecode
        - Count for how the the bytecode should be padded
    dupes_map
        Now containing the up-to-date (instead of pre-padding) code objs
    add_function()
        Refactored implementation to:
        - Pad non-Cython bytecodes according to
          `._all_instances_by_bcodes` instead of the instance's
          `.dupes_map`
        - Use `.dupes_map` to identify profiler instances profiling the
          same functions and update their hash tables
        - Curate `.dupes_map` on said instances to stay up-to-date
---
 line_profiler/_line_profiler.pyx | 82 +++++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 18 deletions(-)

diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index db2ff852..75ec13ee 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -26,6 +26,7 @@ import threading
 import opcode
 import os
 import types
+from weakref import WeakSet
 
 NOP_VALUE: int = opcode.opmap['NOP']
 
@@ -318,8 +319,12 @@ cdef class LineProfiler:
     cdef public double timer_unit
     cdef public object threaddata
 
-    # This is shared between instances and threads
+    # These are shared between instances and threads
+    # type: dict[int, set[LineProfiler]], int = thread id
     _all_active_instances = {}
+    # type: dict[bytes, tuple[int, weakref.WeakSet[LineProfiler]]]
+    # Wher bytes = bytecode, int = padding length
+    _all_instances_by_bcodes = {}
 
     def __init__(self, *functions):
         self.functions = []
@@ -353,22 +358,57 @@ cdef class LineProfiler:
                 warnings.warn("Could not extract a code object for the object %r" % (func,))
                 return
 
+        # Note: if we are to alter the code object, other profilers
+        # which previously added this function would still expect the
+        # old bytecode, and thus will not see anything when the function
+        # is executed;
+        # hence:
+        # - When doing bytecode padding, take into account all instances
+        #   which refers to the same base bytecode to ensure
+        #   disambiguation
+        # - Update all existing instances referring to the old code
+        #   object
+        # Since no code padding is/can be done with Cython mock
+        # "code objects", it is *probably* okay to only do the special
+        # handling on the non-Cython branch.
+        # XXX: tests for the above assertion if necessary
+        original_code_obj = code
+        original_bcode = code.co_code
+        instances_to_update = {self}
         code_hashes = []
-        if any(code.co_code):  # Normal Python functions
-            if code.co_code in self.dupes_map:
-                self.dupes_map[code.co_code] += [code]
-                # code hash already exists, so there must be a duplicate
-                # function. add no-op
-                co_padding : bytes = NOP_BYTES * (len(self.dupes_map[code.co_code]) + 1)
-                co_code = code.co_code + co_padding
-                CodeType = type(code)
+        if any(original_bcode):  # Normal Python functions
+            try:
+                npad, neighbors = self._all_instances_by_bcodes[original_bcode]
+                neighbors.add(self)
+            except KeyError:
+                npad = 0
+                neighbors = WeakSet({self})
+                self._all_instances_by_bcodes[original_bcode] = (0, neighbors)
+            if npad:
+                # Code hash already exists, so there must be a duplicate
+                # function (on some instance);
+                # add no-op
+                co_code = original_bcode + NOP_BYTES * npad
                 code = _code_replace(func, co_code=co_code)
                 try:
                     func.__code__ = code
                 except AttributeError as e:
                     func.__func__.__code__ = code
-            else:
-                self.dupes_map[code.co_code] = [code]
+            try:
+                self.dupes_map[original_bcode].append(code)
+            except KeyError:
+                self.dupes_map[original_bcode] = [code]
+            # If we padded the bytecode, identify other instances
+            # profiling the same CODE OBJECT, replacing the original
+            # code object with the new one and marking those instances
+            # for further updates
+            if npad:
+                for instance in neighbors:
+                    code_objs = instance.dupes_map[original_bcode]
+                    for i, code_obj in enumerate(code_objs):
+                        if code_obj is original_code_obj:
+                            instances_to_update.add(instance)
+                            code_objs[i] = code
             # TODO: Since each line can be many bytecodes, this is kinda
             # inefficient
             # See if this can be sped up by not needing to iterate over
@@ -400,13 +440,19 @@ cdef class LineProfiler:
             # We can't replace the code object on Cython functions, but
             # we can *store* a copy with the correct metadata
             code = code.replace(co_filename=cython_source)
-        for code_hash in code_hashes:
-            if not self._c_code_map.count(code_hash):
-                try:
-                    self.code_hash_map[code].append(code_hash)
-                except KeyError:
-                    self.code_hash_map[code] = [code_hash]
-                self._c_code_map[code_hash]
+        # Update `._c_code_map` and `.code_hash_map` with the new line
+        # hashes on `self` and other instances profiling the same func
+        for instance in instances_to_update:
+            prof = <LineProfiler>instance
+            try:
+                line_hashes = prof.code_hash_map[code]
+            except KeyError:
+                line_hashes = prof.code_hash_map[code] = []
+            for code_hash in code_hashes:
+                line_hash = <int64>code_hash
+                if not prof._c_code_map.count(line_hash):
+                    line_hashes.append(line_hash)
+                    prof._c_code_map[line_hash]
 
         self.functions.append(func)
 

From cdf0c2e2f48fecb9dbb5d8fc622f4b773187da4e Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Tue, 10 Jun 2025 08:50:43 +0200
Subject: [PATCH 05/11] Follow-up refactoring

line_profiler/_line_profiler.pyx::LineProfiler
    add_function()
        - Now identifying update targets based on function-object
          identity instead of code-object identity (because different
          function objects can share the same code object)
        - Simplified implementation
    _all_instances_by_bcodes
        Superseded by other private class attributes:
        - `._all_paddings`:
          mapping from bytecodes to padding length
        - `._all_instances_by_funcs`:
          mapping from function identity to `WeakSet`s of instances
---
 line_profiler/_line_profiler.pyx | 62 +++++++++++++++-----------------
 1 file changed, 29 insertions(+), 33 deletions(-)

diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index 75ec13ee..28818286 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -322,9 +322,10 @@ cdef class LineProfiler:
     # These are shared between instances and threads
     # type: dict[int, set[LineProfiler]], int = thread id
     _all_active_instances = {}
-    # type: dict[bytes, tuple[int, weakref.WeakSet[LineProfiler]]]
-    # Wher bytes = bytecode, int = padding length
-    _all_instances_by_bcodes = {}
+    # type: dict[bytes, int], bytes = bytecode
+    _all_paddings = {}
+    # type: dict[int, weakref.WeakSet[LineProfiler]], int = func id
+    _all_instances_by_funcs = {}
 
     def __init__(self, *functions):
         self.functions = []
@@ -350,9 +351,11 @@ cdef class LineProfiler:
             )
         try:
             code = func.__code__
+            func_id = id(func)
         except AttributeError:
             try:
                 code = func.__func__.__code__
+                func_id = id(func.__func__)
             except AttributeError:
                 import warnings
                 warnings.warn("Could not extract a code object for the object %r" % (func,))
@@ -372,52 +375,44 @@ cdef class LineProfiler:
         # "code objects", it is *probably* okay to only do the special
         # handling on the non-Cython branch.
         # XXX: tests for the above assertion if necessary
-        original_code_obj = code
-        original_bcode = code.co_code
-        instances_to_update = {self}
+        co_code: bytes = code.co_code
         code_hashes = []
-        if any(original_bcode):  # Normal Python functions
+        if any(co_code):  # Normal Python functions
             try:
-                npad, neighbors = self._all_instances_by_bcodes[original_bcode]
-                neighbors.add(self)
+                npad = self._all_paddings[co_code]
+                self._all_paddings[co_code] = npad + 1
             except KeyError:
                 npad = 0
-                neighbors = WeakSet({self})
-                self._all_instances_by_bcodes[original_bcode] = (0, neighbors)
+                self._all_paddings[co_code] = 1
+            try:
+                neighbors = self._all_instances_by_funcs[func_id]
+                neighbors.add(self)
+            except KeyError:
+                neighbors = self._all_instances_by_funcs[func_id] = WeakSet({self})
+            # Maintain `.dupes_map` (legacy)
+            try:
+                self.dupes_map[co_code].append(code)
+            except KeyError:
+                self.dupes_map[co_code] = [code]
             if npad:
                 # Code hash already exists, so there must be a duplicate
                 # function (on some instance);
                 # add no-op
-                co_code = original_bcode + NOP_BYTES * npad
+                co_code += NOP_BYTES * npad
                 code = _code_replace(func, co_code=co_code)
                 try:
                     func.__code__ = code
                 except AttributeError as e:
                     func.__func__.__code__ = code
-            try:
-                self.dupes_map[original_bcode].append(code)
-            except KeyError:
-                self.dupes_map[original_bcode] = [code]
-            # If we padded the bytecode, identify other instances
-            # profiling the same CODE OBJECT, replacing the original
-            # code object with the new one and marking those instances
-            # for further updates
-            if npad:
-                for instance in neighbors:
-                    code_objs = instance.dupes_map[original_bcode]
-                    for i, code_obj in enumerate(code_objs):
-                        if code_obj is original_code_obj:
-                            instances_to_update.add(instance)
-                            code_objs[i] = code
             # TODO: Since each line can be many bytecodes, this is kinda
             # inefficient
             # See if this can be sped up by not needing to iterate over
             # every byte
-            for offset, _ in enumerate(code.co_code):
-                code_hash = compute_line_hash(
-                    hash((code.co_code)),
-                    PyCode_Addr2Line(<PyCodeObject*>code, offset))
-                code_hashes.append(code_hash)
+            for offset, _ in enumerate(co_code):
+                code_hashes.append(
+                    compute_line_hash(
+                        hash(co_code),
+                        PyCode_Addr2Line(<PyCodeObject*>code, offset)))
         else:  # Cython functions have empty/zero bytecodes
             if CANNOT_LINE_TRACE_CYTHON:
                 return
@@ -440,9 +435,10 @@ cdef class LineProfiler:
             # We can't replace the code object on Cython functions, but
             # we can *store* a copy with the correct metadata
             code = code.replace(co_filename=cython_source)
+            neighbors = {self}
         # Update `._c_code_map` and `.code_hash_map` with the new line
         # hashes on `self` and other instances profiling the same func
-        for instance in instances_to_update:
+        for instance in neighbors:
             prof = <LineProfiler>instance
             try:
                 line_hashes = prof.code_hash_map[code]

From 703dd9740b080ded734d920e3239648989ad5b23 Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Tue, 10 Jun 2025 13:48:52 +0200
Subject: [PATCH 06/11] Test for decoration by multiple profilers

tests/test_line_profiler.py
    test_multiple_profilers_identical_bytecode()
        New test for applying multiple profilers to identical functions
        in arbitrary order (one subtest currently failing)
---
 tests/test_line_profiler.py | 152 ++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py
index 40ec3199..6aafbe50 100644
--- a/tests/test_line_profiler.py
+++ b/tests/test_line_profiler.py
@@ -815,3 +815,155 @@ def func(n):
     # (Entries are represented as tuples `(lineno, nhits, time)`)
     entries, = profile.get_stats().timings.values()
     assert entries[-2][1] == 10 + 20
+
+
+@pytest.mark.parametrize('force_same_line_numbers', [True, False])
+@pytest.mark.parametrize(
+    'ops',
+    [
+        # Replication of the problematic case in issue #350
+        'func1:prof_all'
+        '-func2:prof_some:prof_all'
+        '-func3:prof_all'
+        '-func4:prof_some:prof_all',
+        # Invert the order of decoration
+        'func1:prof_all'
+        '-func2:prof_all:prof_some'
+        '-func3:prof_all'
+        '-func4:prof_all:prof_some',
+        # More profiler stacks
+        'func1:p1:p2'
+        '-func2:p2:p3'
+        '-func3:p3:p4'
+        '-func4:p4:p1',
+        'func1:p1:p2:p3'
+        '-func2:p2:p3:p4'
+        '-func3:p3:p4:p1'
+        '-func4:p4:p1:p2',
+        'func1:p1:p2:p3'
+        '-func2:p4:p3:p2'
+        '-func3:p3:p4:p1'
+        '-func4:p2:p1:p4',
+        # Misc. edge cases
+        # - Note: while the following results in `func1()` and `func2()`
+        #   sharing the same bytecodes, the profiler `p3` is nonetheless
+        #   able to distinguish between the two (when the functions have
+        #   distinct line numbers), because they are defined on
+        #   different lines and thus hashed to different line hashes
+        'func1:p1:p2'  # `func1()` padded once
+        '-func2:p3'  # `func2()` padded twice
+        '-func1:p4:p3',  # `func1()` padded once (again)
+    ])
+def test_multiple_profilers_identical_bytecode(
+        tmp_path, ops, force_same_line_numbers):
+    """
+    Test that functions compiling down to the same bytecode are
+    correctly handled between multiple profilers.
+
+    Notes
+    -----
+    `ops` should consist of chunks joined by hyphens, where each chunk
+    has the format `<func_id>:<prof_name>[:<prof_name>[...]]`,
+    indicating that the profilers are to be used in order to decorate
+    the specified function.
+    """
+    def check_seen(name, output, func_id, expected):
+        lines = [line for line in output.splitlines()
+                 if line.startswith('Function: ')]
+        if any(func_id in line for line in lines) == expected:
+            return
+        if expected:
+            raise AssertionError(
+                f'profiler `@{name}` didn\'t see `{func_id}()`')
+        raise AssertionError(
+            f'profiler `@{name}` saw `{func_id}()`')
+
+    def check_has_profiling_data(name, output, func_id, expected):
+        assert func_id.startswith('func')
+        nloops = func_id[len('func'):]
+        try:
+            line = next(line for line in output.splitlines()
+                        if line.endswith(f'result.append({nloops})'))
+        except StopIteration:
+            if expected:
+                raise AssertionError(
+                    f'profiler `@{name}` didn\'t see `{func_id}()`')
+            else:
+                return
+        if (line.split()[1] == nloops) == expected:
+            return
+        if expected:
+            raise AssertionError(
+                f'profiler `@{name}` didn\'t get data from `{func_id}()`')
+        raise AssertionError(
+            f'profiler `@{name}` got data from `{func_id}()`')
+
+    if force_same_line_numbers:
+        funcs = {}
+        pattern = textwrap.dedent("""
+        def func{0}():
+            result = []
+            for _ in range({0}):
+                result.append({0})
+            return result
+            """).strip('\n')
+        for i in [1, 2, 3, 4]:
+            tempfile = tmp_path / f'func{i}.py'
+            source = pattern.format(i)
+            tempfile.write_text(source)
+            exec(compile(source, str(tempfile), 'exec'), funcs)
+    else:
+        def func1():
+            result = []
+            for _ in range(1):
+                result.append(1)
+            return result
+
+        def func2():
+            result = []
+            for _ in range(2):
+                result.append(2)
+            return result
+
+        def func3():
+            result = []
+            for _ in range(3):
+                result.append(3)
+            return result
+
+        def func4():
+            result = []
+            for _ in range(4):
+                result.append(4)
+            return result
+
+        funcs = {'func1': func1, 'func2': func2,
+                 'func3': func3, 'func4': func4}
+
+    # Apply the decorators in order
+    all_dec_names = {}
+    all_profs = {}
+    for op in ops.split('-'):
+        func_id, *profs = op.split(':')
+        all_dec_names.setdefault(func_id, set()).update(profs)
+        for name in profs:
+            try:
+                prof = all_profs[name]
+            except KeyError:
+                prof = all_profs[name] = LineProfiler()
+            funcs[func_id] = prof(funcs[func_id])
+    # Call each function once
+    assert funcs['func1']() == [1]
+    assert funcs['func2']() == [2, 2]
+    assert funcs['func3']() == [3, 3, 3]
+    assert funcs['func4']() == [4, 4, 4, 4]
+    # Check the profiling results
+    for name, prof in sorted(all_profs.items()):
+        with io.StringIO() as sio:
+            prof.print_stats(sio, summarize=True)
+            output = sio.getvalue()
+            print(f'@{name}:', textwrap.indent(output, '  '), sep='\n\n')
+        for func_id, decs in all_dec_names.items():
+            profiled = name in decs
+            check_seen(name, output, func_id, profiled)
+            check_has_profiling_data(name, output, func_id, profiled)

From d11880577556cdff36a792abfd8a0601410dfd88 Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Tue, 10 Jun 2025 15:05:36 +0200
Subject: [PATCH 07/11] Fixed bytecode-clash edge case

line_profiler/_line_profiler.pyx::LineProfiler.add_function()
    Now stripping padded code objects and re-padding when appropriate,
    so that no two profiled code objects can end up with the same
    bytecode

tests/test_line_profiler.py
    test_multiple_profilers_identical_bytecode()
        - Updated comments and docstring
        - Added check against duplicate bytecodes
---
 line_profiler/_line_profiler.pyx | 27 +++++++++++++++++---------
 tests/test_line_profiler.py      | 33 ++++++++++++++++++--------------
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index 28818286..89472e17 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -378,27 +378,36 @@ cdef class LineProfiler:
         co_code: bytes = code.co_code
         code_hashes = []
         if any(co_code):  # Normal Python functions
+            base_co_code: bytes = co_code
+            # Figure out how much padding we need and strip the bytecode
+            # TODO: is there a way to do this faster? `.rstrip()` doesn't
+            # work (reliably) since `NOP_BYTES` should be 2-byte wide
+            npad_code: int = 0
+            nop_len: int = -len(NOP_BYTES)
+            while base_co_code.endswith(NOP_BYTES):
+                base_co_code = base_co_code[:nop_len]
+                npad_code += 1
             try:
-                npad = self._all_paddings[co_code]
-                self._all_paddings[co_code] = npad + 1
+                npad = self._all_paddings[base_co_code]
             except KeyError:
                 npad = 0
-                self._all_paddings[co_code] = 1
+            self._all_paddings[base_co_code] = max(npad, npad_code) + 1
             try:
                 neighbors = self._all_instances_by_funcs[func_id]
                 neighbors.add(self)
             except KeyError:
-                neighbors = self._all_instances_by_funcs[func_id] = WeakSet({self})
+                neighbors = self._all_instances_by_funcs[func_id] = WeakSet(
+                    {self})
             # Maintain `.dupes_map` (legacy)
             try:
-                self.dupes_map[co_code].append(code)
+                self.dupes_map[base_co_code].append(code)
             except KeyError:
-                self.dupes_map[co_code] = [code]
-            if npad:
+                self.dupes_map[base_co_code] = [code]
+            if npad > npad_code:
                 # Code hash already exists, so there must be a duplicate
                 # function (on some instance);
-                # add no-op
-                co_code += NOP_BYTES * npad
+                # (re-)pad with no-op
+                co_code = base_co_code + NOP_BYTES * npad
                 code = _code_replace(func, co_code=co_code)
                 try:
                     func.__code__ = code
diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py
index 6aafbe50..f1a16609 100644
--- a/tests/test_line_profiler.py
+++ b/tests/test_line_profiler.py
@@ -845,14 +845,12 @@ def func(n):
         '-func3:p3:p4:p1'
         '-func4:p2:p1:p4',
         # Misc. edge cases
-        # - Note: while the following results in `func1()` and `func2()`
-        #   sharing the same bytecodes, the profiler `p3` is nonetheless
-        #   able to distinguish between the two (when the functions have
-        #   distinct line numbers), because they are defined on
-        #   different lines and thus hashed to different line hashes
-        'func1:p1:p2'  # `func1()` padded once
-        '-func2:p3'  # `func2()` padded twice
-        '-func1:p4:p3',  # `func1()` padded once (again)
+        # - Naive padding of the following case would cause `func1()`
+        #   and `func2()` to end up with the same bytecode, so guard
+        #   against it
+        'func1:p1:p2'  # `func1()` padded once?
+        '-func2:p3'  # `func2()` padded twice?
+        '-func1:p4:p3',  # `func1()` padded once (again)?
     ])
 def test_multiple_profilers_identical_bytecode(
         tmp_path, ops, force_same_line_numbers):
@@ -862,10 +860,12 @@ def test_multiple_profilers_identical_bytecode(
 
     Notes
     -----
-    `ops` should consist of chunks joined by hyphens, where each chunk
-    has the format `<func_id>:<prof_name>[:<prof_name>[...]]`,
-    indicating that the profilers are to be used in order to decorate
-    the specified function.
+    - `ops` should consist of chunks joined by hyphens, where each chunk
+      has the format `<func_id>:<prof_name>[:<prof_name>[...]]`,
+      indicating that the profilers are to be used in order to decorate
+      the specified function.
+    - `force_same_line_numbers` is used to coerce all functions to
+      compile down to code objects with the same line numbers.
     """
     def check_seen(name, output, func_id, expected):
         lines = [line for line in output.splitlines()
@@ -941,11 +941,11 @@ def func4():
                  'func3': func3, 'func4': func4}
 
     # Apply the decorators in order
-    all_dec_names = {}
+    all_dec_names = {f'func{i}': set() for i in [1, 2, 3, 4]}
     all_profs = {}
     for op in ops.split('-'):
         func_id, *profs = op.split(':')
-        all_dec_names.setdefault(func_id, set()).update(profs)
+        all_dec_names[func_id].update(profs)
         for name in profs:
             try:
                 prof = all_profs[name]
@@ -957,6 +957,11 @@ def func4():
     assert funcs['func2']() == [2, 2]
     assert funcs['func3']() == [3, 3, 3]
     assert funcs['func4']() == [4, 4, 4, 4]
+    # Check that the bytecodes of the profiled functions are distinct
+    profiled_funcs = {funcs[name].__line_profiler_id__.func
+                      for name, decs in all_dec_names.items() if decs}
+    assert len({func.__code__.co_code
+                for func in profiled_funcs}) == len(profiled_funcs)
     # Check the profiling results
     for name, prof in sorted(all_profs.items()):
         with io.StringIO() as sio:

From 654121f24a25b91a00d50dc5d674a65ac13d3677 Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Tue, 10 Jun 2025 17:12:24 +0200
Subject: [PATCH 08/11] Test for profiling data preservation

tests/test_line_profiler.py
    get_prof_stats()
        New helper function for formatting and retrieving
        `LineProfiler.print_stats()` output
    test_*_decorator()
    test_profile_generated_code()
    test_multiple_profilers_identical_bytecode()
        Refactored to use `get_prof_stats()`
    test_aggregate_profiling_data_between_code_versions()
        New test checking that profiling data gathered before and after
        a function's code object is rewritten are correctly aggregated
---
 tests/test_line_profiler.py | 89 ++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 37 deletions(-)

diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py
index f1a16609..79e3572f 100644
--- a/tests/test_line_profiler.py
+++ b/tests/test_line_profiler.py
@@ -41,6 +41,14 @@ def strip(s):
     return textwrap.dedent(s).strip('\n')
 
 
+def get_prof_stats(prof, name='prof', **kwargs):
+    with io.StringIO() as sio:
+        prof.print_stats(sio, **kwargs)
+        output = sio.getvalue()
+        print(f'@{name}:', textwrap.indent(output, '  '), sep='\n\n')
+    return output
+
+
 class check_timings:
     """
     Verify that the profiler starts without timing data and ends with
@@ -252,10 +260,7 @@ def foo(cls) -> str:
     assert profile.enable_count == 0
     assert len(profile.functions) == 1
     assert Object.foo() == Object().foo() == 'ObjectObject'
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
-    print(output)
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `Object.foo()`
     assert output.endswith('foo')
     line, = (line for line in output.splitlines() if line.endswith('* 2'))
@@ -286,10 +291,7 @@ def foo(x: int) -> int:
     assert profile.enable_count == 0
     assert len(profile.functions) == 1
     assert Object.foo(3) == Object().foo(3) == 6
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
-    print(output)
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `Object.foo()`
     assert output.endswith('foo')
     line, = (line for line in output.splitlines() if line.endswith('* 2'))
@@ -327,10 +329,7 @@ def foo(self, x: int) -> int:
             == profiled_foo_2(2)
             == obj.foo(2)
             == id(obj) * 2)
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
-    print(output)
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `Object.foo()`
     assert output.endswith('foo')
     line, = (line for line in output.splitlines() if line.endswith('* x'))
@@ -363,10 +362,7 @@ def foo(self, x: int) -> int:
     assert profile.enable_count == 0
     assert profile.functions == [Object.foo]
     assert obj.foo(1) == obj.bar() == id(obj)
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
-    print(output)
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `Object.foo()` (via `.bar()`)
     assert output.endswith('foo')
     line, = (line for line in output.splitlines() if line.endswith('* x'))
@@ -403,10 +399,7 @@ def foo(x: int, y: int) -> int:
             == bar(3)
             == foo(2, 3)
             == 5)
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
-    print(output)
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `foo()`
     assert output.endswith('foo')
     line, = (line for line in output.splitlines() if line.endswith('x + y'))
@@ -452,10 +445,7 @@ def foo(self, foo) -> None:
     obj.foo = 10  # Use setter
     assert obj.x == 5
     assert obj.foo == 10  # Use getter
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
-    print(output)
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `Object.foo`
     assert output.endswith('foo')
     getter_line, = (line for line in output.splitlines()
@@ -495,9 +485,7 @@ def foo(self) -> int:
     obj = Object(3)
     assert obj.foo == 6  # Use getter
     assert obj.foo == 6  # Getter not called because it's cached
-    with io.StringIO() as sio:
-        profile.print_stats(stream=sio, summarize=True)
-        output = strip(sio.getvalue())
+    output = strip(get_prof_stats(profile, name='profile', summarize=True))
     # Check that we have profiled `Object.foo`
     assert output.endswith('foo')
     line, = (line for line in output.splitlines() if line.endswith('* 2'))
@@ -713,10 +701,7 @@ def test_profile_generated_code():
     profiled_fn = profiler(fn)
     profiled_fn()
 
-    import io
-    s = io.StringIO()
-    profiler.print_stats(stream=s)
-    output = s.getvalue()
+    output = get_prof_stats(profiler, 'profiler')
 
     # Check that the output contains the generated code's source lines
     for line in code_lines:
@@ -900,13 +885,13 @@ def check_has_profiling_data(name, output, func_id, expected):
 
     if force_same_line_numbers:
         funcs = {}
-        pattern = textwrap.dedent("""
+        pattern = strip("""
         def func{0}():
             result = []
             for _ in range({0}):
                 result.append({0})
             return result
-            """).strip('\n')
+            """)
         for i in [1, 2, 3, 4]:
             tempfile = tmp_path / f'func{i}.py'
             source = pattern.format(i)
@@ -964,11 +949,41 @@ def func4():
                 for func in profiled_funcs}) == len(profiled_funcs)
     # Check the profiling results
     for name, prof in sorted(all_profs.items()):
-        with io.StringIO() as sio:
-            prof.print_stats(sio, summarize=True)
-            output = sio.getvalue()
-            print(f'@{name}:', textwrap.indent(output, '  '), sep='\n\n')
+        output = get_prof_stats(prof, name=name, summarize=True)
         for func_id, decs in all_dec_names.items():
             profiled = name in decs
             check_seen(name, output, func_id, profiled)
             check_has_profiling_data(name, output, func_id, profiled)
+
+
+def test_aggregate_profiling_data_between_code_versions():
+    """
+    Test that profiling data from previous versions of the code object
+    are preserved when another profiler causes the code object of a
+    function to be overwritten.
+    """
+    def func(n):
+        x = 0
+        for n in range(1, n + 1):
+            x += n
+        return x
+
+    prof1 = LineProfiler()
+    prof2 = LineProfiler()
+
+    # Gather data with `@prof1`
+    wrapper1 = prof1(func)
+    assert wrapper1(10) == 10 * 11 // 2
+    code = func.__code__
+    # Gather data with `@prof2`; the code object is overwritten here
+    wrapper2 = prof2(wrapper1)
+    assert func.__code__ != code
+    assert wrapper2(15) == 15 * 16 // 2
+    # Despite the overwrite of the code object, the old data should
+    # still remain, and be aggregated with the new data when calling
+    # `prof1.get_stats()`
+    for prof, name, count in (prof1, 'prof1', 25), (prof2, 'prof2', 15):
+        result = get_prof_stats(prof, name)
+        loop_body = next(line for line in result.splitlines()
+                         if line.endswith('x += n'))
+        assert loop_body.split()[1] == str(count)

From af098a26d41ba2934ced228c6862198c15baf966 Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Tue, 10 Jun 2025 17:43:02 +0200
Subject: [PATCH 09/11] Minor optimization

---
 line_profiler/_line_profiler.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index 89472e17..f23c929c 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -413,6 +413,8 @@ cdef class LineProfiler:
                     func.__code__ = code
                 except AttributeError as e:
                     func.__func__.__code__ = code
+            else:  # No re-padding -> no need to update the neighbors
+                neighbors = {self}
             # TODO: Since each line can be many bytecodes, this is kinda
             # inefficient
             # See if this can be sped up by not needing to iterate over
@@ -446,7 +448,8 @@ cdef class LineProfiler:
             code = code.replace(co_filename=cython_source)
             neighbors = {self}
         # Update `._c_code_map` and `.code_hash_map` with the new line
-        # hashes on `self` and other instances profiling the same func
+        # hashes on `self` (and other instances profiling the same
+        # function if we padded the bytecode)
         for instance in neighbors:
             prof = <LineProfiler>instance
             try:

From 18c4475beba9301519f31a70b46ac245bbe67041 Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Wed, 9 Jul 2025 04:48:06 +0200
Subject: [PATCH 10/11] Added subtest for passing func to the same prof twice

---
 tests/test_line_profiler.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py
index 79e3572f..166f05b9 100644
--- a/tests/test_line_profiler.py
+++ b/tests/test_line_profiler.py
@@ -836,6 +836,12 @@ def func(n):
         'func1:p1:p2'  # `func1()` padded once?
         '-func2:p3'  # `func2()` padded twice?
         '-func1:p4:p3',  # `func1()` padded once (again)?
+        # - Check that double decoration doesn't mess things up
+        'func1:p1:p2'
+        '-func2:p2:p3'
+        '-func3:p3:p4'
+        '-func4:p4:p1'
+        '-func1:p1',  # Now we're passing `func1()` to `p1` twice
     ])
 def test_multiple_profilers_identical_bytecode(
         tmp_path, ops, force_same_line_numbers):

From 90cd65ecfebfad1ce21cd1b36506fbd24811bd4a Mon Sep 17 00:00:00 2001
From: "Terence S.-C. Tsang" <t.tsang.sci@gmail.com>
Date: Wed, 9 Jul 2025 05:20:34 +0200
Subject: [PATCH 11/11] Cython code readability cleanup

---
 line_profiler/_line_profiler.pyx | 48 +++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index f23c929c..f0401160 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -33,7 +33,8 @@ NOP_VALUE: int = opcode.opmap['NOP']
 # The Op code should be 2 bytes as stated in
 # https://docs.python.org/3/library/dis.html
 # if sys.version_info[0:2] >= (3, 11):
-NOP_BYTES: bytes = NOP_VALUE.to_bytes(2, byteorder=byteorder)
+NOP_BYTES_LEN: int = 2
+NOP_BYTES: bytes = NOP_VALUE.to_bytes(NOP_BYTES_LEN, byteorder=byteorder)
 
 # This should be true for Python >=3.11a1
 HAS_CO_QUALNAME: bool = hasattr(types.CodeType, 'co_qualname')
@@ -151,6 +152,24 @@ cdef inline int64 compute_line_hash(uint64 block_hash, uint64 linenum):
     return block_hash ^ linenum
 
 
+cdef inline object multibyte_rstrip(bytes bytecode):
+    """
+    Returns:
+        result (tuple[bytes, int])
+        - First item is the bare unpadded bytecode
+        - Second item is the number of :py:const:`NOP_BYTES`
+          ``bytecode`` has been padded with
+    """
+    npad: int = 0
+    nop_len: int = -NOP_BYTES_LEN
+    nop_bytes: bytes = NOP_BYTES
+    unpadded: bytes = bytecode
+    while unpadded.endswith(nop_bytes):
+        unpadded = unpadded[:nop_len]
+        npad += 1
+    return (unpadded, npad)
+
+
 if CAN_USE_SYS_MONITORING:
     def _is_main_thread() -> bool:
         return threading.current_thread() == threading.main_thread()
@@ -378,26 +397,21 @@ cdef class LineProfiler:
         co_code: bytes = code.co_code
         code_hashes = []
         if any(co_code):  # Normal Python functions
-            base_co_code: bytes = co_code
             # Figure out how much padding we need and strip the bytecode
-            # TODO: is there a way to do this faster? `.rstrip()` doesn't
-            # work (reliably) since `NOP_BYTES` should be 2-byte wide
-            npad_code: int = 0
-            nop_len: int = -len(NOP_BYTES)
-            while base_co_code.endswith(NOP_BYTES):
-                base_co_code = base_co_code[:nop_len]
-                npad_code += 1
+            base_co_code: bytes
+            npad_code: int
+            base_co_code, npad_code = multibyte_rstrip(co_code)
             try:
                 npad = self._all_paddings[base_co_code]
             except KeyError:
                 npad = 0
             self._all_paddings[base_co_code] = max(npad, npad_code) + 1
             try:
-                neighbors = self._all_instances_by_funcs[func_id]
-                neighbors.add(self)
+                profilers_to_update = self._all_instances_by_funcs[func_id]
+                profilers_to_update.add(self)
             except KeyError:
-                neighbors = self._all_instances_by_funcs[func_id] = WeakSet(
-                    {self})
+                profilers_to_update = WeakSet({self})
+                self._all_instances_by_funcs[func_id] = profilers_to_update
             # Maintain `.dupes_map` (legacy)
             try:
                 self.dupes_map[base_co_code].append(code)
@@ -413,8 +427,8 @@ cdef class LineProfiler:
                     func.__code__ = code
                 except AttributeError as e:
                     func.__func__.__code__ = code
-            else:  # No re-padding -> no need to update the neighbors
-                neighbors = {self}
+            else:  # No re-padding -> no need to update the other profs
+                profilers_to_update = {self}
             # TODO: Since each line can be many bytecodes, this is kinda
             # inefficient
             # See if this can be sped up by not needing to iterate over
@@ -446,11 +460,11 @@ cdef class LineProfiler:
             # We can't replace the code object on Cython functions, but
             # we can *store* a copy with the correct metadata
             code = code.replace(co_filename=cython_source)
-            neighbors = {self}
+            profilers_to_update = {self}
         # Update `._c_code_map` and `.code_hash_map` with the new line
         # hashes on `self` (and other instances profiling the same
         # function if we padded the bytecode)
-        for instance in neighbors:
+        for instance in profilers_to_update:
             prof = <LineProfiler>instance
             try:
                 line_hashes = prof.code_hash_map[code]