diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2abb86ed..effad2e6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -25,6 +25,7 @@ Changes like class methods and properties * ``LineProfiler`` can now be used as a class decorator * FIX: Fixed line tracing for Cython code; superseded use of the legacy tracing system with ``sys.monitoring`` +* FIX: Fixed edge case where :py:meth:`LineProfiler.get_stats()` neglected data from duplicate code objects (#348) 4.2.0 ~~~~~ diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx index 15510317..f0401160 100644 --- a/line_profiler/_line_profiler.pyx +++ b/line_profiler/_line_profiler.pyx @@ -26,13 +26,15 @@ import threading import opcode import os import types +from weakref import WeakSet NOP_VALUE: int = opcode.opmap['NOP'] # The Op code should be 2 bytes as stated in # https://docs.python.org/3/library/dis.html # if sys.version_info[0:2] >= (3, 11): -NOP_BYTES: bytes = NOP_VALUE.to_bytes(2, byteorder=byteorder) +NOP_BYTES_LEN: int = 2 +NOP_BYTES: bytes = NOP_VALUE.to_bytes(NOP_BYTES_LEN, byteorder=byteorder) # This should be true for Python >=3.11a1 HAS_CO_QUALNAME: bool = hasattr(types.CodeType, 'co_qualname') @@ -150,6 +152,24 @@ cdef inline int64 compute_line_hash(uint64 block_hash, uint64 linenum): return block_hash ^ linenum +cdef inline object multibyte_rstrip(bytes bytecode): + """ + Returns: + result (tuple[bytes, int]) + - First item is the bare unpadded bytecode + - Second item is the number of :py:const:`NOP_BYTES` + ``bytecode`` has been padded with + """ + npad: int = 0 + nop_len: int = -NOP_BYTES_LEN + nop_bytes: bytes = NOP_BYTES + unpadded: bytes = bytecode + while unpadded.endswith(nop_bytes): + unpadded = unpadded[:nop_len] + npad += 1 + return (unpadded, npad) + + if CAN_USE_SYS_MONITORING: def _is_main_thread() -> bool: return threading.current_thread() == threading.main_thread() @@ -318,8 +338,13 @@ cdef class LineProfiler: cdef public double timer_unit cdef public object 
threaddata - # This is shared between instances and threads + # These are shared between instances and threads + # type: dict[int, set[LineProfiler]], int = thread id _all_active_instances = {} + # type: dict[bytes, int], bytes = bytecode + _all_paddings = {} + # type: dict[int, weakref.WeakSet[LineProfiler]], int = func id + _all_instances_by_funcs = {} def __init__(self, *functions): self.functions = [] @@ -345,39 +370,74 @@ cdef class LineProfiler: ) try: code = func.__code__ + func_id = id(func) except AttributeError: try: code = func.__func__.__code__ + func_id = id(func.__func__) except AttributeError: import warnings warnings.warn("Could not extract a code object for the object %r" % (func,)) return + # Note: if we are to alter the code object, other profilers + # which previously added this function would still expect the + # old bytecode, and thus will not see anything when the function + # is executed; + # hence: + # - When doing bytecode padding, take into account all instances + # which refer to the same base bytecode to ensure + # disambiguation + # - Update all existing instances referring to the old code + # object + # Since no code padding is/can be done with Cython mock + # "code objects", it is *probably* okay to only do the special + # handling on the non-Cython branch. + # XXX: add tests for the above assertion if necessary + co_code: bytes = code.co_code code_hashes = [] - if any(code.co_code): # Normal Python functions - if code.co_code in self.dupes_map: - self.dupes_map[code.co_code] += [code] - # code hash already exists, so there must be a duplicate - # function. 
add no-op - co_padding : bytes = NOP_BYTES * (len(self.dupes_map[code.co_code]) + 1) - co_code = code.co_code + co_padding - CodeType = type(code) + if any(co_code): # Normal Python functions + # Figure out how much padding we need and strip the bytecode + base_co_code: bytes + npad_code: int + base_co_code, npad_code = multibyte_rstrip(co_code) + try: + npad = self._all_paddings[base_co_code] + except KeyError: + npad = 0 + self._all_paddings[base_co_code] = max(npad, npad_code) + 1 + try: + profilers_to_update = self._all_instances_by_funcs[func_id] + profilers_to_update.add(self) + except KeyError: + profilers_to_update = WeakSet({self}) + self._all_instances_by_funcs[func_id] = profilers_to_update + # Maintain `.dupes_map` (legacy) + try: + self.dupes_map[base_co_code].append(code) + except KeyError: + self.dupes_map[base_co_code] = [code] + if npad > npad_code: + # Code hash already exists, so there must be a duplicate + # function (on some instance); + # (re-)pad with no-op + co_code = base_co_code + NOP_BYTES * npad code = _code_replace(func, co_code=co_code) try: func.__code__ = code except AttributeError as e: func.__func__.__code__ = code - else: - self.dupes_map[code.co_code] = [code] + else: # No re-padding -> no need to update the other profs + profilers_to_update = {self} # TODO: Since each line can be many bytecodes, this is kinda # inefficient # See if this can be sped up by not needing to iterate over # every byte - for offset, _ in enumerate(code.co_code): - code_hash = compute_line_hash( - hash((code.co_code)), - PyCode_Addr2Line(code, offset)) - code_hashes.append(code_hash) + for offset, _ in enumerate(co_code): + code_hashes.append( + compute_line_hash( + hash(co_code), + PyCode_Addr2Line(code, offset))) else: # Cython functions have empty/zero bytecodes if CANNOT_LINE_TRACE_CYTHON: return @@ -400,13 +460,21 @@ cdef class LineProfiler: # We can't replace the code object on Cython functions, but # we can *store* a copy with the correct metadata 
code = code.replace(co_filename=cython_source) - for code_hash in code_hashes: - if not self._c_code_map.count(code_hash): - try: - self.code_hash_map[code].append(code_hash) - except KeyError: - self.code_hash_map[code] = [code_hash] - self._c_code_map[code_hash] + profilers_to_update = {self} + # Update `._c_code_map` and `.code_hash_map` with the new line + # hashes on `self` (and other instances profiling the same + # function if we padded the bytecode) + for instance in profilers_to_update: + prof = instance + try: + line_hashes = prof.code_hash_map[code] + except KeyError: + line_hashes = prof.code_hash_map[code] = [] + for code_hash in code_hashes: + line_hash = code_hash + if not prof._c_code_map.count(line_hash): + line_hashes.append(line_hash) + prof._c_code_map[line_hash] self.functions.append(func) @@ -530,35 +598,29 @@ cdef class LineProfiler: """ cdef dict cmap = self._c_code_map - stats = {} + all_entries = {} for code in self.code_hash_map: entries = [] for entry in self.code_hash_map[code]: - entries += list(cmap[entry].values()) + entries.extend(cmap[entry].values()) key = label(code) - # Merge duplicate line numbers, which occur for branch entrypoints like `if` - nhits_by_lineno = {} - total_time_by_lineno = {} + # Merge duplicate line numbers, which occur for branch + # entrypoints like `if` + entries_by_lineno = all_entries.setdefault(key, {}) for line_dict in entries: _, lineno, total_time, nhits = line_dict.values() - nhits_by_lineno[lineno] = nhits_by_lineno.setdefault(lineno, 0) + nhits - total_time_by_lineno[lineno] = total_time_by_lineno.setdefault(lineno, 0) + total_time - - entries = [(lineno, nhits, total_time_by_lineno[lineno]) for lineno, nhits in nhits_by_lineno.items()] - entries.sort() - - # NOTE: v4.x may produce more than one entry per line. 
For example: - # 1: for x in range(10): - # 2: pass - # will produce a 1-hit entry on line 1, and 10-hit entries on lines 1 and 2 - # This doesn't affect `print_stats`, because it uses the last entry for a given line (line number is - # used a dict key so earlier entries are overwritten), but to keep compatability with other tools, - # let's only keep the last entry for each line - # Remove all but the last entry for each line - entries = list({e[0]: e for e in entries}.values()) - stats[key] = entries + orig_nhits, orig_total_time = entries_by_lineno.get( + lineno, (0, 0)) + entries_by_lineno[lineno] = (orig_nhits + nhits, + orig_total_time + total_time) + + # Aggregate the timing data + stats = { + key: sorted((line, nhits, time) + for line, (nhits, time) in entries_by_lineno.items()) + for key, entries_by_lineno in all_entries.items()} return LineStats(stats, self.timer_unit) diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py index 954c77b1..166f05b9 100644 --- a/tests/test_line_profiler.py +++ b/tests/test_line_profiler.py @@ -41,6 +41,14 @@ def strip(s): return textwrap.dedent(s).strip('\n') +def get_prof_stats(prof, name='prof', **kwargs): + with io.StringIO() as sio: + prof.print_stats(sio, **kwargs) + output = sio.getvalue() + print(f'@{name}:', textwrap.indent(output, ' '), sep='\n\n') + return output + + class check_timings: """ Verify that the profiler starts without timing data and ends with @@ -252,10 +260,7 @@ def foo(cls) -> str: assert profile.enable_count == 0 assert len(profile.functions) == 1 assert Object.foo() == Object().foo() == 'ObjectObject' - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) - print(output) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `Object.foo()` assert output.endswith('foo') line, = (line for line in output.splitlines() if line.endswith('* 2')) @@ -286,10 +291,7 @@ def foo(x: int) 
-> int: assert profile.enable_count == 0 assert len(profile.functions) == 1 assert Object.foo(3) == Object().foo(3) == 6 - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) - print(output) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `Object.foo()` assert output.endswith('foo') line, = (line for line in output.splitlines() if line.endswith('* 2')) @@ -327,10 +329,7 @@ def foo(self, x: int) -> int: == profiled_foo_2(2) == obj.foo(2) == id(obj) * 2) - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) - print(output) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `Object.foo()` assert output.endswith('foo') line, = (line for line in output.splitlines() if line.endswith('* x')) @@ -363,10 +362,7 @@ def foo(self, x: int) -> int: assert profile.enable_count == 0 assert profile.functions == [Object.foo] assert obj.foo(1) == obj.bar() == id(obj) - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) - print(output) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `Object.foo()` (via `.bar()`) assert output.endswith('foo') line, = (line for line in output.splitlines() if line.endswith('* x')) @@ -403,10 +399,7 @@ def foo(x: int, y: int) -> int: == bar(3) == foo(2, 3) == 5) - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) - print(output) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `foo()` assert output.endswith('foo') line, = (line for line in output.splitlines() if line.endswith('x + y')) @@ -452,10 +445,7 @@ def foo(self, foo) -> None: obj.foo = 10 # Use setter assert obj.x == 5 assert obj.foo == 10 # Use 
getter - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) - print(output) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `Object.foo` assert output.endswith('foo') getter_line, = (line for line in output.splitlines() @@ -495,9 +485,7 @@ def foo(self) -> int: obj = Object(3) assert obj.foo == 6 # Use getter assert obj.foo == 6 # Getter not called because it's cached - with io.StringIO() as sio: - profile.print_stats(stream=sio, summarize=True) - output = strip(sio.getvalue()) + output = strip(get_prof_stats(profile, name='profile', summarize=True)) # Check that we have profiled `Object.foo` assert output.endswith('foo') line, = (line for line in output.splitlines() if line.endswith('* 2')) @@ -713,10 +701,7 @@ def test_profile_generated_code(): profiled_fn = profiler(fn) profiled_fn() - import io - s = io.StringIO() - profiler.print_stats(stream=s) - output = s.getvalue() + output = get_prof_stats(profiler, 'profiler') # Check that the output contains the generated code's source lines for line in code_lines: @@ -781,3 +766,230 @@ def sum_n_cb(n): assert t2['sum_n'][2][1] == n assert t1['sum_n_sq'][2][1] == n assert t2['sum_n_cb'][2][1] == n + + +def test_duplicate_code_objects(): + """ + Test that results are correctly aggregated between duplicate code + objects. 
+ """ + code = textwrap.dedent(""" + @profile + def func(n): + x = 0 + for n in range(1, n + 1): + x += n + return x + """).strip('\n') + profile = LineProfiler() + # Create and call the function once + namespace_1 = {'profile': profile} + exec(code, namespace_1) + assert 'func' in namespace_1 + assert len(profile.functions) == 1 + assert namespace_1['func'].__wrapped__ in profile.functions + assert namespace_1['func'](10) == 10 * 11 // 2 + # Do it again + namespace_2 = {'profile': profile} + exec(code, namespace_2) + assert 'func' in namespace_2 + assert len(profile.functions) == 2 + assert namespace_2['func'].__wrapped__ in profile.functions + assert namespace_2['func'](20) == 20 * 21 // 2 + # Check that data from both calls are aggregated + # (Entries are represented as tuples `(lineno, nhits, time)`) + entries, = profile.get_stats().timings.values() + assert entries[-2][1] == 10 + 20 + + +@pytest.mark.parametrize('force_same_line_numbers', [True, False]) +@pytest.mark.parametrize( + 'ops', + [ + # Replication of the problematic case in issue #350 + 'func1:prof_all' + '-func2:prof_some:prof_all' + '-func3:prof_all' + '-func4:prof_some:prof_all', + # Invert the order of decoration + 'func1:prof_all' + '-func2:prof_all:prof_some' + '-func3:prof_all' + '-func4:prof_all:prof_some', + # More profiler stacks + 'func1:p1:p2' + '-func2:p2:p3' + '-func3:p3:p4' + '-func4:p4:p1', + 'func1:p1:p2:p3' + '-func2:p2:p3:p4' + '-func3:p3:p4:p1' + '-func4:p4:p1:p2', + 'func1:p1:p2:p3' + '-func2:p4:p3:p2' + '-func3:p3:p4:p1' + '-func4:p2:p1:p4', + # Misc. edge cases + # - Naive padding of the following case would cause `func1()` + # and `func2()` to end up with the same bytecode, so guard + # against it + 'func1:p1:p2' # `func1()` padded once? + '-func2:p3' # `func2()` padded twice? + '-func1:p4:p3', # `func1()` padded once (again)? 
+ # - Check that double decoration doesn't mess things up + 'func1:p1:p2' + '-func2:p2:p3' + '-func3:p3:p4' + '-func4:p4:p1' + '-func1:p1', # Now we're passing `func1()` to `p1` twice + ]) +def test_multiple_profilers_identical_bytecode( + tmp_path, ops, force_same_line_numbers): + """ + Test that functions compiling down to the same bytecode are + correctly handled between multiple profilers. + + Notes + ----- + - `ops` should consist of chunks joined by hyphens, where each chunk + has the format `<func>:<prof>[:<prof>[...]]`, + indicating that the profilers are to be used in order to decorate + the specified function. + - `force_same_line_numbers` is used to coerce all functions to + compile down to code objects with the same line numbers. + """ + def check_seen(name, output, func_id, expected): + lines = [line for line in output.splitlines() + if line.startswith('Function: ')] + if any(func_id in line for line in lines) == expected: + return + if expected: + raise AssertionError( + f'profiler `@{name}` didn\'t see `{func_id}()`') + raise AssertionError( + f'profiler `@{name}` saw `{func_id}()`') + + def check_has_profiling_data(name, output, func_id, expected): + assert func_id.startswith('func') + nloops = func_id[len('func'):] + try: + line = next(line for line in output.splitlines() + if line.endswith(f'result.append({nloops})')) + except StopIteration: + if expected: + raise AssertionError( + f'profiler `@{name}` didn\'t see `{func_id}()`') + else: + return + if (line.split()[1] == nloops) == expected: + return + if expected: + raise AssertionError( + f'profiler `@{name}` didn\'t get data from `{func_id}()`') + raise AssertionError( + f'profiler `@{name}` got data from `{func_id}()`') + + if force_same_line_numbers: + funcs = {} + pattern = strip(""" + def func{0}(): + result = [] + for _ in range({0}): + result.append({0}) + return result + """) + for i in [1, 2, 3, 4]: + tempfile = tmp_path / f'func{i}.py' + source = pattern.format(i) + tempfile.write_text(source) + 
exec(compile(source, str(tempfile), 'exec'), funcs) + else: + def func1(): + result = [] + for _ in range(1): + result.append(1) + return result + + def func2(): + result = [] + for _ in range(2): + result.append(2) + return result + + def func3(): + result = [] + for _ in range(3): + result.append(3) + return result + + def func4(): + result = [] + for _ in range(4): + result.append(4) + return result + + funcs = {'func1': func1, 'func2': func2, + 'func3': func3, 'func4': func4} + + # Apply the decorators in order + all_dec_names = {f'func{i}': set() for i in [1, 2, 3, 4]} + all_profs = {} + for op in ops.split('-'): + func_id, *profs = op.split(':') + all_dec_names[func_id].update(profs) + for name in profs: + try: + prof = all_profs[name] + except KeyError: + prof = all_profs[name] = LineProfiler() + funcs[func_id] = prof(funcs[func_id]) + # Call each function once + assert funcs['func1']() == [1] + assert funcs['func2']() == [2, 2] + assert funcs['func3']() == [3, 3, 3] + assert funcs['func4']() == [4, 4, 4, 4] + # Check that the bytecodes of the profiled functions are distinct + profiled_funcs = {funcs[name].__line_profiler_id__.func + for name, decs in all_dec_names.items() if decs} + assert len({func.__code__.co_code + for func in profiled_funcs}) == len(profiled_funcs) + # Check the profiling results + for name, prof in sorted(all_profs.items()): + output = get_prof_stats(prof, name=name, summarize=True) + for func_id, decs in all_dec_names.items(): + profiled = name in decs + check_seen(name, output, func_id, profiled) + check_has_profiling_data(name, output, func_id, profiled) + + +def test_aggregate_profiling_data_between_code_versions(): + """ + Test that profiling data from previous versions of the code object + are preserved when another profiler causes the code object of a + function to be overwritten. 
+ """ + def func(n): + x = 0 + for n in range(1, n + 1): + x += n + return x + + prof1 = LineProfiler() + prof2 = LineProfiler() + + # Gather data with `@prof1` + wrapper1 = prof1(func) + assert wrapper1(10) == 10 * 11 // 2 + code = func.__code__ + # Gather data with `@prof2`; the code object is overwritten here + wrapper2 = prof2(wrapper1) + assert func.__code__ != code + assert wrapper2(15) == 15 * 16 // 2 + # Despite the overwrite of the code object, the old data should + # still remain, and be aggregated with the new data when calling + # `prof1.get_stats()` + for prof, name, count in (prof1, 'prof1', 25), (prof2, 'prof2', 15): + result = get_prof_stats(prof, name) + loop_body = next(line for line in result.splitlines() + if line.endswith('x += n')) + assert loop_body.split()[1] == str(count)