From e7e411345c213a7599f0e86fbd593b894a823d0b Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 16:17:39 +0100 Subject: [PATCH 01/11] ignore .venv --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c9dfbba..3025943 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__ dist uv.lock .benchmarks/ +.venv From 3a3688f42445d9776798537c46734027230d3510 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 17:06:54 +0100 Subject: [PATCH 02/11] expand bench --- benchmarks/benchmark.py | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index 2cca2e3..7884247 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -19,6 +19,7 @@ import pytest from patchdiff import apply, diff +from patchdiff.pointer import Pointer # Set seed for reproducibility random.seed(42) @@ -139,6 +140,27 @@ def test_dict_diff_nested(benchmark): benchmark(diff, a, b) +# ======================================== +# Set Diff Benchmarks +# ======================================== + + +@pytest.mark.benchmark(group="set-diff") +def test_set_diff_1000_elements(benchmark): + """Benchmark: Sets with 1000 elements, 10% difference.""" + a = set(generate_random_list(1000, 2000)) + b = a.copy() + # Remove 5% + a_list = list(a) + for i in range(50): + a.remove(a_list[i]) + # Add 5% + for i in range(50): + b.add(2000 + i) + + benchmark(diff, a, b) + + # ======================================== # Mixed Structure Benchmarks # ======================================== @@ -164,3 +186,35 @@ def test_apply_list_1000_elements(benchmark): ops, _ = diff(a, b) benchmark(apply, a, ops) + + + +# ======================================== +# Pointer Benchmarks +# ======================================== + + +@pytest.mark.benchmark(group="pointer") +def test_pointer_evaluate_deeply_nested(benchmark): + """Benchmark: Evaluate pointer on deeply nested structure.""" + obj = generate_nested_dict(5, 5) + ptr = Pointer.from_str("/key_0/key_1/key_2/key_3/key_4") + + benchmark(ptr.evaluate, obj) + + +@pytest.mark.benchmark(group="pointer") +def test_pointer_evaluate_large_list(benchmark): + """Benchmark: Evaluate pointer on large list.""" + obj = generate_random_list(10000) + ptr = Pointer.from_str("/5000") + + benchmark(ptr.evaluate, obj) + + +@pytest.mark.benchmark(group="pointer") +def test_pointer_append(benchmark): + """Benchmark: Append token to pointer.""" + ptr = Pointer.from_str("/a/b/c/d/e/f/g/h/i/j") + + benchmark(ptr.append, "k") From 11a7f050fd0ecafeadcac03a0eaa0280eba78892 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 17:13:28 +0100 Subject: [PATCH 03/11] micro optimization --- benchmarks/benchmark.py | 16 +++++++++--- patchdiff/apply.py | 2 ++ patchdiff/diff.py | 57 +++++++++++++++++++++++------------------ patchdiff/pointer.py | 16 ++++++------ 4 files changed, 55 insertions(+), 36 deletions(-) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index 7884247..f9e0d30 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -197,7 +197,17 @@ def test_apply_list_1000_elements(benchmark): @pytest.mark.benchmark(group="pointer") def test_pointer_evaluate_deeply_nested(benchmark): """Benchmark: Evaluate pointer on deeply nested structure.""" - obj = generate_nested_dict(5, 5) + obj = { + "key_0": { + "key_1": { + "key_2": { + "key_3": { + "key_4": 42 + } + } + } + } + } ptr = Pointer.from_str("/key_0/key_1/key_2/key_3/key_4") benchmark(ptr.evaluate, obj) @@ -206,8 +216,8 @@ def test_pointer_evaluate_deeply_nested(benchmark): @pytest.mark.benchmark(group="pointer") def test_pointer_evaluate_large_list(benchmark): """Benchmark: Evaluate pointer on large list.""" - obj = generate_random_list(10000) - ptr = Pointer.from_str("/5000") + obj = [i for i in range(10000)] + ptr = Pointer([5000]) benchmark(ptr.evaluate, obj) diff --git a/patchdiff/apply.py b/patchdiff/apply.py index 7d5d3cd..edc8b0a 100644 --- a/patchdiff/apply.py +++ b/patchdiff/apply.py @@ -6,6 +6,8 @@ def iapply(obj: Diffable, patches: List[Dict]) -> Diffable: """Apply a set of patches to an object, in-place""" + if not patches: + return obj for patch in patches: ptr = patch["path"] op = patch["op"] diff --git a/patchdiff/diff.py b/patchdiff/diff.py index 10185db..3c49b97 100644 --- a/patchdiff/diff.py +++ b/patchdiff/diff.py @@ -136,36 +136,43 @@ def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]: def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]: ops, rops = [], [] - input_keys = set(input.keys()) - output_keys = set(output.keys()) - for key in input_keys - output_keys: - ops.append({"op": "remove", "path": ptr.append(key)}) - rops.insert(0, {"op": "add", "path": ptr.append(key), "value": input[key]}) - for key in output_keys - input_keys: - ops.append( - { - "op": "add", - "path": ptr.append(key), - "value": output[key], - } - ) - rops.insert(0, {"op": "remove", "path": ptr.append(key)}) - for key in input_keys & output_keys: - key_ops, key_rops = diff(input[key], output[key], ptr.append(key)) - ops.extend(key_ops) - key_rops.extend(rops) - rops = key_rops + input_keys = set(input.keys()) if input else set() + output_keys = set(output.keys()) if output else set() + if (input_only := input_keys - output_keys): + for key in input_only: + key_ptr = ptr.append(key) + ops.append({"op": "remove", "path": key_ptr}) + rops.insert(0, {"op": "add", "path": key_ptr, "value": input[key]}) + if (output_only := output_keys - input_keys): + for key in output_only: + key_ptr = ptr.append(key) + ops.append( + { + "op": "add", + "path": key_ptr, + "value": output[key], + } + ) + rops.insert(0, {"op": "remove", "path": key_ptr}) + if (common := input_keys & output_keys): + for key in common: + key_ops, key_rops = diff(input[key], output[key], ptr.append(key)) + ops.extend(key_ops) + key_rops.extend(rops) + rops = key_rops return ops, rops def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]: ops, rops = [], [] - for value in input - output: - ops.append({"op": "remove", "path": ptr.append(value)}) - rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value}) - for value in output - input: - ops.append({"op": "add", "path": ptr.append("-"), "value": value}) - rops.insert(0, {"op": "remove", "path": ptr.append(value)}) + if (input_only := input - output): + for value in input_only: + ops.append({"op": "remove", "path": ptr.append(value)}) + rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value}) + if (output_only := output - input): + for value in output_only: + ops.append({"op": "add", "path": ptr.append("-"), "value": value}) + rops.insert(0, {"op": "remove", "path": ptr.append(value)}) return ops, rops diff --git a/patchdiff/pointer.py b/patchdiff/pointer.py index b1a1b31..abb2532 100644 --- a/patchdiff/pointer.py +++ b/patchdiff/pointer.py @@ -20,6 +20,8 @@ def escape(token: str) -> str: class Pointer: + __slots__ = ("tokens",) + def __init__(self, tokens: Iterable[Hashable] | None = None) -> None: if tokens is None: tokens = [] @@ -40,7 +42,7 @@ def __hash__(self) -> int: return hash(self.tokens) def __eq__(self, other: "Pointer") -> bool: - if not isinstance(other, self.__class__): + if other.__class__ != self.__class__: return False return self.tokens == other.tokens @@ -48,16 +50,14 @@ def evaluate(self, obj: Diffable) -> Tuple[Diffable, Hashable, Any]: key = "" parent = None cursor = obj - for key in self.tokens: + if not (tokens := self.tokens): + return parent, key, cursor + for key in tokens: parent = cursor - if hasattr(parent, "add"): # set - break - if hasattr(parent, "append"): # list - if key == "-": - break try: cursor = parent[key] - except KeyError: + except (KeyError, TypeError): + # KeyError for dicts, TypeError for sets and lists break return parent, key, cursor From cf28fa2736bf7ee6ba0730bb75ad5f6e1ad49c4e Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 17:17:09 +0100 Subject: [PATCH 04/11] ruff --- benchmarks/benchmark.py | 13 +------------ patchdiff/diff.py | 10 +++++----- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index f9e0d30..2f3c6ac 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -188,7 +188,6 @@ def test_apply_list_1000_elements(benchmark): benchmark(apply, a, ops) - # ======================================== # Pointer Benchmarks # ======================================== @@ -197,17 +196,7 @@ def test_apply_list_1000_elements(benchmark): @pytest.mark.benchmark(group="pointer") def test_pointer_evaluate_deeply_nested(benchmark): """Benchmark: Evaluate pointer on deeply nested structure.""" - obj = { - "key_0": { - "key_1": { - "key_2": { - "key_3": { - "key_4": 42 - } - } - } - } - } + obj = {"key_0": {"key_1": {"key_2": {"key_3": {"key_4": 42}}}}} ptr = Pointer.from_str("/key_0/key_1/key_2/key_3/key_4") benchmark(ptr.evaluate, obj) diff --git a/patchdiff/diff.py b/patchdiff/diff.py index 3c49b97..e74cba8 100644 --- a/patchdiff/diff.py +++ b/patchdiff/diff.py @@ -138,12 +138,12 @@ def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]: ops, rops = [], [] input_keys = set(input.keys()) if input else set() output_keys = set(output.keys()) if output else set() - if (input_only := input_keys - output_keys): + if input_only := input_keys - output_keys: for key in input_only: key_ptr = ptr.append(key) ops.append({"op": "remove", "path": key_ptr}) rops.insert(0, {"op": "add", "path": key_ptr, "value": input[key]}) - if (output_only := output_keys - input_keys): + if output_only := output_keys - input_keys: for key in output_only: key_ptr = ptr.append(key) ops.append( @@ -154,7 +154,7 @@ def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]: } ) rops.insert(0, {"op": "remove", "path": key_ptr}) - if (common := input_keys & output_keys): + if common := input_keys & output_keys: for key in common: key_ops, key_rops = diff(input[key], output[key], ptr.append(key)) ops.extend(key_ops) @@ -165,11 +165,11 @@ def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]: def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]: ops, rops = [], [] - if (input_only := input - output): + if input_only := input - output: for value in input_only: ops.append({"op": "remove", "path": ptr.append(value)}) rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value}) - if (output_only := output - input): + if output_only := output - input: for value in output_only: ops.append({"op": "add", "path": ptr.append("-"), "value": value}) rops.insert(0, {"op": "remove", "path": ptr.append(value)}) From 73135321287787db7945eb67effb12516a1bc0a0 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 17:18:26 +0100 Subject: [PATCH 05/11] slightly cleaner --- patchdiff/pointer.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/patchdiff/pointer.py b/patchdiff/pointer.py index abb2532..63856f1 100644 --- a/patchdiff/pointer.py +++ b/patchdiff/pointer.py @@ -50,15 +50,14 @@ def evaluate(self, obj: Diffable) -> Tuple[Diffable, Hashable, Any]: key = "" parent = None cursor = obj - if not (tokens := self.tokens): - return parent, key, cursor - for key in tokens: - parent = cursor - try: - cursor = parent[key] - except (KeyError, TypeError): - # KeyError for dicts, TypeError for sets and lists - break + if tokens := self.tokens: + for key in tokens: + parent = cursor + try: + cursor = parent[key] + except (KeyError, TypeError): + # KeyError for dicts, TypeError for sets and lists + break return parent, key, cursor def append(self, token: Hashable) -> "Pointer": From 401f2fe5d2528a428421fc53ec1682d1caf5d816 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 22:02:13 +0100 Subject: [PATCH 06/11] review --- .github/workflows/benchmark.yml | 61 +++++++++++++-------------------- patchdiff/pointer.py | 12 +++---- 2 files changed, 30 insertions(+), 43 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 2ed3411..324b579 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,9 +1,6 @@ name: Benchmarks on: - push: - branches: - - master pull_request: branches: - master @@ -21,49 +18,39 @@ jobs: - name: Set up Python 3.14 uses: actions/setup-python@v5 with: - python-version: '3.14' + python-version: "3.14" - name: Install dependencies run: uv sync - # Restore benchmark baseline (read-only for PRs) - - name: Restore benchmark baseline - uses: actions/cache/restore@v4 - with: - path: .benchmarks - key: benchmark-baseline-3.14-${{ runner.os }} - - # On master: save baseline results - - name: Run benchmarks and save baseline - if: github.ref == 'refs/heads/master' - continue-on-error: true + # On PRs: run benchmarks twice (PR code vs master code) and compare + - name: Run benchmarks run: | - uv run --no-sync pytest benchmarks/benchmark.py \ - --benchmark-only \ - --benchmark-autosave \ - --benchmark-sort=name + # Checkout master version of observ directory + git fetch origin master + git checkout origin/master -- observ/ - # On master: cache the new baseline results - - name: Save benchmark baseline - if: github.ref == 'refs/heads/master' - uses: actions/cache/save@v4 - with: - path: .benchmarks - key: benchmark-baseline-3.14-${{ runner.os }} + # Run benchmarks with master code as baseline + uv run --no-sync pytest bench \ + --benchmark-only \ + --benchmark-save=master \ + --benchmark-sort=mean || true - # On PRs: compare against baseline and fail if degraded - - name: Run benchmarks and compare - if: github.event_name == 'pull_request' - run: | - if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then - echo "No baseline found, not comparing" - uv run --no-sync pytest -v benchmarks/benchmark.py - exit - fi + # Restore PR code + git checkout HEAD -- observ/ - uv run --no-sync pytest benchmarks/benchmark.py \ + # Run benchmarks on PR code and compare + uv run --no-sync pytest bench \ --benchmark-only \ --benchmark-compare \ --benchmark-compare-fail=mean:5% \ - --benchmark-sort=name + --benchmark-save=branch \ + --benchmark-sort=mean + - name: Upload benchmarks + if: always() + uses: actions/upload-artifact@v4 + with: + name: Benchmarks + path: .benchmarks/ + include-hidden-files: true \ No newline at end of file diff --git a/patchdiff/pointer.py b/patchdiff/pointer.py index 63856f1..9948525 100644 --- a/patchdiff/pointer.py +++ b/patchdiff/pointer.py @@ -51,13 +51,13 @@ def evaluate(self, obj: Diffable) -> Tuple[Diffable, Hashable, Any]: parent = None cursor = obj if tokens := self.tokens: - for key in tokens: - parent = cursor - try: + try: + for key in tokens: + parent = cursor cursor = parent[key] - except (KeyError, TypeError): - # KeyError for dicts, TypeError for sets and lists - break + except (KeyError, TypeError): + # KeyError for dicts, TypeError for sets and lists + pass return parent, key, cursor def append(self, token: Hashable) -> "Pointer": From aa2af5253ee914c407edf2c85412db1a502a0361 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 22:03:26 +0100 Subject: [PATCH 07/11] pay attention --- .github/workflows/benchmark.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 324b579..9ce153f 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -26,9 +26,9 @@ jobs: # On PRs: run benchmarks twice (PR code vs master code) and compare - name: Run benchmarks run: | - # Checkout master version of observ directory + # Checkout master version of patchdiff directory git fetch origin master - git checkout origin/master -- observ/ + git checkout origin/master -- patchdiff/ # Run benchmarks with master code as baseline uv run --no-sync pytest bench \ @@ -37,7 +37,7 @@ jobs: --benchmark-sort=mean || true # Restore PR code - git checkout HEAD -- observ/ + git checkout HEAD -- patchdiff/ # Run benchmarks on PR code and compare uv run --no-sync pytest bench \ From 2a0adad8fa4126684a249738c1e4c89dc53b7b64 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 22:05:35 +0100 Subject: [PATCH 08/11] pay attention pt 2 --- .github/workflows/benchmark.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 9ce153f..e649f86 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,7 +31,7 @@ jobs: git checkout origin/master -- patchdiff/ # Run benchmarks with master code as baseline - uv run --no-sync pytest bench \ + uv run --no-sync pytest benchmarks \ --benchmark-only \ --benchmark-save=master \ --benchmark-sort=mean || true @@ -40,7 +40,7 @@ jobs: git checkout HEAD -- patchdiff/ # Run benchmarks on PR code and compare - uv run --no-sync pytest bench \ + uv run --no-sync pytest benchmarks \ --benchmark-only \ --benchmark-compare \ --benchmark-compare-fail=mean:5% \ From 504636806e7e3e1062e732a0694297773174b653 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 22:07:57 +0100 Subject: [PATCH 09/11] like this? --- .github/workflows/benchmark.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index e649f86..5937496 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,7 +31,7 @@ jobs: git checkout origin/master -- patchdiff/ # Run benchmarks with master code as baseline - uv run --no-sync pytest benchmarks \ + uv run --no-sync pytest benchmarks/benchmark.py \ --benchmark-only \ --benchmark-save=master \ --benchmark-sort=mean || true @@ -40,7 +40,7 @@ jobs: git checkout HEAD -- patchdiff/ # Run benchmarks on PR code and compare - uv run --no-sync pytest benchmarks \ + uv run --no-sync pytest benchmarks/benchmark.py \ --benchmark-only \ --benchmark-compare \ --benchmark-compare-fail=mean:5% \ From b18d9362460830323309554e2916dc7113fe3f95 Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 22:16:41 +0100 Subject: [PATCH 10/11] improve benchmarks --- benchmarks/benchmark.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index 2f3c6ac..a540dbe 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -194,19 +194,27 @@ def test_apply_list_1000_elements(benchmark): @pytest.mark.benchmark(group="pointer") -def test_pointer_evaluate_deeply_nested(benchmark): +def test_pointer_evaluate_deep_dict(benchmark): """Benchmark: Evaluate pointer on deeply nested structure.""" - obj = {"key_0": {"key_1": {"key_2": {"key_3": {"key_4": 42}}}}} - ptr = Pointer.from_str("/key_0/key_1/key_2/key_3/key_4") + depth = 100 + obj = 42 + for i in range(depth - 1, -1, -1): + obj = {f"key_{i}": obj} + ptr = Pointer([f"key_{i}" for i in range(depth)]) benchmark(ptr.evaluate, obj) @pytest.mark.benchmark(group="pointer") -def test_pointer_evaluate_large_list(benchmark): - """Benchmark: Evaluate pointer on large list.""" - obj = [i for i in range(10000)] - ptr = Pointer([5000]) +def test_pointer_evaluate_deep_list(benchmark): + """Benchmark: Evaluate pointer on deep lists.""" + # Build nested lists 100 levels deep; innermost value is 42. + depth = 100 + nested = 42 + for _ in range(depth): + nested = [nested] + obj = nested + ptr = Pointer([0] * depth) benchmark(ptr.evaluate, obj) From 024d2f87d9465d0518b7327c5f3530c98e3cc9eb Mon Sep 17 00:00:00 2001 From: Korijn van Golen Date: Sun, 14 Dec 2025 22:19:45 +0100 Subject: [PATCH 11/11] group benchmarks --- benchmarks/benchmark.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index a540dbe..700c031 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -189,11 +189,11 @@ def test_apply_list_1000_elements(benchmark): # ======================================== -# Pointer Benchmarks +# Pointer Evaluate Benchmarks # ======================================== -@pytest.mark.benchmark(group="pointer") +@pytest.mark.benchmark(group="pointer-evaluate") def test_pointer_evaluate_deep_dict(benchmark): """Benchmark: Evaluate pointer on deeply nested structure.""" depth = 100 @@ -205,7 +205,7 @@ def test_pointer_evaluate_deep_dict(benchmark): benchmark(ptr.evaluate, obj) -@pytest.mark.benchmark(group="pointer") +@pytest.mark.benchmark(group="pointer-evaluate") def test_pointer_evaluate_deep_list(benchmark): """Benchmark: Evaluate pointer on deep lists.""" # Build nested lists 100 levels deep; innermost value is 42. @@ -219,7 +219,12 @@ def test_pointer_evaluate_deep_list(benchmark): benchmark(ptr.evaluate, obj) -@pytest.mark.benchmark(group="pointer") +# ======================================== +# Pointer Append Benchmarks +# ======================================== + + +@pytest.mark.benchmark(group="pointer-append") def test_pointer_append(benchmark): """Benchmark: Append token to pointer.""" ptr = Pointer.from_str("/a/b/c/d/e/f/g/h/i/j")