diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 2ed3411..5937496 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,9 +1,6 @@ name: Benchmarks on: - push: - branches: - - master pull_request: branches: - master @@ -21,49 +18,39 @@ jobs: - name: Set up Python 3.14 uses: actions/setup-python@v5 with: - python-version: '3.14' + python-version: "3.14" - name: Install dependencies run: uv sync - # Restore benchmark baseline (read-only for PRs) - - name: Restore benchmark baseline - uses: actions/cache/restore@v4 - with: - path: .benchmarks - key: benchmark-baseline-3.14-${{ runner.os }} - - # On master: save baseline results - - name: Run benchmarks and save baseline - if: github.ref == 'refs/heads/master' - continue-on-error: true + # On PRs: run benchmarks twice (PR code vs master code) and compare + - name: Run benchmarks run: | - uv run --no-sync pytest benchmarks/benchmark.py \ - --benchmark-only \ - --benchmark-autosave \ - --benchmark-sort=name + # Checkout master version of patchdiff directory + git fetch origin master + git checkout origin/master -- patchdiff/ - # On master: cache the new baseline results - - name: Save benchmark baseline - if: github.ref == 'refs/heads/master' - uses: actions/cache/save@v4 - with: - path: .benchmarks - key: benchmark-baseline-3.14-${{ runner.os }} + # Run benchmarks with master code as baseline + uv run --no-sync pytest benchmarks/benchmark.py \ + --benchmark-only \ + --benchmark-save=master \ + --benchmark-sort=mean || true - # On PRs: compare against baseline and fail if degraded - - name: Run benchmarks and compare - if: github.event_name == 'pull_request' - run: | - if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then - echo "No baseline found, not comparing" - uv run --no-sync pytest -v benchmarks/benchmark.py - exit - fi + # Restore PR code + git checkout HEAD -- patchdiff/ + # Run benchmarks on PR code and compare uv run --no-sync pytest 
benchmarks/benchmark.py \ --benchmark-only \ --benchmark-compare \ --benchmark-compare-fail=mean:5% \ - --benchmark-sort=name + --benchmark-save=branch \ + --benchmark-sort=mean + - name: Upload benchmarks + if: always() + uses: actions/upload-artifact@v4 + with: + name: Benchmarks + path: .benchmarks/ + include-hidden-files: true \ No newline at end of file diff --git a/.gitignore b/.gitignore index c9dfbba..3025943 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__ dist uv.lock .benchmarks/ +.venv diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index 2cca2e3..700c031 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -19,6 +19,7 @@ import pytest from patchdiff import apply, diff +from patchdiff.pointer import Pointer # Set seed for reproducibility random.seed(42) @@ -139,6 +140,27 @@ def test_dict_diff_nested(benchmark): benchmark(diff, a, b) +# ======================================== +# Set Diff Benchmarks +# ======================================== + + +@pytest.mark.benchmark(group="set-diff") +def test_set_diff_1000_elements(benchmark): + """Benchmark: Sets with 1000 elements, 10% difference.""" + a = set(generate_random_list(1000, 2000)) + b = a.copy() + # Remove 5% + a_list = list(a) + for i in range(50): + a.remove(a_list[i]) + # Add 5% + for i in range(50): + b.add(2000 + i) + + benchmark(diff, a, b) + + # ======================================== # Mixed Structure Benchmarks # ======================================== @@ -164,3 +186,47 @@ def test_apply_list_1000_elements(benchmark): ops, _ = diff(a, b) benchmark(apply, a, ops) + + +# ======================================== +# Pointer Evaluate Benchmarks +# ======================================== + + +@pytest.mark.benchmark(group="pointer-evaluate") +def test_pointer_evaluate_deep_dict(benchmark): + """Benchmark: Evaluate pointer on deeply nested structure.""" + depth = 100 + obj = 42 + for i in range(depth - 1, -1, -1): + obj = {f"key_{i}": obj} + ptr 
= Pointer([f"key_{i}" for i in range(depth)]) + + benchmark(ptr.evaluate, obj) + + +@pytest.mark.benchmark(group="pointer-evaluate") +def test_pointer_evaluate_deep_list(benchmark): + """Benchmark: Evaluate pointer on deep lists.""" + # Build nested lists 100 levels deep; innermost value is 42. + depth = 100 + nested = 42 + for _ in range(depth): + nested = [nested] + obj = nested + ptr = Pointer([0] * depth) + + benchmark(ptr.evaluate, obj) + + +# ======================================== +# Pointer Append Benchmarks +# ======================================== + + +@pytest.mark.benchmark(group="pointer-append") +def test_pointer_append(benchmark): + """Benchmark: Append token to pointer.""" + ptr = Pointer.from_str("/a/b/c/d/e/f/g/h/i/j") + + benchmark(ptr.append, "k") diff --git a/patchdiff/apply.py b/patchdiff/apply.py index 7d5d3cd..edc8b0a 100644 --- a/patchdiff/apply.py +++ b/patchdiff/apply.py @@ -6,6 +6,8 @@ def iapply(obj: Diffable, patches: List[Dict]) -> Diffable: """Apply a set of patches to an object, in-place""" + if not patches: + return obj for patch in patches: ptr = patch["path"] op = patch["op"] diff --git a/patchdiff/diff.py b/patchdiff/diff.py index 10185db..e74cba8 100644 --- a/patchdiff/diff.py +++ b/patchdiff/diff.py @@ -136,36 +136,43 @@ def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]: def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]: ops, rops = [], [] - input_keys = set(input.keys()) - output_keys = set(output.keys()) - for key in input_keys - output_keys: - ops.append({"op": "remove", "path": ptr.append(key)}) - rops.insert(0, {"op": "add", "path": ptr.append(key), "value": input[key]}) - for key in output_keys - input_keys: - ops.append( - { - "op": "add", - "path": ptr.append(key), - "value": output[key], - } - ) - rops.insert(0, {"op": "remove", "path": ptr.append(key)}) - for key in input_keys & output_keys: - key_ops, key_rops = diff(input[key], output[key], ptr.append(key)) - 
ops.extend(key_ops) - key_rops.extend(rops) - rops = key_rops + input_keys = set(input.keys()) if input else set() + output_keys = set(output.keys()) if output else set() + if input_only := input_keys - output_keys: + for key in input_only: + key_ptr = ptr.append(key) + ops.append({"op": "remove", "path": key_ptr}) + rops.insert(0, {"op": "add", "path": key_ptr, "value": input[key]}) + if output_only := output_keys - input_keys: + for key in output_only: + key_ptr = ptr.append(key) + ops.append( + { + "op": "add", + "path": key_ptr, + "value": output[key], + } + ) + rops.insert(0, {"op": "remove", "path": key_ptr}) + if common := input_keys & output_keys: + for key in common: + key_ops, key_rops = diff(input[key], output[key], ptr.append(key)) + ops.extend(key_ops) + key_rops.extend(rops) + rops = key_rops return ops, rops def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]: ops, rops = [], [] - for value in input - output: - ops.append({"op": "remove", "path": ptr.append(value)}) - rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value}) - for value in output - input: - ops.append({"op": "add", "path": ptr.append("-"), "value": value}) - rops.insert(0, {"op": "remove", "path": ptr.append(value)}) + if input_only := input - output: + for value in input_only: + ops.append({"op": "remove", "path": ptr.append(value)}) + rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value}) + if output_only := output - input: + for value in output_only: + ops.append({"op": "add", "path": ptr.append("-"), "value": value}) + rops.insert(0, {"op": "remove", "path": ptr.append(value)}) return ops, rops diff --git a/patchdiff/pointer.py b/patchdiff/pointer.py index b1a1b31..9948525 100644 --- a/patchdiff/pointer.py +++ b/patchdiff/pointer.py @@ -20,6 +20,8 @@ def escape(token: str) -> str: class Pointer: + __slots__ = ("tokens",) + def __init__(self, tokens: Iterable[Hashable] | None = None) -> None: if tokens is None: tokens = [] @@ 
-40,7 +42,7 @@ def __hash__(self) -> int: return hash(self.tokens) def __eq__(self, other: "Pointer") -> bool: - if not isinstance(other, self.__class__): + if other.__class__ != self.__class__: return False return self.tokens == other.tokens @@ -48,17 +50,14 @@ def evaluate(self, obj: Diffable) -> Tuple[Diffable, Hashable, Any]: key = "" parent = None cursor = obj - for key in self.tokens: - parent = cursor - if hasattr(parent, "add"): # set - break - if hasattr(parent, "append"): # list - if key == "-": - break + if tokens := self.tokens: try: - cursor = parent[key] - except KeyError: - break + for key in tokens: + parent = cursor + cursor = parent[key] + except (KeyError, TypeError): + # KeyError: missing dict key; TypeError: set parent or non-integer list token such as "-" (IndexError is not caught) + pass return parent, key, cursor def append(self, token: Hashable) -> "Pointer":