Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 23 additions & 36 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
name: Benchmarks

on:
push:
branches:
- master
pull_request:
branches:
- master
Expand All @@ -21,49 +18,39 @@ jobs:
- name: Set up Python 3.14
uses: actions/setup-python@v5
with:
python-version: '3.14'
python-version: "3.14"

- name: Install dependencies
run: uv sync

# Restore benchmark baseline (read-only for PRs)
- name: Restore benchmark baseline
uses: actions/cache/restore@v4
with:
path: .benchmarks
key: benchmark-baseline-3.14-${{ runner.os }}

# On master: save baseline results
- name: Run benchmarks and save baseline
if: github.ref == 'refs/heads/master'
continue-on-error: true
# On PRs: run benchmarks twice (PR code vs master code) and compare
- name: Run benchmarks
run: |
uv run --no-sync pytest benchmarks/benchmark.py \
--benchmark-only \
--benchmark-autosave \
--benchmark-sort=name
# Checkout master version of patchdiff directory
git fetch origin master
git checkout origin/master -- patchdiff/

# On master: cache the new baseline results
- name: Save benchmark baseline
if: github.ref == 'refs/heads/master'
uses: actions/cache/save@v4
with:
path: .benchmarks
key: benchmark-baseline-3.14-${{ runner.os }}
# Run benchmarks with master code as baseline
uv run --no-sync pytest benchmarks/benchmark.py \
--benchmark-only \
--benchmark-save=master \
--benchmark-sort=mean || true

# On PRs: compare against baseline and fail if degraded
- name: Run benchmarks and compare
if: github.event_name == 'pull_request'
run: |
if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then
echo "No baseline found, not comparing"
uv run --no-sync pytest -v benchmarks/benchmark.py
exit
fi
# Restore PR code
git checkout HEAD -- patchdiff/

# Run benchmarks on PR code and compare
uv run --no-sync pytest benchmarks/benchmark.py \
--benchmark-only \
--benchmark-compare \
--benchmark-compare-fail=mean:5% \
--benchmark-sort=name
--benchmark-save=branch \
--benchmark-sort=mean

- name: Upload benchmarks
if: always()
uses: actions/upload-artifact@v4
with:
name: Benchmarks
path: .benchmarks/
include-hidden-files: true
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
dist
uv.lock
.benchmarks/
.venv
66 changes: 66 additions & 0 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest

from patchdiff import apply, diff
from patchdiff.pointer import Pointer

# Set seed for reproducibility
random.seed(42)
Expand Down Expand Up @@ -139,6 +140,27 @@ def test_dict_diff_nested(benchmark):
benchmark(diff, a, b)


# ========================================
# Set Diff Benchmarks
# ========================================


@pytest.mark.benchmark(group="set-diff")
def test_set_diff_1000_elements(benchmark):
    """Benchmark: diff two large sets that differ by 50 removals and 50 additions.

    ``a`` starts as the set built from ``generate_random_list(1000, 2000)``
    and ``b`` as a copy of it. Fifty members are then removed from ``a``
    only, and fifty new integers (2000..2049) are added to ``b`` only.
    """
    a = set(generate_random_list(1000, 2000))
    b = a.copy()

    # Take the first 50 members (in list order) out of ``a``; ``b`` keeps them.
    members = list(a)
    a.difference_update(members[idx] for idx in range(50))

    # Add 50 values starting at 2000 to ``b``; ``a`` never receives them.
    b.update(range(2000, 2050))

    benchmark(diff, a, b)


# ========================================
# Mixed Structure Benchmarks
# ========================================
Expand All @@ -164,3 +186,47 @@ def test_apply_list_1000_elements(benchmark):
ops, _ = diff(a, b)

benchmark(apply, a, ops)


# ========================================
# Pointer Evaluate Benchmarks
# ========================================


@pytest.mark.benchmark(group="pointer-evaluate")
def test_pointer_evaluate_deep_dict(benchmark):
    """Benchmark: evaluate a 100-token pointer against 100 nested dicts.

    The structure is ``{"key_0": {"key_1": ... {"key_99": 42}}}`` and the
    pointer lists the keys from the outermost to the innermost level.
    """
    depth = 100
    keys = [f"key_{level}" for level in range(depth)]

    # Wrap the leaf value from the inside out, innermost key first.
    obj = 42
    for key in reversed(keys):
        obj = {key: obj}

    ptr = Pointer(keys)

    benchmark(ptr.evaluate, obj)


@pytest.mark.benchmark(group="pointer-evaluate")
def test_pointer_evaluate_deep_list(benchmark):
    """Benchmark: evaluate a 100-token pointer against 100 nested lists.

    Every level is a single-element list, so each token is index ``0``;
    the innermost value is 42.
    """
    depth = 100

    # Wrap the leaf in one more single-element list per level.
    obj = 42
    for _level in range(depth):
        obj = [obj]

    ptr = Pointer([0 for _ in range(depth)])

    benchmark(ptr.evaluate, obj)


# ========================================
# Pointer Append Benchmarks
# ========================================


@pytest.mark.benchmark(group="pointer-append")
def test_pointer_append(benchmark):
    """Benchmark: append one token to a pointer parsed from a ten-segment path."""
    base = Pointer.from_str("/a/b/c/d/e/f/g/h/i/j")

    # Only the ``append`` call itself is timed; parsing happens once above.
    benchmark(base.append, "k")
2 changes: 2 additions & 0 deletions patchdiff/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

def iapply(obj: Diffable, patches: List[Dict]) -> Diffable:
"""Apply a set of patches to an object, in-place"""
if not patches:
return obj
for patch in patches:
ptr = patch["path"]
op = patch["op"]
Expand Down
57 changes: 32 additions & 25 deletions patchdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,36 +136,43 @@ def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:

def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
    """Diff two dicts key by key.

    Returns a ``(ops, rops)`` pair:

    * ``ops`` holds a ``remove`` op for every key found only in ``input``,
      an ``add`` op (carrying ``output[key]``) for every key found only in
      ``output``, followed by whatever ``diff`` reports for the values of
      keys present in both.
    * ``rops`` holds the mirrored operations. Each new entry is placed in
      front of the ones collected so far, so it lists the most recently
      processed key first.

    ``ptr`` is the pointer of the dicts themselves; every op path is
    ``ptr.append(key)``.
    """
    ops, rops = [], []
    # A falsy mapping contributes no keys at all.
    input_keys = set(input.keys()) if input else set()
    output_keys = set(output.keys()) if output else set()

    # Keys that disappeared: remove going forward, add back in reverse.
    for key in input_keys - output_keys:
        key_ptr = ptr.append(key)
        ops.append({"op": "remove", "path": key_ptr})
        rops.insert(0, {"op": "add", "path": key_ptr, "value": input[key]})

    # Keys that appeared: add going forward, remove in reverse.
    for key in output_keys - input_keys:
        key_ptr = ptr.append(key)
        ops.append({"op": "add", "path": key_ptr, "value": output[key]})
        rops.insert(0, {"op": "remove", "path": key_ptr})

    # Keys on both sides: delegate to ``diff`` for the two values.
    for key in input_keys & output_keys:
        key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
        ops.extend(key_ops)
        # Reverse ops of this key go in front of everything gathered so far.
        rops[:0] = key_rops

    return ops, rops


def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
    """Diff two sets.

    Returns a ``(ops, rops)`` pair. Members found only in ``input`` yield a
    ``remove`` op whose path is ``ptr.append(member)``; members found only
    in ``output`` yield an ``add`` op whose path is ``ptr.append("-")`` and
    whose value is the member. ``rops`` holds the mirrored operations, each
    new one placed in front of those collected so far.
    """
    ops, rops = [], []

    # Members that were dropped.
    for member in input - output:
        ops.append({"op": "remove", "path": ptr.append(member)})
        rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": member})

    # Members that were introduced.
    for member in output - input:
        ops.append({"op": "add", "path": ptr.append("-"), "value": member})
        rops.insert(0, {"op": "remove", "path": ptr.append(member)})

    return ops, rops


Expand Down
21 changes: 10 additions & 11 deletions patchdiff/pointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def escape(token: str) -> str:


class Pointer:
__slots__ = ("tokens",)

def __init__(self, tokens: Iterable[Hashable] | None = None) -> None:
if tokens is None:
tokens = []
Expand All @@ -40,25 +42,22 @@ def __hash__(self) -> int:
return hash(self.tokens)

def __eq__(self, other: "Pointer") -> bool:
if not isinstance(other, self.__class__):
if other.__class__ != self.__class__:
return False
return self.tokens == other.tokens

def evaluate(self, obj: Diffable) -> Tuple[Diffable, Hashable, Any]:
    """Walk ``obj`` along this pointer's tokens.

    Returns ``(parent, key, cursor)``: ``key`` is the last token tried,
    ``parent`` the container it was looked up in, and ``cursor`` the value
    found there. With no tokens the result is ``(None, "", obj)``.

    If a lookup raises ``KeyError`` or ``TypeError`` the walk stops there
    and ``cursor`` stays equal to ``parent``. Any other exception (for
    example an out-of-range list index) propagates to the caller.
    """
    parent, key, cursor = None, "", obj

    tokens = self.tokens
    if not tokens:
        return parent, key, cursor

    try:
        for key in tokens:
            parent = cursor
            cursor = cursor[key]
    except (KeyError, TypeError):
        # KeyError comes from dicts, TypeError from sets and lists.
        pass

    return parent, key, cursor

def append(self, token: Hashable) -> "Pointer":
Expand Down