"""Compare pytest-benchmark results between two commits.

Reads the run files matching ``.benchmarks/*/*.json``, keeps the benchmarks
that were measured for both commits on the same Python version and prints the
relative change of one statistic as a GitHub-flavoured markdown table. With
``--comment-file`` the table is additionally written to a file, wrapped in
``COMMENT_TEMPLATE``.
"""
import json
import subprocess
from argparse import ArgumentParser
from glob import iglob
from itertools import chain, groupby
from operator import itemgetter


# NOTE(review): the template starts with blank lines only; if the workflow is
# supposed to find an earlier comment through a hidden marker in the body,
# that marker is not present here -- confirm.
COMMENT_TEMPLATE = """


Comparing *{stat}* ({better} is better) metric of benchmarks between this PR's target ({old}) and the HEAD of this PR ({new}):

{table}

*(This comment will be updated on subsequent pushes)*
"""

# which direction counts as an improvement, per statistic name
BETTER = {
    'median': 'lower',
    'ops': 'higher',
}


def compare_benchmarks(benchmarks, old, new):
    """
    Yield the relative change of every benchmark between two commits.

    :param benchmarks: iterable of ``(benchmark, python, by_commit)`` tuples,
        *by_commit* mapping a commit hash to the measured value
    :param old: commit hash of the baseline
    :param new: commit hash to compare against the baseline
    :return: generator of ``(benchmark, python, difference)`` tuples, where
        *difference* is ``(new - old) / old``, or ``None`` if the baseline
        value is zero (no relative change can be expressed)
    :raises KeyError: if *by_commit* lacks a value for *old* or *new*
    """
    for benchmark, python, by_commit in benchmarks:
        result_old, result_new = by_commit[old], by_commit[new]
        if not result_old:
            # avoid ZeroDivisionError; None ends up as an empty table cell
            yield benchmark, python, None
            continue
        yield benchmark, python, (result_new - result_old) / result_old


def combine_runs(runs, commits):
    """
    Merge the values of multiple runs per benchmark and Python version.

    :param runs: iterable of iterables of
        ``(benchmark, python, commit, value)`` tuples
    :param commits: collection of commit hashes of interest
    :return: generator of ``(benchmark, python, by_commit)`` tuples, limited
        to the combinations that have a value for every commit in *commits*
    """
    # groupby only groups adjacent items, sort everything first
    stats = sorted(chain.from_iterable(runs))
    for benchmark, by_python in groupby(stats, key=itemgetter(0)):
        for python, by_commit in groupby(by_python, key=itemgetter(1)):
            by_commit = {commit: value for *_, commit, value in by_commit if commit in commits}
            if len(by_commit) == len(commits):
                yield benchmark, python, by_commit


def read_run(run, stat='median'):
    """
    Extract one statistic for every benchmark in a loaded run file.

    :param run: decoded JSON content of a run file
    :param stat: name of the statistic to read from a benchmark's ``stats``
    :return: generator of ``(benchmark, python, commit, value)`` tuples,
        *python* being ``major.minor``, prefixed with the implementation name
        for anything other than CPython
    """
    python_implementation = run['machine_info']['python_implementation']
    python_version = '.'.join(run['machine_info']['python_version'].split('.')[:2])
    if python_implementation != 'CPython':
        python_version = f'{python_implementation} {python_version}'

    commit = run['commit_info']['id']

    for benchmark in run['benchmarks']:
        yield benchmark['name'], python_version, commit, benchmark['stats'][stat]


def loadf(f):
    """
    Load the JSON document stored at path *f*.

    :param f: path of the file to read
    :return: the decoded JSON content
    """
    # explicit encoding: don't depend on the locale of the machine
    with open(f, 'r', encoding='utf-8') as stream:
        return json.load(stream)


def to_table(benchmarks):
    """
    Pivot benchmark differences into table headers and rows.

    :param benchmarks: iterable of ``(benchmark, python, difference)`` tuples
    :return: ``(headers, table)``; *headers* is an empty first cell followed
        by every Python version encountered (in order of first appearance),
        *table* holds one ``(benchmark, *differences)`` row per benchmark,
        with ``None`` for a Python version the benchmark has no value for;
        empty input results in ``((), [])``
    """
    rows = {}
    for benchmark_name, python, difference in benchmarks:
        rows.setdefault(benchmark_name, {})[python] = difference

    if not rows:
        return (), []

    # collect columns across *all* benchmarks, a single benchmark might be
    # missing a Python version, which would shift values to the wrong column
    pythons = list(dict.fromkeys(chain.from_iterable(rows.values())))
    headers = ('', *pythons)
    table = [
        (benchmark_name, *(by_python.get(python) for python in pythons))
        for benchmark_name, by_python in rows.items()
    ]

    return headers, table


def _rev_parse(ref):
    """Resolve git reference *ref* with ``git rev-parse``."""
    return subprocess.check_output(('git', 'rev-parse', ref), text=True).strip()


def main():
    """Parse command line arguments, print the table, write the comment file if requested."""
    # imported here to keep the functions above usable without tabulate
    from tabulate import tabulate

    parser = ArgumentParser()
    parser.add_argument('--old', metavar='REF', required=True)
    parser.add_argument('--new', metavar='REF', default='HEAD')
    parser.add_argument('--stat', default='median')
    parser.add_argument('--comment-file')

    args = parser.parse_args()

    # pytest-benchmark will store full commit hashes, git rev-parse the old and new references to get the commit hashes
    old = _rev_parse(args.old)
    new = _rev_parse(args.new)

    benchmarks = (read_run(loadf(f), stat=args.stat) for f in iglob('.benchmarks/*/*.json'))
    benchmarks = combine_runs(benchmarks, commits={old, new})
    benchmarks = compare_benchmarks(benchmarks, old=old, new=new)
    headers, table = to_table(benchmarks)

    table = tabulate(table, headers=headers, tablefmt='github', floatfmt='+.0%')
    print(table)

    if args.comment_file:
        with open(args.comment_file, 'wt', encoding='utf-8') as comment_file:
            comment_file.write(
                COMMENT_TEMPLATE.format(
                    old=old,
                    new=new,
                    stat=args.stat,
                    better=BETTER.get(args.stat, 'lower'),
                    table=table,
                )
            )


if __name__ == '__main__':
    main()
run: pdm run test - name: Run benchmarks run: pdm run benchmark --benchmark-save '${{ matrix.python-version }}-${{ github.sha }}' + - name: Run benchmarks for target + if: ${{ github.event.pull_request.base.sha }} + run: | + git switch --detach ${{ github.event.pull_request.base.sha }} + pdm run benchmark --benchmark-save '${{ matrix.python-version }}-${{ github.event.pull_request.base.sha }}' + git switch --detach - - name: Upload benchmark results + if: ${{ github.event.pull_request.base.sha }} uses: actions/upload-artifact@v4 with: - name: 'benchmarks-${{ matrix.python-version }}-${{ github.sha }}' + name: 'benchmarks-${{ matrix.python-version }}' path: '.benchmarks/*/*.json' include-hidden-files: true retention-days: 1 + + compare-benchmarks: + runs-on: ubuntu-latest + needs: test + if: ${{ github.event.pull_request }} + steps: + - uses: actions/checkout@v5 + - uses: pdm-project/setup-pdm@v4 + with: + python-version: '3.14' + cache: true + cache-dependency-path: 'pylock.toml' + - name: Install test dependencies + run: pdm install --group test + - uses: actions/download-artifact@v5 + with: + pattern: benchmarks-* + path: .benchmarks/ + merge-multiple: true + - name: Compare benchmarks + run: pdm run python .github/compare-benchmarks.py --old ${{ github.event.pull_request.base.sha }} --new ${{ github.sha }} --comment-file ./BENCHMARK_COMMENT + - uses: edumserrano/find-create-or-update-comment@v3 + with: + issue-number: ${{ github.event.pull_request.number }} + body-includes: '' + comment-author: 'github-actions[bot]' + edit-mode: replace + body-path: './BENCHMARK_COMMENT' diff --git a/pylock.toml b/pylock.toml index 7da8eb6..667c5a4 100644 --- a/pylock.toml +++ b/pylock.toml @@ -310,6 +310,19 @@ marker = "\"check\" in dependency_groups" [packages.tool.pdm] dependencies = [] +[[packages]] +name = "tabulate" +version = "0.9.0" +requires-python = ">=3.7" +sdist = {name = "tabulate-0.9.0.tar.gz", url = 
"https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hashes = {sha256 = "0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}} +wheels = [ + {name = "tabulate-0.9.0-py3-none-any.whl",url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl",hashes = {sha256 = "024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}}, +] +marker = "\"test\" in dependency_groups" + +[packages.tool.pdm] +dependencies = [] + [[packages]] name = "types-pyyaml" version = "6.0.12.20250915" @@ -590,7 +603,7 @@ marker = "\"test\" in dependency_groups" dependencies = [] [tool.pdm] -hashes = {sha256 = "4d844899bab0d12815b3587bacd58900cc01d8718caf0ba2d0274e0803136606"} +hashes = {sha256 = "84bb8a9b1c809227e3051f501aa2ea8a09d588205a7c1204a4ddc2fb23b57550"} strategy = ["inherit_metadata", "static_urls"] [[tool.pdm.targets]] diff --git a/pyproject.toml b/pyproject.toml index 5a904e3..6ead834 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,6 @@ classifiers = [ [project.urls] homepage = "https://github.com/NetherlandsForensicInstitute/confidence/" -[tool.pdm] -version = {source = "scm"} - [dependency-groups] check = [ "mypy", @@ -44,11 +41,23 @@ test = [ "coverage", "pytest", "pytest-benchmark", + "tabulate", ] +[tool.pdm] +version = {source = "scm"} + [tool.pdm.scripts] all = {composite = ["check", "test"]} benchmark = "pdm run test --benchmark-only --benchmark-autosave tests/" +benchmark-against = {keep_going = true, composite = [ + "git rev-parse {args}", + "pdm run benchmark", + "git switch --detach {args}", + "pdm run benchmark", + "git switch -", + "pdm run python .github/compare-benchmarks.py --old {args}" +]} check = {composite = ["check-lock", "format", "lint", "type-check"]} check-lock = "pdm lock --check" format = "ruff format --diff confidence/ tests/"