Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions .github/compare-benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import json
import subprocess
from argparse import ArgumentParser
from glob import iglob
from itertools import chain, groupby
from operator import itemgetter

from tabulate import tabulate


# Markdown body of the pull request comment, filled in through str.format() with the keys
# stat, better, old, new and table.
# The leading HTML comment is the marker the workflow's find-create-or-update-comment step
# searches for (body-includes), so it has to stay in sync with the workflow file.
COMMENT_TEMPLATE = """
<!-- compare-benchmarks.py -->

Comparing *{stat}* ({better} is better) metric of benchmarks between this PR's target ({old}) and the HEAD of this PR ({new}):

{table}

*(This comment will be updated on subsequent pushes)*
"""

# Direction that counts as an improvement, per benchmark statistic.
# Statistics that are not listed here are looked up with a fallback of 'lower' by the script below.
BETTER = {
'median': 'lower',
'ops': 'higher',
}

def compare_benchmarks(benchmarks, old, new):
    """
    Compute the relative change of every benchmark between two commits.

    :param benchmarks: iterable of ``(benchmark, python, by_commit)`` tuples,
        where ``by_commit`` maps a commit to the measured value
    :param old: commit that serves as the baseline
    :param new: commit that is compared against the baseline
    :return: generator of ``(benchmark, python, difference)`` tuples, with
        ``difference`` the change from old to new as a fraction of old
    """
    for name, interpreter, results in benchmarks:
        baseline = results[old]
        candidate = results[new]
        # positive: the value went up, negative: the value went down
        relative_change = (candidate - baseline) / baseline
        yield name, interpreter, relative_change


def combine_runs(runs, commits):
    """
    Merge the results of multiple runs into one mapping of values per benchmark and python version.

    :param runs: iterable of runs, each an iterable of
        ``(benchmark, python, commit, value)`` tuples
    :param commits: collection of commits that are of interest
    :return: generator of ``(benchmark, python, by_commit)`` tuples, limited to
        the combinations that have a value for every requested commit
    """
    # groupby only groups adjacent items, so sort everything before grouping
    ordered = sorted(chain.from_iterable(runs))

    for name, name_group in groupby(ordered, key=itemgetter(0)):
        for interpreter, rows in groupby(name_group, key=itemgetter(1)):
            values = {}
            for *_, commit, value in rows:
                if commit in commits:
                    values[commit] = value

            # skip combinations that miss a result for one of the commits
            if len(values) == len(commits):
                yield name, interpreter, values


def read_run(run, stat='median'):
    """
    Extract one statistic for every benchmark in a loaded pytest-benchmark run.

    :param run: the loaded JSON document of a single run
    :param stat: name of the statistic to read from each benchmark's stats
    :return: generator of ``(benchmark name, python, commit, value)`` tuples,
        where python is the major.minor version, prefixed with the
        implementation name for anything other than CPython
    """
    machine = run['machine_info']
    implementation = machine['python_implementation']
    # reduce something like 3.12.1 to 3.12
    major_minor = '.'.join(machine['python_version'].split('.')[:2])
    label = major_minor if implementation == 'CPython' else f'{implementation} {major_minor}'

    commit = run['commit_info']['id']

    for benchmark in run['benchmarks']:
        yield benchmark['name'], label, commit, benchmark['stats'][stat]


def loadf(f):
    """
    Load the JSON document stored in a file.

    :param f: path of the file to read
    :return: the decoded JSON content
    """
    # read as UTF-8 explicitly, the platform's default encoding is not
    # guaranteed to match the encoding of the JSON file
    with open(f, 'r', encoding='utf-8') as stream:
        return json.load(stream)


def to_table(benchmarks):
    """
    Pivot benchmark differences into a table with one row per benchmark and one column per python version.

    The columns are the union of the python versions of all benchmarks, in the
    order they are first encountered. A benchmark that has no result for a
    python version gets ``None`` in that column, so values always line up with
    their header.

    :param benchmarks: iterable of ``(benchmark, python, difference)`` tuples,
        with the items of a single benchmark adjacent to each other
    :return: tuple of ``(headers, table)``; headers start with an empty cell
        for the benchmark name column and are ``None`` when there are no
        benchmarks at all, table is a list of row tuples
    """
    # dict used as an insertion-ordered set of python versions
    columns = {}
    rows = []

    for benchmark_name, by_python in groupby(benchmarks, key=itemgetter(0)):
        by_python = {python: difference for *_, python, difference in by_python}
        columns.update(dict.fromkeys(by_python))
        rows.append((benchmark_name, by_python))

    headers = ('', *columns) if rows else None
    # look values up per column rather than relying on every benchmark having the same python versions
    table = [
        (benchmark_name, *(by_python.get(python) for python in columns))
        for benchmark_name, by_python in rows
    ]

    return headers, table


if __name__ == '__main__':
    # Command line entry point: compares the saved pytest-benchmark results of two git references,
    # prints the differences as a table and optionally writes them as a pull request comment to a file.
    parser = ArgumentParser()
    parser.add_argument('--old', metavar='REF', required=True)
    parser.add_argument('--new', metavar='REF', default='HEAD')
    parser.add_argument('--stat', default='median')
    parser.add_argument('--comment-file')

    args = parser.parse_args()

    # pytest-benchmark will store full commit hashes, git rev-parse the old and new references to get the commit hashes
    args.old = subprocess.check_output(('git', 'rev-parse', args.old), text=True).strip()
    args.new = subprocess.check_output(('git', 'rev-parse', args.new), text=True).strip()

    # everything up to to_table is lazy, files are only read once the table gets built
    benchmarks = (read_run(loadf(f), stat=args.stat) for f in iglob('.benchmarks/*/*.json'))
    benchmarks = combine_runs(benchmarks, commits={args.old, args.new})
    benchmarks = compare_benchmarks(benchmarks, old=args.old, new=args.new)
    headers, table = to_table(benchmarks)

    if not table:
        # without any comparable benchmark there are no headers to hand to tabulate,
        # fail with a clear message rather than an error from within tabulate
        raise SystemExit(f'no benchmark results found for both {args.old} and {args.new}')

    table = tabulate(table, headers=headers, tablefmt='github', floatfmt='+.0%')
    print(table)

    if args.comment_file:
        # explicit encoding, the comment should not depend on the platform's default
        with open(args.comment_file, 'wt', encoding='utf-8') as comment_file:
            comment_file.write(
                COMMENT_TEMPLATE.format(
                    old=args.old,
                    new=args.new,
                    stat=args.stat,
                    better=BETTER.get(args.stat, 'lower'),
                    table=table,
                )
            )
42 changes: 41 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
pull_request:

jobs:

check:
runs-on: ubuntu-latest
steps:
Expand All @@ -19,13 +20,17 @@ jobs:
run: pdm install --group check
- name: Run checks
run: pdm run check

test:
runs-on: ubuntu-latest
needs: check
strategy:
matrix:
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', 'pypy-3.10', 'pypy-3.11']
steps:
- uses: actions/checkout@v5
with:
fetch-depth: 0
- uses: pdm-project/setup-pdm@v4
with:
python-version: ${{ matrix.python-version }}
Expand All @@ -38,10 +43,45 @@ jobs:
run: pdm run test
- name: Run benchmarks
run: pdm run benchmark --benchmark-save '${{ matrix.python-version }}-${{ github.sha }}'
- name: Run benchmarks for target
if: ${{ github.event.pull_request.base.sha }}
run: |
git switch --detach ${{ github.event.pull_request.base.sha }}
pdm run benchmark --benchmark-save '${{ matrix.python-version }}-${{ github.event.pull_request.base.sha }}'
git switch --detach -
- name: Upload benchmark results
if: ${{ github.event.pull_request.base.sha }}
uses: actions/upload-artifact@v4
with:
name: 'benchmarks-${{ matrix.python-version }}-${{ github.sha }}'
name: 'benchmarks-${{ matrix.python-version }}'
path: '.benchmarks/*/*.json'
include-hidden-files: true
retention-days: 1

# Compares the benchmark results of the PR's target and the PR itself and posts the outcome as a PR comment.
compare-benchmarks:
runs-on: ubuntu-latest
# the benchmark results are uploaded as artifacts by the test job
needs: test
# only pull request events have a target to compare against
if: ${{ github.event.pull_request }}
steps:
- uses: actions/checkout@v5
- uses: pdm-project/setup-pdm@v4
with:
python-version: '3.14'
cache: true
cache-dependency-path: 'pylock.toml'
# the test group includes tabulate, which the compare script imports
- name: Install test dependencies
run: pdm install --group test
# collect the results of all python versions in the location the compare script reads from
- uses: actions/download-artifact@v5
with:
pattern: benchmarks-*
path: .benchmarks/
merge-multiple: true
- name: Compare benchmarks
run: pdm run python .github/compare-benchmarks.py --old ${{ github.event.pull_request.base.sha }} --new ${{ github.sha }} --comment-file ./BENCHMARK_COMMENT
# body-includes matches the marker at the top of the script's comment template,
# so an existing comment gets replaced on subsequent pushes
- uses: edumserrano/find-create-or-update-comment@v3
with:
issue-number: ${{ github.event.pull_request.number }}
body-includes: '<!-- compare-benchmarks.py -->'
comment-author: 'github-actions[bot]'
edit-mode: replace
body-path: './BENCHMARK_COMMENT'
15 changes: 14 additions & 1 deletion pylock.toml
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,19 @@ marker = "\"check\" in dependency_groups"
[packages.tool.pdm]
dependencies = []

[[packages]]
name = "tabulate"
version = "0.9.0"
requires-python = ">=3.7"
sdist = {name = "tabulate-0.9.0.tar.gz", url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hashes = {sha256 = "0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}}
wheels = [
{name = "tabulate-0.9.0-py3-none-any.whl",url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl",hashes = {sha256 = "024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}},
]
marker = "\"test\" in dependency_groups"

[packages.tool.pdm]
dependencies = []

[[packages]]
name = "types-pyyaml"
version = "6.0.12.20250915"
Expand Down Expand Up @@ -590,7 +603,7 @@ marker = "\"test\" in dependency_groups"
dependencies = []

[tool.pdm]
hashes = {sha256 = "4d844899bab0d12815b3587bacd58900cc01d8718caf0ba2d0274e0803136606"}
hashes = {sha256 = "84bb8a9b1c809227e3051f501aa2ea8a09d588205a7c1204a4ddc2fb23b57550"}
strategy = ["inherit_metadata", "static_urls"]

[[tool.pdm.targets]]
Expand Down
15 changes: 12 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ classifiers = [
[project.urls]
homepage = "https://github.com/NetherlandsForensicInstitute/confidence/"

[tool.pdm]
version = {source = "scm"}

[dependency-groups]
check = [
"mypy",
Expand All @@ -44,11 +41,23 @@ test = [
"coverage",
"pytest",
"pytest-benchmark",
"tabulate",
]

[tool.pdm]
version = {source = "scm"}

[tool.pdm.scripts]
all = {composite = ["check", "test"]}
benchmark = "pdm run test --benchmark-only --benchmark-autosave tests/"
benchmark-against = {keep_going = true, composite = [
"git rev-parse {args}",
"pdm run benchmark",
"git switch --detach {args}",
"pdm run benchmark",
"git switch -",
"pdm run python .github/compare-benchmarks.py --old {args}"
]}
check = {composite = ["check-lock", "format", "lint", "type-check"]}
check-lock = "pdm lock --check"
format = "ruff format --diff confidence/ tests/"
Expand Down