93 changes: 93 additions & 0 deletions .github/workflows/criterion-benchmarks.yml
@@ -0,0 +1,93 @@
name: Criterion benchmarks

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main]
  schedule:
    - cron: '0 6 * * 1'
  workflow_dispatch:

jobs:
  benchmark-pr:
    if: github.event_name == 'pull_request'
    name: Benchmark comparison (PR)
    runs-on: ubuntu-latest
    env:
      CARGO_TERM_COLOR: always
      RUST_BACKTRACE: 1
      CARGO_TARGET_DIR: ${{ github.workspace }}/target
      CRITERION_BASELINE: base
      CRITERION_CURRENT: pr
      CRITERION_REGRESSION_THRESHOLD: '0.10'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo build artifacts
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: criterion

      - name: Reset Criterion output
        run: rm -rf "$CARGO_TARGET_DIR/criterion"

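      # Benchmark the PR's base commit in a throwaway git worktree so both runs
      # share one CARGO_TARGET_DIR and the "base"/"pr" baselines end up side by
      # side for ci/criterion_compare.py to compare.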
      - name: Run benchmarks on base commit
        run: |
          BASE_SHA=${{ github.event.pull_request.base.sha }}
          git fetch --no-tags --depth=1 origin "$BASE_SHA"
          git worktree add --detach ../base "$BASE_SHA"
          pushd ../base
          cargo bench --workspace -- --save-baseline "$CRITERION_BASELINE"
          popd
          git worktree remove ../base --force

      # ci/criterion_compare.py performs the comparison, so the PR run only
      # needs to be saved under its own baseline name here.
      - name: Run benchmarks on PR commit
        run: cargo bench --workspace -- --save-baseline "$CRITERION_CURRENT"

      - name: Compare benchmark results
        run: python3 ci/criterion_compare.py "$CARGO_TARGET_DIR/criterion"

      - name: Upload benchmark artifacts
        uses: actions/upload-artifact@v4
        with:
          name: criterion-pr-${{ github.run_id }}
          path: ${{ env.CARGO_TARGET_DIR }}/criterion

  benchmark-main:
    if: github.event_name != 'pull_request'
    name: Benchmark snapshot
    runs-on: ubuntu-latest
    env:
      CARGO_TERM_COLOR: always
      RUST_BACKTRACE: 1
      CARGO_TARGET_DIR: ${{ github.workspace }}/target
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo build artifacts
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: criterion

      - name: Reset Criterion output
        run: rm -rf "$CARGO_TARGET_DIR/criterion"

      - name: Run benchmarks
        run: cargo bench --workspace -- --save-baseline main

      - name: Upload benchmark artifacts
        uses: actions/upload-artifact@v4
        with:
          name: criterion-main-${{ github.run_id }}
          path: ${{ env.CARGO_TARGET_DIR }}/criterion
134 changes: 134 additions & 0 deletions ci/criterion_compare.py
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""Compare Criterion benchmark baselines and detect regressions.

This script expects a Criterion `target/criterion` directory with two named
baselines present. It compares the mean point estimate for each benchmark and
flags any that regress beyond the configured threshold. Exit status: 0 on
success, 1 if any regression exceeds the threshold, 2 on usage or input errors.
"""

from __future__ import annotations

import json
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List


@dataclass
class BenchmarkResult:
    name: str
    baseline_mean: float
    current_mean: float

    @property
    def delta(self) -> float:
        return self.current_mean - self.baseline_mean

    @property
    def percent_change(self) -> float:
        if self.baseline_mean == 0:
            return float("inf")
        return self.delta / self.baseline_mean


def load_mean_estimate(path: Path) -> float:
    with path.open("r", encoding="utf-8") as fh:
        data = json.load(fh)
    # Criterion has keyed the mean as "mean" or "Mean" depending on version,
    # so accept either spelling.
    for key in ("mean", "Mean"):
        section = data.get(key)
        if isinstance(section, dict) and "point_estimate" in section:
            return float(section["point_estimate"])
    raise RuntimeError(f"Unexpected estimate structure in {path}")


def collect_results(target_dir: Path, baseline: str, current: str) -> List[BenchmarkResult]:
    results: List[BenchmarkResult] = []
    # Criterion saves each named baseline as <benchmark dir>/<baseline>/estimates.json;
    # find every baseline directory and pair it with the matching current-run directory.
    for estimate_path in target_dir.rglob("estimates.json"):
        if estimate_path.parent.name != baseline:
            continue
        bench_root = estimate_path.parent.parent
        current_path = bench_root / current / "estimates.json"
        if not current_path.exists():
            continue
        baseline_mean = load_mean_estimate(estimate_path)
        current_mean = load_mean_estimate(current_path)
        rel_name = str(bench_root.relative_to(target_dir))
        results.append(BenchmarkResult(rel_name, baseline_mean, current_mean))
    return sorted(results, key=lambda result: result.name)


def format_ns(value: float) -> str:
    return f"{value:,.2f}"


def render_table(results: Iterable[BenchmarkResult], threshold: float) -> str:
    lines = [
        "| Benchmark | Baseline mean (ns) | Current mean (ns) | Δ% | Status |",
        "|-----------|--------------------|-------------------|----|--------|",
    ]
    for result in results:
        percent = result.percent_change * 100
        if percent < 0:
            status = "improved"
        elif percent > threshold * 100:
            status = "regressed"
        else:
            status = "unchanged"
        lines.append(
            "| {name} | {baseline} | {current} | {percent:+.2f}% | {status} |".format(
                name=result.name,
                baseline=format_ns(result.baseline_mean),
                current=format_ns(result.current_mean),
                percent=percent,
                status=status,
            )
        )
    return "\n".join(lines)


def main(argv: List[str]) -> int:
    if len(argv) != 2:
        print("Usage: criterion_compare.py <criterion-target-dir>", file=sys.stderr)
        return 2

    target_dir = Path(argv[1])
    if not target_dir.exists():
        print(f"Criterion directory '{target_dir}' does not exist", file=sys.stderr)
        return 2

    baseline = os.environ.get("CRITERION_BASELINE", "base")
    current = os.environ.get("CRITERION_CURRENT", "new")
    threshold = float(os.environ.get("CRITERION_REGRESSION_THRESHOLD", "0.05"))

    results = collect_results(target_dir, baseline, current)
    if not results:
        print(
            f"No benchmarks found for baseline '{baseline}' and comparison '{current}'.",
            file=sys.stderr,
        )
        return 2

    summary = render_table(results, threshold)
    print(summary)

    # Mirror the comparison table into the GitHub Actions job summary when available.
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if summary_path:
        with Path(summary_path).open("a", encoding="utf-8") as fh:
            fh.write("\n### Criterion benchmark comparison\n\n")
            fh.write(summary)
            fh.write("\n")

    regressions = [
        result for result in results if result.percent_change > threshold
    ]
    if regressions:
        print("\nDetected performance regressions exceeding threshold:")
        for result in regressions:
            percent = result.percent_change * 100
            print(
                f"- {result.name}: {percent:+.2f}% (baseline {result.baseline_mean:.2f} ns -> {result.current_mean:.2f} ns)"
            )
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))