From 6add9d9cfd1f6cb9409d545e1a1e2383d9bc24c8 Mon Sep 17 00:00:00 2001 From: Lucca Bertoncini Date: Sun, 15 Feb 2026 20:36:23 -0800 Subject: [PATCH 1/2] Switch benchmarks to comment-triggered invocation Benchmarks are expensive and shouldn't run on every PR push. Switch to comment-triggered invocation via `/run-nps-benchmark` and `/run-stockfish-benchmark` commands. A help comment is posted automatically when a PR is opened that touches engine code. --- .github/workflows/benchmark-help.yml | 35 ++++++++++ .github/workflows/benchmark.yml | 46 +++++-------- .github/workflows/nps-benchmark.yml | 99 ++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 30 deletions(-) create mode 100644 .github/workflows/benchmark-help.yml create mode 100644 .github/workflows/nps-benchmark.yml diff --git a/.github/workflows/benchmark-help.yml b/.github/workflows/benchmark-help.yml new file mode 100644 index 0000000..ec106f0 --- /dev/null +++ b/.github/workflows/benchmark-help.yml @@ -0,0 +1,35 @@ +name: Benchmark Help + +on: + pull_request: + types: [opened] + paths: + - 'moonfish/**' + - 'opening_book/**' + - 'scripts/**' + - 'pyproject.toml' + - 'requirements.txt' + +permissions: + pull-requests: write + +jobs: + comment: + runs-on: ubuntu-latest + steps: + - name: Post benchmark instructions + env: + GH_TOKEN: ${{ github.token }} + run: | + gh pr comment ${{ github.event.pull_request.number }} \ + --repo ${{ github.repository }} \ + --body '### Benchmarks + + The following benchmarks are available for this PR: + + | Command | Description | + |---------|-------------| + | `/run-nps-benchmark` | NPS speed benchmark (depth 5, 48 positions) | + | `/run-stockfish-benchmark` | Stockfish strength benchmark (300 games) | + + Post a comment with the command to trigger a benchmark run.' 
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 30d54b8..eed94bb 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,14 +1,8 @@ name: Stockfish Benchmark on: - pull_request: - paths: - # Only run benchmarks when engine code changes - - 'moonfish/**' - - 'opening_book/**' - - 'scripts/**' - - 'pyproject.toml' - - 'requirements.txt' + issue_comment: + types: [created] permissions: contents: read @@ -19,20 +13,22 @@ env: MOONFISH_OPENING_BOOK: ${{ github.workspace }}/opening_book/cerebellum.bin jobs: - react-start: + react: runs-on: ubuntu-latest - if: github.event_name == 'pull_request' + if: >- + github.event.issue.pull_request && + contains(github.event.comment.body, '/run-stockfish-benchmark') steps: - - name: Add eyes reaction to PR + - name: React to comment env: GH_TOKEN: ${{ github.token }} run: | - gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions \ - -f content='eyes' --silent || true + gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \ + -f content='rocket' --silent || true benchmark: runs-on: ubuntu-latest - needs: react-start + needs: react strategy: fail-fast: false matrix: @@ -47,6 +43,11 @@ jobs: lfs: false fetch-depth: 0 + - name: Checkout PR branch + env: + GH_TOKEN: ${{ github.token }} + run: gh pr checkout ${{ github.event.issue.number }} + - name: Ensure opening book run: | set -euo pipefail @@ -308,25 +309,10 @@ jobs: cat pr-comment.md >> $GITHUB_STEP_SUMMARY - name: Comment on PR - if: github.event_name == 'pull_request' - env: - GH_TOKEN: ${{ github.token }} - run: | - gh pr comment ${{ github.event.pull_request.number }} --body-file pr-comment.md - - - name: Update PR reaction (eyes -> thumbs up) - if: github.event_name == 'pull_request' env: GH_TOKEN: ${{ github.token }} run: | - # Remove eyes reaction - REACTIONS=$(gh api repos/${{ github.repository }}/issues/${{ 
github.event.pull_request.number }}/reactions --jq '.[] | select(.content == "eyes") | .id' || true) - for ID in $REACTIONS; do - gh api -X DELETE repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions/$ID --silent || true - done - # Add thumbs up - gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions \ - -f content='+1' --silent || true + gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md - name: Upload aggregated results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/nps-benchmark.yml b/.github/workflows/nps-benchmark.yml new file mode 100644 index 0000000..e4e8b2a --- /dev/null +++ b/.github/workflows/nps-benchmark.yml @@ -0,0 +1,99 @@ +name: NPS Benchmark + +on: + issue_comment: + types: [created] + +permissions: + contents: read + pull-requests: write + +env: + UV_SYSTEM_PYTHON: 1 + +jobs: + nps-benchmark: + runs-on: ubuntu-latest + if: >- + github.event.issue.pull_request && + contains(github.event.comment.body, '/run-nps-benchmark') + + steps: + - name: React to comment + env: + GH_TOKEN: ${{ github.token }} + run: | + gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \ + -f content='rocket' --silent || true + + - uses: actions/checkout@v4 + + - name: Checkout PR branch + env: + GH_TOKEN: ${{ github.token }} + run: gh pr checkout ${{ github.event.issue.number }} + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "requirements.txt" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install dependencies + run: make install + + - name: Run NPS benchmark + run: | + python -m moonfish.main --mode bench --depth 5 2>&1 | tee bench-output.txt + + - name: Parse results and comment on PR + env: + GH_TOKEN: ${{ github.token }} + run: | + OUTPUT="bench-output.txt" + + TOTAL_TIME=$(grep "^Total time" 
"$OUTPUT" | awk '{print $NF}') + TOTAL_NODES=$(grep "^Nodes searched" "$OUTPUT" | awk '{print $NF}') + NPS=$(grep "^Nodes/second" "$OUTPUT" | awk '{print $NF}') + NUM_POSITIONS=$(grep -c "^Position" "$OUTPUT") + + # Format numbers with commas + TOTAL_NODES_FMT=$(printf "%'d" "$TOTAL_NODES") + NPS_FMT=$(printf "%'d" "$NPS") + + # Build per-position breakdown + PER_POS=$(grep "^Position" "$OUTPUT") + + cat > pr-comment.md << EOF + ## ⚡ NPS Benchmark Results + + | Metric | Value | + |--------|-------| + | Depth | 5 | + | Positions | $NUM_POSITIONS | + | Total nodes | $TOTAL_NODES_FMT | + | Total time | ${TOTAL_TIME}s | + | Nodes/second | $NPS_FMT | + + > **Node count is the primary signal** — it's deterministic and catches search behavior changes. If the node count changes, the PR changed search behavior. NPS is informational only (CI runner performance varies). + +
<details><summary>Per-position breakdown</summary> + + \`\`\` + $PER_POS + \`\`\` + + </details>
+ EOF + + # Remove leading whitespace from heredoc + sed -i 's/^ //' pr-comment.md + + cat pr-comment.md >> $GITHUB_STEP_SUMMARY + + gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md From d548386a4113fa625edbf55aadb17fef9d5f31ce Mon Sep 17 00:00:00 2001 From: Lucca Bertoncini Date: Sun, 15 Feb 2026 20:42:33 -0800 Subject: [PATCH 2/2] Use eyes reaction on start and thumbs up on completion --- .github/workflows/benchmark.yml | 9 ++++++++- .github/workflows/nps-benchmark.yml | 6 +++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index eed94bb..a2c82d9 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -24,7 +24,7 @@ jobs: GH_TOKEN: ${{ github.token }} run: | gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \ - -f content='rocket' --silent || true + -f content='eyes' --silent || true benchmark: runs-on: ubuntu-latest @@ -314,6 +314,13 @@ jobs: run: | gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md + - name: React with thumbs up on completion + env: + GH_TOKEN: ${{ github.token }} + run: | + gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \ + -f content='+1' --silent || true + - name: Upload aggregated results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/nps-benchmark.yml b/.github/workflows/nps-benchmark.yml index e4e8b2a..771a055 100644 --- a/.github/workflows/nps-benchmark.yml +++ b/.github/workflows/nps-benchmark.yml @@ -24,7 +24,7 @@ jobs: GH_TOKEN: ${{ github.token }} run: | gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \ - -f content='rocket' --silent || true + -f content='eyes' --silent || true - uses: actions/checkout@v4 @@ -97,3 +97,7 @@ jobs: cat pr-comment.md >> $GITHUB_STEP_SUMMARY gh pr comment ${{ github.event.issue.number }} 
--body-file pr-comment.md + + # Add thumbs up reaction to signal completion + gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \ + -f content='+1' --silent || true