From 6add9d9cfd1f6cb9409d545e1a1e2383d9bc24c8 Mon Sep 17 00:00:00 2001
From: Lucca Bertoncini <luccabazzo@gmail.com>
Date: Sun, 15 Feb 2026 20:36:23 -0800
Subject: [PATCH 1/2] Switch benchmarks to comment-triggered invocation

Benchmarks are expensive and shouldn't run on every PR push. Switch to
comment-triggered invocation via `/run-nps-benchmark` and
`/run-stockfish-benchmark` commands. A help comment is posted
automatically when a PR is opened that touches engine code.
---
 .github/workflows/benchmark-help.yml | 35 ++++++++++
 .github/workflows/benchmark.yml      | 46 +++++--------
 .github/workflows/nps-benchmark.yml  | 99 ++++++++++++++++++++++++++++
 3 files changed, 150 insertions(+), 30 deletions(-)
 create mode 100644 .github/workflows/benchmark-help.yml
 create mode 100644 .github/workflows/nps-benchmark.yml

diff --git a/.github/workflows/benchmark-help.yml b/.github/workflows/benchmark-help.yml
new file mode 100644
index 0000000..ec106f0
--- /dev/null
+++ b/.github/workflows/benchmark-help.yml
@@ -0,0 +1,35 @@
+name: Benchmark Help
+
+on:
+  pull_request_target:  # base-repo context so the token can also comment on PRs from forks
+    types: [opened]  # post the instructions once, when the PR is first opened
+    paths:  # only PRs touching engine-related files
+      - 'moonfish/**'
+      - 'opening_book/**'
+      - 'scripts/**'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+
+permissions:
+  pull-requests: write  # required by `gh pr comment`
+
+jobs:
+  comment:  # posts the list of available /run-* benchmark commands on the PR
+    runs-on: ubuntu-latest
+    steps:  # no checkout: only the event payload is used, so no PR code runs here
+    - name: Post benchmark instructions
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        gh pr comment ${{ github.event.pull_request.number }} \
+          --repo ${{ github.repository }} \
+          --body '### Benchmarks
+
+        The following benchmarks are available for this PR:
+
+        | Command | Description |
+        |---------|-------------|
+        | `/run-nps-benchmark` | NPS speed benchmark (depth 5, 48 positions) |
+        | `/run-stockfish-benchmark` | Stockfish strength benchmark (300 games) |
+
+        Post a comment with the command to trigger a benchmark run.'
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 30d54b8..eed94bb 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -1,14 +1,8 @@
 name: Stockfish Benchmark
 
 on:
-  pull_request:
-    paths:
-      # Only run benchmarks when engine code changes
-      - 'moonfish/**'
-      - 'opening_book/**'
-      - 'scripts/**'
-      - 'pyproject.toml'
-      - 'requirements.txt'
+  issue_comment:  # comment-triggered; the `react` job filters for the command on PR comments
+    types: [created]  # only newly created comments
 
 permissions:
   contents: read
@@ -19,20 +13,22 @@ env:
   MOONFISH_OPENING_BOOK: ${{ github.workspace }}/opening_book/cerebellum.bin
 
 jobs:
-  react-start:
+  react:  # gate job: `benchmark` needs it, so nothing runs unless the condition below holds
     runs-on: ubuntu-latest
-    if: github.event_name == 'pull_request'
+    if: >-
+      github.event.issue.pull_request &&
+      contains(github.event.comment.body, '/run-stockfish-benchmark')
     steps:
-    - name: Add eyes reaction to PR
+    - name: React to comment
       env:
         GH_TOKEN: ${{ github.token }}
       run: |
-        gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions \
-          -f content='eyes' --silent || true
+        gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
+          -f content='rocket' --silent || true
 
   benchmark:
     runs-on: ubuntu-latest
-    needs: react-start
+    needs: react
     strategy:
       fail-fast: false
       matrix:
@@ -47,6 +43,11 @@ jobs:
         lfs: false
         fetch-depth: 0
 
+    - name: Checkout PR branch  # switch the working tree from the preceding checkout to the PR head
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: gh pr checkout ${{ github.event.issue.number }}
+
     - name: Ensure opening book
       run: |
         set -euo pipefail
@@ -308,25 +309,10 @@ jobs:
         cat pr-comment.md >> $GITHUB_STEP_SUMMARY
 
     - name: Comment on PR
-      if: github.event_name == 'pull_request'
-      env:
-        GH_TOKEN: ${{ github.token }}
-      run: |
-        gh pr comment ${{ github.event.pull_request.number }} --body-file pr-comment.md
-
-    - name: Update PR reaction (eyes -> thumbs up)
-      if: github.event_name == 'pull_request'
       env:
         GH_TOKEN: ${{ github.token }}
       run: |
-        # Remove eyes reaction
-        REACTIONS=$(gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions --jq '.[] | select(.content == "eyes") | .id' || true)
-        for ID in $REACTIONS; do
-          gh api -X DELETE repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions/$ID --silent || true
-        done
-        # Add thumbs up
-        gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions \
-          -f content='+1' --silent || true
+        gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md
 
     - name: Upload aggregated results
       uses: actions/upload-artifact@v4
diff --git a/.github/workflows/nps-benchmark.yml b/.github/workflows/nps-benchmark.yml
new file mode 100644
index 0000000..e4e8b2a
--- /dev/null
+++ b/.github/workflows/nps-benchmark.yml
@@ -0,0 +1,99 @@
+name: NPS Benchmark
+
+on:
+  issue_comment:  # comment-triggered; the job's `if` filters for /run-nps-benchmark on PRs
+    types: [created]  # only newly created comments
+
+permissions:
+  contents: read
+  pull-requests: write  # presumably what lets the gh calls below react and comment — confirm
+
+env:
+  UV_SYSTEM_PYTHON: 1  # presumably makes uv target the setup-python interpreter — confirm in uv docs
+
+jobs:
+  nps-benchmark:  # single job: react, check out the PR, run the bench, post the results
+    runs-on: ubuntu-latest
+    if: >-
+      github.event.issue.pull_request &&
+      contains(github.event.comment.body, '/run-nps-benchmark')
+
+    steps:
+    - name: React to comment  # best-effort acknowledgement: `|| true` ignores API failures
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
+          -f content='rocket' --silent || true
+
+    - uses: actions/checkout@v4
+
+    - name: Checkout PR branch  # switch the working tree from the preceding checkout to the PR head
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: gh pr checkout ${{ github.event.issue.number }}
+
+    - name: Install uv
+      uses: astral-sh/setup-uv@v5
+      with:
+        enable-cache: true
+        cache-dependency-glob: "requirements.txt"  # presumably the file whose changes invalidate the cache
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'  # quoted so YAML keeps the string "3.10" rather than the float 3.1
+
+    - name: Install dependencies
+      run: make install  # Makefile target is defined outside this workflow
+
+    - name: Run NPS benchmark  # output is tee'd to bench-output.txt for the parse step
+      shell: bash  # explicit bash runs with `-eo pipefail`, so a bench crash fails the step despite tee
+      run: python -m moonfish.main --mode bench --depth 5 2>&1 | tee bench-output.txt
+
+    - name: Parse results and comment on PR  # also appends the report to the job summary
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        OUTPUT="bench-output.txt"
+        # Totals: last whitespace-separated field of each labelled line; positions: count of "Position" lines
+        TOTAL_TIME=$(grep "^Total time" "$OUTPUT" | awk '{print $NF}')
+        TOTAL_NODES=$(grep "^Nodes searched" "$OUTPUT" | awk '{print $NF}')
+        NPS=$(grep "^Nodes/second" "$OUTPUT" | awk '{print $NF}')
+        NUM_POSITIONS=$(grep -c "^Position" "$OUTPUT")
+
+        # Add thousands separators via printf %'d (locale-dependent; NOTE(review): confirm commas on the runner)
+        TOTAL_NODES_FMT=$(printf "%'d" "$TOTAL_NODES")
+        NPS_FMT=$(printf "%'d" "$NPS")
+
+        # Per-position breakdown: every output line that starts with "Position"
+        PER_POS=$(grep "^Position" "$OUTPUT")
+
+        cat > pr-comment.md << EOF
+        ## ⚡ NPS Benchmark Results
+
+        | Metric | Value |
+        |--------|-------|
+        | Depth | 5 |
+        | Positions | $NUM_POSITIONS |
+        | Total nodes | $TOTAL_NODES_FMT |
+        | Total time | ${TOTAL_TIME}s |
+        | Nodes/second | $NPS_FMT |
+
+        > **Node count is the primary signal** — it's deterministic and catches search behavior changes. If the node count changes, the PR changed search behavior. NPS is informational only (CI runner performance varies).
+
+        <details><summary>Per-position breakdown</summary>
+
+        \`\`\`
+        $PER_POS
+        \`\`\`
+
+        </details>
+        EOF
+
+        # Strip an 8-space indent from each line (normally a no-op: YAML already de-indents this script)
+        sed -i 's/^        //' pr-comment.md
+
+        cat pr-comment.md >> $GITHUB_STEP_SUMMARY
+
+        gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md

From d548386a4113fa625edbf55aadb17fef9d5f31ce Mon Sep 17 00:00:00 2001
From: Lucca Bertoncini <luccabazzo@gmail.com>
Date: Sun, 15 Feb 2026 20:42:33 -0800
Subject: [PATCH 2/2] Use eyes reaction on start and thumbs up on completion

---
 .github/workflows/benchmark.yml     | 9 ++++++++-
 .github/workflows/nps-benchmark.yml | 6 +++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index eed94bb..a2c82d9 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -24,7 +24,7 @@ jobs:
         GH_TOKEN: ${{ github.token }}
       run: |
         gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
-          -f content='rocket' --silent || true
+          -f content='eyes' --silent || true
 
   benchmark:
     runs-on: ubuntu-latest
@@ -314,6 +314,13 @@ jobs:
       run: |
         gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md
 
+    - name: React with thumbs up on completion  # best-effort: `|| true` ignores API failures
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
+          -f content='+1' --silent || true
+
     - name: Upload aggregated results
       uses: actions/upload-artifact@v4
       with:
diff --git a/.github/workflows/nps-benchmark.yml b/.github/workflows/nps-benchmark.yml
index e4e8b2a..771a055 100644
--- a/.github/workflows/nps-benchmark.yml
+++ b/.github/workflows/nps-benchmark.yml
@@ -24,7 +24,7 @@ jobs:
         GH_TOKEN: ${{ github.token }}
       run: |
         gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
-          -f content='rocket' --silent || true
+          -f content='eyes' --silent || true
 
     - uses: actions/checkout@v4
 
@@ -97,3 +97,7 @@ jobs:
         cat pr-comment.md >> $GITHUB_STEP_SUMMARY
 
         gh pr comment ${{ github.event.issue.number }} --body-file pr-comment.md
+
+        # Add thumbs up reaction to the triggering comment to signal completion (best-effort: failures are ignored)
+        gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions \
+          -f content='+1' --silent || true