From 010352b0f63f19a7e33640e76c8fcf6751d67d8b Mon Sep 17 00:00:00 2001 From: Lucca Bertoncini Date: Sat, 14 Feb 2026 23:19:26 -0800 Subject: [PATCH 1/3] Add NPS benchmark for search speed regression testing Add a node counter to AlphaBeta and a bench mode that searches 48 positions from Stockfish's bench suite, reporting per-position and total nodes, time, and NPS. Node count is deterministic and serves as the primary signal for detecting search behavior changes. Includes a CI workflow that runs on PRs and posts results as a comment. --- .github/workflows/nps-benchmark.yml | 89 ++++++++++++++++++++ moonfish/bench.py | 123 ++++++++++++++++++++++++++++ moonfish/engines/alpha_beta.py | 7 ++ moonfish/main.py | 5 +- 4 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/nps-benchmark.yml create mode 100644 moonfish/bench.py diff --git a/.github/workflows/nps-benchmark.yml b/.github/workflows/nps-benchmark.yml new file mode 100644 index 0000000..3959bd7 --- /dev/null +++ b/.github/workflows/nps-benchmark.yml @@ -0,0 +1,89 @@ +name: NPS Benchmark + +on: + pull_request: + paths: + # Only run benchmarks when engine code changes + - 'moonfish/**' + - 'pyproject.toml' + - 'requirements.txt' + +permissions: + contents: read + pull-requests: write + +env: + UV_SYSTEM_PYTHON: 1 + +jobs: + nps-benchmark: + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "requirements.txt" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install dependencies + run: make install + + - name: Run NPS benchmark + run: | + python -m moonfish.main --mode bench --depth 5 2>&1 | tee bench-output.txt + + - name: Parse results and comment on PR + env: + GH_TOKEN: ${{ github.token }} + run: | + OUTPUT="bench-output.txt" + + TOTAL_TIME=$(grep "^Total time" 
"$OUTPUT" | awk '{print $NF}') + TOTAL_NODES=$(grep "^Nodes searched" "$OUTPUT" | awk '{print $NF}') + NPS=$(grep "^Nodes/second" "$OUTPUT" | awk '{print $NF}') + NUM_POSITIONS=$(grep -c "^Position" "$OUTPUT") + + # Format numbers with commas + TOTAL_NODES_FMT=$(printf "%'d" "$TOTAL_NODES") + NPS_FMT=$(printf "%'d" "$NPS") + + # Build per-position breakdown + PER_POS=$(grep "^Position" "$OUTPUT") + + cat > pr-comment.md << EOF + ## ⚡ NPS Benchmark Results + + | Metric | Value | + |--------|-------| + | Depth | 5 | + | Positions | $NUM_POSITIONS | + | Total nodes | $TOTAL_NODES_FMT | + | Total time | ${TOTAL_TIME}s | + | Nodes/second | $NPS_FMT | + + > **Node count is the primary signal** — it's deterministic and catches search behavior changes. If the node count changes, the PR changed search behavior. NPS is informational only (CI runner performance varies). + +
<details><summary>Per-position breakdown</summary> + + \`\`\` + $PER_POS + \`\`\` + + </details>
+ EOF + + # Remove leading whitespace from heredoc + sed -i 's/^ //' pr-comment.md + + cat pr-comment.md >> $GITHUB_STEP_SUMMARY + + gh pr comment ${{ github.event.pull_request.number }} --body-file pr-comment.md diff --git a/moonfish/bench.py b/moonfish/bench.py new file mode 100644 index 0000000..8240a2d --- /dev/null +++ b/moonfish/bench.py @@ -0,0 +1,123 @@ +import random +import time + +from chess import Board, Move + +from moonfish.config import Config +from moonfish.engines.alpha_beta import AlphaBeta + +# 48 positions from Stockfish's bench command (excluding 2 Chess960 positions). +# Some entries include "moves ..." suffixes that are applied before searching. +BENCH_POSITIONS = [ + "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", + "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10", + "8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - - 0 11", + "4rrk1/pp1n3p/3q2pQ/2p1pb2/2PP4/2P3N1/P2B2PP/4RRK1 b - - 7 19", + "rq3rk1/ppp2ppp/1bnpb3/3N2B1/3NP3/7P/PPPQ1PP1/2KR3R w - - 7 14 moves d4e6", + "r1bq1r1k/1pp1n1pp/1p1p4/4p2Q/4Pp2/1BNP4/PPP2PPP/3R1RK1 w - - 2 14 moves g2g4", + "r3r1k1/2p2ppp/p1p1bn2/8/1q2P3/2NPQN2/PPP3PP/R4RK1 b - - 2 15", + "r1bbk1nr/pp3p1p/2n5/1N4p1/2Np1B2/8/PPP2PPP/2KR1B1R w kq - 0 13", + "r1bq1rk1/ppp1nppp/4n3/3p3Q/3P4/1BP1B3/PP1N2PP/R4RK1 w - - 1 16", + "4r1k1/r1q2ppp/ppp2n2/4P3/5Rb1/1N1BQ3/PPP3PP/R5K1 w - - 1 17", + "2rqkb1r/ppp2p2/2npb1p1/1N1Nn2p/2P1PP2/8/PP2B1PP/R1BQK2R b KQ - 0 11", + "r1bq1r1k/b1p1npp1/p2p3p/1p6/3PP3/1B2NN2/PP3PPP/R2Q1RK1 w - - 1 16", + "3r1rk1/p5pp/bpp1pp2/8/q1PP1P2/b3P3/P2NQRPP/1R2B1K1 b - - 6 22", + "r1q2rk1/2p1bppp/2Pp4/p6b/Q1PNp3/4B3/PP1R1PPP/2K4R w - - 2 18", + "4k2r/1pb2ppp/1p2p3/1R1p4/3P4/2r1PN2/P4PPP/1R4K1 b - - 3 22", + "3q2k1/pb3p1p/4pbp1/2r5/PpN2N2/1P2P2P/5PP1/Q2R2K1 b - - 4 26", + "6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1", + "3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1", + "2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3", + "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w 
- - 0 1", + "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", + "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", + "8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1", + "8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1", + "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", + "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", + "6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1", + "1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1", + "6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1", + "8/3p3B/5p2/5P2/p7/PP5b/k7/6K1 w - - 0 1", + "5rk1/q6p/2p3bR/1pPp1rP1/1P1Pp3/P3B1Q1/1K3P2/R7 w - - 93 90", + "4rrk1/1p1nq3/p7/2p1P1pp/3P2bp/3Q1Bn1/PPPB4/1K2R1NR w - - 40 21", + "r3k2r/3nnpbp/q2pp1p1/p7/Pp1PPPP1/4BNN1/1P5P/R2Q1RK1 w kq - 0 16", + "3Qb1k1/1r2ppb1/pN1n2q1/Pp1Pp1Pr/4P2p/4BP2/4B1R1/1R5K b - - 11 40", + "4k3/3q1r2/1N2r1b1/3ppN2/2nPP3/1B1R2n1/2R1Q3/3K4 w - - 5 1", + # Positions with high numbers of changed threats + "k7/2n1n3/1nbNbn2/2NbRBn1/1nbRQR2/2NBRBN1/3N1N2/7K w - - 0 1", + "K7/8/8/BNQNQNB1/N5N1/R1Q1q2r/n5n1/bnqnqnbk w - - 0 1", + # 5-man positions + "8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", + "8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", + "8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", + # 6-man positions + "8/8/1P6/5pr1/8/4R3/7k/2K5 w - - 0 1", + "8/2p4P/8/kr6/6R1/8/8/1K6 w - - 0 1", + "8/8/3P3k/8/1p6/8/1P6/1K3n2 b - - 0 1", + # 7-man positions + "8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", + # Mate and stalemate positions + "6k1/3b3r/1p1p4/p1n2p2/1PPNpP1q/P3Q1p1/1R1RB1P1/5K2 b - - 0 1", + "r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1", + "8/8/8/8/8/6k1/6p1/6K1 w - -", + "7k/7P/6K1/8/3B4/8/8/8 b - -", +] + + +def _make_board(position: str) -> Board: + """Parse a position string into a Board, applying any trailing moves.""" + parts = position.split(" moves ") + board = Board(parts[0]) + if len(parts) > 1: + for uci in parts[1].split(): + board.push(Move.from_uci(uci)) + return board + + +def run_bench(depth: int) -> None: + """Run the benchmark: search all positions at the given depth and 
report NPS.""" + # Seed RNG for deterministic node counts (move ordering uses random.shuffle) + random.seed(0) + config = Config( + mode="bench", + algorithm="alpha_beta", + negamax_depth=depth, + null_move=True, + null_move_r=2, + quiescence_search_depth=3, + syzygy_path=None, + syzygy_pieces=5, + ) + engine = AlphaBeta(config) + + total_nodes = 0 + total_time = 0.0 + n = len(BENCH_POSITIONS) + + for i, position in enumerate(BENCH_POSITIONS, 1): + board = _make_board(position) + config.negamax_depth = depth + + # Skip terminal positions (checkmate/stalemate) — no moves to search + if not list(board.legal_moves): + print(f"Position {i:>2}/{n}: nodes=0 time=0.00s nps=0 (terminal)") + continue + + start = time.perf_counter() + engine.search_move(board) + elapsed = time.perf_counter() - start + + nodes = engine.nodes + nps = int(nodes / elapsed) if elapsed > 0 else 0 + total_nodes += nodes + total_time += elapsed + + print(f"Position {i:>2}/{n}: nodes={nodes:<10} time={elapsed:.2f}s nps={nps}") + + total_nps = int(total_nodes / total_time) if total_time > 0 else 0 + + print("===========================") + print(f"Total time (s) : {total_time:.2f}") + print(f"Nodes searched : {total_nodes}") + print(f"Nodes/second : {total_nps}") diff --git a/moonfish/engines/alpha_beta.py b/moonfish/engines/alpha_beta.py index 5244aed..dd7ab06 100644 --- a/moonfish/engines/alpha_beta.py +++ b/moonfish/engines/alpha_beta.py @@ -21,6 +21,7 @@ class AlphaBeta: def __init__(self, config: Config): self.config = config + self.nodes: int = 0 # Open Syzygy tablebase once at initialization (not on every eval) self.tablebase = None @@ -93,6 +94,8 @@ def quiescence_search( """ in_check = board.is_check() + self.nodes += 1 + if board.is_checkmate(): return -self.config.checkmate_score @@ -202,6 +205,9 @@ def negamax( - best_score, best_move: returns best move that it found and its value. 
""" cache_key = (board.fen(), depth, null_move, alpha, beta) + + self.nodes += 1 + # check if board was already evaluated if cache_key in cache: return cache[cache_key] @@ -302,6 +308,7 @@ def negamax( return best_score, best_move def search_move(self, board: Board) -> Move: + self.nodes = 0 # create shared cache cache: CACHE_KEY = {} diff --git a/moonfish/main.py b/moonfish/main.py index 1b85759..3f028de 100644 --- a/moonfish/main.py +++ b/moonfish/main.py @@ -6,6 +6,7 @@ from moonfish.config import Config from moonfish.mode.api import main as api_main from moonfish.mode.uci import main as uci_main +from moonfish.bench import run_bench def run(config: Config): @@ -13,13 +14,15 @@ def run(config: Config): uci_main(config) elif config.mode == "api": api_main() + elif config.mode == "bench": + run_bench(depth=5) else: raise ValueError("mode not supported, type --help to see supported modes.") @click.command() @click.option( - "--mode", type=str, help="Mode to run the engine, one of [uci, api].", default="uci" + "--mode", type=str, help="Mode to run the engine, one of [uci, api, bench].", default="uci" ) @click.option( "--algorithm", From c5bbecdeca709c6680bfb870b4daf38feca758dd Mon Sep 17 00:00:00 2001 From: Lucca Bertoncini Date: Sat, 14 Feb 2026 23:21:51 -0800 Subject: [PATCH 2/3] Add source link for Stockfish bench positions --- moonfish/bench.py | 1 + 1 file changed, 1 insertion(+) diff --git a/moonfish/bench.py b/moonfish/bench.py index 8240a2d..e529084 100644 --- a/moonfish/bench.py +++ b/moonfish/bench.py @@ -7,6 +7,7 @@ from moonfish.engines.alpha_beta import AlphaBeta # 48 positions from Stockfish's bench command (excluding 2 Chess960 positions). +# Source: https://github.com/official-stockfish/Stockfish/blob/master/src/benchmark.cpp # Some entries include "moves ..." suffixes that are applied before searching. 
BENCH_POSITIONS = [ "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", From 40ad1d71162d0e50b64a7cd063af9727d51a8e2d Mon Sep 17 00:00:00 2001 From: Lucca Bertoncini Date: Sat, 14 Feb 2026 23:50:29 -0800 Subject: [PATCH 3/3] Fix formatting and import sorting --- moonfish/bench.py | 4 +++- moonfish/main.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/moonfish/bench.py b/moonfish/bench.py index e529084..135c567 100644 --- a/moonfish/bench.py +++ b/moonfish/bench.py @@ -102,7 +102,9 @@ def run_bench(depth: int) -> None: # Skip terminal positions (checkmate/stalemate) — no moves to search if not list(board.legal_moves): - print(f"Position {i:>2}/{n}: nodes=0 time=0.00s nps=0 (terminal)") + print( + f"Position {i:>2}/{n}: nodes=0 time=0.00s nps=0 (terminal)" + ) continue start = time.perf_counter() diff --git a/moonfish/main.py b/moonfish/main.py index 3f028de..39f5466 100644 --- a/moonfish/main.py +++ b/moonfish/main.py @@ -3,10 +3,10 @@ import click +from moonfish.bench import run_bench from moonfish.config import Config from moonfish.mode.api import main as api_main from moonfish.mode.uci import main as uci_main -from moonfish.bench import run_bench def run(config: Config): @@ -22,7 +22,10 @@ def run(config: Config): @click.command() @click.option( - "--mode", type=str, help="Mode to run the engine, one of [uci, api, bench].", default="uci" + "--mode", + type=str, + help="Mode to run the engine, one of [uci, api, bench].", + default="uci", ) @click.option( "--algorithm",