ci: add conformance + operator benchmark job with runner summary

teamchong · teamchong · commit 93e7362669fa · 2026-03-05T13:12:24.000-05:00
- Add Vitest bench file comparing QueryMode (Miniflare full DO stack)
  vs DuckDB (native Node FFI) across 7 query patterns
- Add `conformance` CI job: runs operator conformance tests, then
  head-to-head benchmarks with wrangler dev
- Post benchmark results to GitHub Actions job summary for both
  the operator and E2E benchmark jobs
- Add `bench:operators` package.json script
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -32,6 +32,61 @@ jobs:
       - name: Unit tests
         run: pnpm test
 
+  # Operator conformance tests followed by QueryMode-vs-DuckDB benchmarks
+  # against a locally running worker (wrangler dev on localhost:8787).
+  conformance:
+    runs-on: ubuntu-latest
+    needs: test
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - run: pnpm install
+
+      - name: Install Zig
+        uses: mlugg/setup-zig@v2
+        with:
+          version: 0.15.2
+
+      - name: Build (WASM + TypeScript)
+        run: pnpm build
+
+      - name: Conformance tests
+        run: npx vitest run src/operators-conformance.test.ts
+
+      - name: Generate benchmark data
+        run: npx tsx scripts/generate-bench-data.ts
+
+      - name: Start wrangler dev
+        run: |
+          pnpm dev &
+          # Poll /health once per second for up to 30 attempts. Fail the step
+          # explicitly if the worker never answers, instead of letting later
+          # steps fail with a less obvious connection error.
+          ready=0
+          for i in $(seq 1 30); do
+            if curl -sf http://localhost:8787/health > /dev/null 2>&1; then
+              echo "Worker ready"
+              ready=1
+              break
+            fi
+            sleep 1
+          done
+          if [ "$ready" -ne 1 ]; then
+            echo "Worker did not become ready within 30 attempts" >&2
+            exit 1
+          fi
+
+      - name: Seed local R2
+        run: npx tsx scripts/seed-local-r2.ts
+
+      - name: Operator benchmarks (QueryMode vs DuckDB)
+        run: |
+          # The default `run` shell is `bash -e {0}` (no pipefail), so without
+          # this the pipeline would report tee's exit status and hide a
+          # failing benchmark run.
+          set -o pipefail
+          npx vitest bench src/bench-vs-duckdb.bench.ts 2>&1 | tee /tmp/bench-output.txt
+
+      - name: Post benchmark results to summary
+        if: always()
+        run: |
+          # Runs even after failures, so the output file may not exist yet.
+          {
+            echo "## Operator Benchmarks — QueryMode (Miniflare) vs DuckDB (native)"
+            echo ""
+            echo '```'
+            if [ -f /tmp/bench-output.txt ]; then
+              cat /tmp/bench-output.txt
+            else
+              echo "No benchmark output was produced (an earlier step failed)."
+            fi
+            echo '```'
+          } >> "$GITHUB_STEP_SUMMARY"
+
+
   bench:
     runs-on: ubuntu-latest
     needs: test
@@ -73,4 +128,13 @@ jobs:
         run: npx tsx scripts/seed-local-r2.ts
 
       - name: Run benchmarks
-        run: npx tsx scripts/bench.ts
+        run: |
+          # The default `run` shell is `bash -e {0}` (no pipefail), so without
+          # this the pipeline would report tee's exit status and hide a
+          # failing benchmark run.
+          set -o pipefail
+          npx tsx scripts/bench.ts 2>&1 | tee /tmp/bench-output.txt
+
+      - name: Post benchmark results to summary
+        if: always()
+        run: |
+          # Runs even after failures, so the output file may not exist yet.
+          {
+            echo "## E2E Benchmarks — Full DO Stack"
+            echo ""
+            echo '```'
+            if [ -f /tmp/bench-output.txt ]; then
+              cat /tmp/bench-output.txt
+            else
+              echo "No benchmark output was produced (an earlier step failed)."
+            fi
+            echo '```'
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/package.json b/package.json
@@ -26,11 +26,13 @@
     "deploy": "pnpm run build && wrangler deploy",
     "wasm": "pnpm run build:wasm",
     "bench:seed": "npx tsx scripts/seed-local-r2.ts",
-    "bench": "npx tsx scripts/bench.ts"
+    "bench": "npx tsx scripts/bench.ts",
+    "bench:operators": "vitest bench src/bench-vs-duckdb.bench.ts"
   },
   "devDependencies": {
     "@cloudflare/workers-types": "^4.20250224.0",
     "@types/node": "^25.3.3",
+    "duckdb": "^1.4.4",
     "typescript": "^5.7.0",
     "vitest": "^3.0.0",
     "wrangler": "^4.0.0"
@@ -43,5 +45,8 @@
   "packageManager": "pnpm@10.21.0",
   "dependencies": {
     "zod": "^4.3.6"
+  },
+  "pnpm": {
+    "onlyBuiltDependencies": ["duckdb"]
   }
 }
diff --git a/src/bench-vs-duckdb.bench.ts b/src/bench-vs-duckdb.bench.ts
@@ -0,0 +1,240 @@
+/**
+ * Head-to-head benchmarks: QueryMode (Miniflare, full DO stack) vs DuckDB (native Node).
+ *
+ * QueryMode runs on the real CF Worker runtime via wrangler dev:
+ *   HTTP → Worker → Query DO → R2 → WASM decode → operators → response
+ *
+ * DuckDB runs natively in Node.js — no serialization, no HTTP, no Worker overhead.
+ *
+ * Prerequisites:
+ *   1. `pnpm dev` running on localhost:8787
+ *   2. `npx tsx scripts/generate-bench-data.ts`
+ *   3. `npx tsx scripts/seed-local-r2.ts`
+ *
+ * Usage: pnpm bench:operators
+ */
+
+import { describe, bench, beforeAll, afterAll } from "vitest";
+import duckdb from "duckdb";
+
+// ---------------------------------------------------------------------------
+// QueryMode (Miniflare) helpers
+// ---------------------------------------------------------------------------
+
+const BASE_URL = process.env.WORKER_URL ?? "http://localhost:8787";
+
+/**
+ * POSTs a JSON query descriptor to the worker's `/query` endpoint.
+ *
+ * @param body - Query descriptor; serialised with `JSON.stringify`.
+ * @returns The parsed JSON response body.
+ * @throws Error on a non-2xx response, carrying the status code and the first
+ *   200 characters of the response text.
+ */
+async function qmQuery(body: unknown): Promise<Record<string, unknown>> {
+  const endpoint = `${BASE_URL}/query`;
+  const response = await fetch(endpoint, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify(body),
+  });
+
+  if (response.ok) {
+    return response.json() as Promise<Record<string, unknown>>;
+  }
+
+  const detail = await response.text();
+  throw new Error(`QueryMode ${response.status}: ${detail.slice(0, 200)}`);
+}
+
+// ---------------------------------------------------------------------------
+// DuckDB helpers
+// ---------------------------------------------------------------------------
+
+let db: duckdb.Database;
+let con: duckdb.Connection;
+
+/**
+ * Promise wrapper around `con.run` for statements whose rows are not needed.
+ *
+ * @param sql - Statement to execute on the shared connection.
+ * @returns A promise that resolves when the callback reports no error and
+ *   rejects with the reported error otherwise.
+ */
+function duckRun(sql: string): Promise<void> {
+  return new Promise<void>((done, fail) => {
+    con.run(sql, (failure: Error | null) => {
+      if (!failure) {
+        done();
+        return;
+      }
+      fail(failure);
+    });
+  });
+}
+
+/**
+ * Promise wrapper around `con.all` that hands back every result row.
+ *
+ * @param sql - Query to execute on the shared connection.
+ * @returns A promise that resolves with the rows passed to the callback and
+ *   rejects with the reported error otherwise.
+ */
+function duckQuery(sql: string): Promise<Record<string, unknown>[]> {
+  return new Promise<Record<string, unknown>[]>((done, fail) => {
+    con.all(sql, (failure: Error | null, resultRows: Record<string, unknown>[]) => {
+      if (!failure) {
+        done(resultRows);
+        return;
+      }
+      fail(failure);
+    });
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Setup: DuckDB in-memory tables matching the seeded R2 Parquet data
+// ---------------------------------------------------------------------------
+
+/**
+ * Suite setup: checks that the worker answers on `/health`, issues one warm-up
+ * request, then creates the in-memory DuckDB tables the benchmarks query.
+ * The whole hook is given a 60 s timeout.
+ */
+beforeAll(async () => {
+  const unreachable = `Worker not reachable at ${BASE_URL}. Is 'pnpm dev' running?`;
+
+  // fetch() rejects (rather than returning a non-ok response) when nothing is
+  // listening, so map that case to the same actionable message.
+  let health: Response;
+  try {
+    health = await fetch(`${BASE_URL}/health`);
+  } catch (err: unknown) {
+    const detail = err instanceof Error ? err.message : String(err);
+    throw new Error(`${unreachable} (${detail})`);
+  }
+  if (!health.ok) throw new Error(unreachable);
+
+  // Warm up Query DO registration; drain the body so the response is fully
+  // consumed before the benchmarks start.
+  const warmup = await fetch(`${BASE_URL}/tables`);
+  await warmup.arrayBuffer();
+
+  db = new duckdb.Database(":memory:");
+  con = new duckdb.Connection(db);
+
+  // bench_1m mirrors bench_1m_numeric: id (BIGINT), value (DOUBLE), ids 0..999999.
+  // The CAST keeps `value` a DOUBLE; the bare modulo would produce an integer
+  // column and DuckDB would aggregate a different type than documented.
+  // NOTE(review): generate-bench-data.ts is not visible here. If it fills
+  // `value` with random numbers, the two engines see the same row counts and
+  // types but different values — confirm that is acceptable.
+  await duckRun(`CREATE TABLE bench_1m AS SELECT i AS id, CAST((i * 7 + 13) % 10000 AS DOUBLE) AS value FROM generate_series(0, 999999) t(i)`);
+
+  // bench_100k mirrors bench_100k_3col: id (BIGINT), value (DOUBLE),
+  // category (VARCHAR) — 100K rows cycling through five category names.
+  const cats = ["alpha", "beta", "gamma", "delta", "epsilon"];
+  await duckRun(`
+    CREATE TABLE bench_100k AS
+    SELECT i AS id,
+           CAST((i * 7 + 13) % 1000 AS DOUBLE) AS value,
+           CASE i % 5
+             WHEN 0 THEN '${cats[0]}'
+             WHEN 1 THEN '${cats[1]}'
+             WHEN 2 THEN '${cats[2]}'
+             WHEN 3 THEN '${cats[3]}'
+             ELSE '${cats[4]}'
+           END AS category
+    FROM generate_series(0, 99999) t(i)
+  `);
+}, 60_000);
+
+// Suite teardown: close the connection first, then the database. Either may
+// still be unset if setup failed before creating it.
+afterAll(() => {
+  if (con != null) {
+    con.close();
+  }
+  if (db != null) {
+    db.close();
+  }
+});
+
+// ===================================================================
+// 1. Full scan — 1M rows, 2 numeric columns
+// ===================================================================
+
+describe("Full scan 1M×2col numeric", () => {
+  // Empty filters/projections on the QueryMode side, SELECT * on the DuckDB side.
+  const request = { table: "bench_1m_numeric", filters: [], projections: [] };
+  const sql = `SELECT * FROM bench_1m`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});
+
+// ===================================================================
+// 2. Filter scan — 1M rows, filter id > 900000 (~10% selectivity)
+// ===================================================================
+
+describe("Filter scan 1M id>900000", () => {
+  // The same predicate (id > 900000) expressed for each engine.
+  const request = {
+    table: "bench_1m_numeric",
+    filters: [{ column: "id", op: "gt", value: 900000 }],
+    projections: [],
+  };
+  const sql = `SELECT * FROM bench_1m WHERE id > 900000`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});
+
+// ===================================================================
+// 3. Aggregate SUM — 1M rows
+// ===================================================================
+
+describe("Aggregate SUM 1M", () => {
+  // A single SUM over `value`, aliased `total` on both sides.
+  const request = {
+    table: "bench_1m_numeric",
+    filters: [],
+    projections: [],
+    aggregates: [{ fn: "sum", column: "value", alias: "total" }],
+  };
+  const sql = `SELECT SUM(value) as total FROM bench_1m`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});
+
+// ===================================================================
+// 4. Aggregate group by category — 100K×3col
+// ===================================================================
+
+describe("Aggregate group by category 100K", () => {
+  // SUM(value) and COUNT(id) grouped by `category` on both sides.
+  const request = {
+    table: "bench_100k_3col",
+    filters: [],
+    projections: [],
+    aggregates: [
+      { fn: "sum", column: "value", alias: "sum_value" },
+      { fn: "count", column: "id", alias: "cnt" },
+    ],
+    groupBy: ["category"],
+  };
+  const sql = `SELECT category, SUM(value) as sum_value, COUNT(id) as cnt
+       FROM bench_100k GROUP BY category`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});
+
+// ===================================================================
+// 5. Sort + Limit (TopK) — 1M rows, top 100
+// ===================================================================
+
+describe("TopK 100 from 1M", () => {
+  // Sort by `value` descending and keep the first 100 rows on both sides.
+  const request = {
+    table: "bench_1m_numeric",
+    filters: [],
+    projections: [],
+    sortColumn: "value",
+    sortDirection: "desc",
+    limit: 100,
+  };
+  const sql = `SELECT * FROM bench_1m ORDER BY value DESC LIMIT 100`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});
+
+// ===================================================================
+// 6. Column projection — 100K, select 1 of 3 columns
+// ===================================================================
+
+describe("Projection 100K select 1 col", () => {
+  // Only the `id` column is requested from each engine.
+  const request = { table: "bench_100k_3col", filters: [], projections: ["id"] };
+  const sql = `SELECT id FROM bench_100k`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});
+
+// ===================================================================
+// 7. Filter + Aggregate — 100K, filter + count
+// ===================================================================
+
+describe("Filter + Count 100K id>50000", () => {
+  // COUNT(id) restricted to id > 50000, aliased `cnt` on both sides.
+  const request = {
+    table: "bench_100k_3col",
+    filters: [{ column: "id", op: "gt", value: 50000 }],
+    projections: [],
+    aggregates: [{ fn: "count", column: "id", alias: "cnt" }],
+  };
+  const sql = `SELECT COUNT(id) as cnt FROM bench_100k WHERE id > 50000`;
+
+  bench(
+    "QueryMode (Miniflare)",
+    async () => {
+      await qmQuery(request);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+
+  bench(
+    "DuckDB (native)",
+    async () => {
+      await duckQuery(sql);
+    },
+    { time: 30_000, warmupIterations: 2 },
+  );
+});