Skip to content

Commit 19e09c6

Browse files
committed
feat: add local micro-benchmark and quickstart example
- scripts/bench-local.ts: bitmap decode, int64/float64 decode, coalesce, canSkipPage, and in-memory query pipeline benchmarks - examples/local-quickstart.ts: minimal LocalExecutor usage example - package.json: add bench:local script
1 parent 0fa42bb commit 19e09c6

File tree

3 files changed

+335
-1
lines changed

3 files changed

+335
-1
lines changed

examples/local-quickstart.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* QueryMode local quickstart — run with: npx tsx examples/local-quickstart.ts
3+
*
4+
* Reads a Lance or Parquet file from disk, applies filter + sort + limit,
5+
* and prints the result. Replace the path with your own data file.
6+
*/
7+
import { LocalExecutor } from "../src/local-executor.js";
8+
import { DataFrame } from "../src/client.js";
9+
10+
const executor = new LocalExecutor();
11+
const TABLE = process.argv[2] ?? "./data/events.parquet";
12+
13+
const df = new DataFrame(TABLE, executor);
14+
15+
const result = await df
16+
.filter("amount", "gt", 100)
17+
.whereNotNull("region")
18+
.select("id", "amount", "region")
19+
.sort("amount", "desc")
20+
.limit(10)
21+
.collect();
22+
23+
console.log(`${result.rowCount} rows, ${result.pagesSkipped} pages skipped`);
24+
console.table(result.rows);

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
"wasm": "pnpm run build:wasm",
2828
"bench:seed": "npx tsx scripts/seed-local-r2.ts",
2929
"bench": "npx tsx scripts/bench.ts",
30-
"bench:operators": "vitest bench src/bench-vs-duckdb.bench.ts"
30+
"bench:operators": "vitest bench src/bench-vs-duckdb.bench.ts",
31+
"bench:local": "npx tsx scripts/bench-local.ts"
3132
},
3233
"devDependencies": {
3334
"@cloudflare/workers-types": "^4.20250224.0",

scripts/bench-local.ts

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
#!/usr/bin/env npx tsx
2+
/**
3+
* Local micro-benchmark — measures core code paths without wrangler dev.
4+
*
5+
* Tests the specific optimizations:
6+
* 1. Null bitmap decode (fast path for 0xFF/0x00 bytes)
7+
* 2. Coalesce range merging (autoCoalesceGap + coalesceRanges)
8+
* 3. canSkipPage (page-level filter pushdown)
9+
* 4. Operator pipeline (ScanOperator → FilterOperator → TopK)
10+
* 5. In-memory query via MaterializedExecutor
11+
*
12+
* Usage: npx tsx scripts/bench-local.ts
13+
*/
14+
15+
import { decodePage, canSkipPage } from "../src/decode.js";
16+
import { coalesceRanges, autoCoalesceGap, type Range } from "../src/coalesce.js";
17+
import { QueryMode } from "../src/local.js";
18+
19+
// ---------------------------------------------------------------------------
20+
// Helpers
21+
// ---------------------------------------------------------------------------
22+
23+
function timeIt(name: string, fn: () => void, iterations = 1000): { name: string; totalMs: number; opsPerSec: number; avgUs: number } {
24+
// Warmup
25+
for (let i = 0; i < Math.min(50, iterations); i++) fn();
26+
27+
const start = performance.now();
28+
for (let i = 0; i < iterations; i++) fn();
29+
const totalMs = performance.now() - start;
30+
const avgUs = (totalMs / iterations) * 1000;
31+
const opsPerSec = Math.round(iterations / (totalMs / 1000));
32+
return { name, totalMs, opsPerSec, avgUs };
33+
}
34+
35+
async function timeItAsync(name: string, fn: () => Promise<void>, iterations = 100): Promise<{ name: string; totalMs: number; opsPerSec: number; avgUs: number }> {
36+
// Warmup
37+
for (let i = 0; i < Math.min(10, iterations); i++) await fn();
38+
39+
const start = performance.now();
40+
for (let i = 0; i < iterations; i++) await fn();
41+
const totalMs = performance.now() - start;
42+
const avgUs = (totalMs / iterations) * 1000;
43+
const opsPerSec = Math.round(iterations / (totalMs / 1000));
44+
return { name, totalMs, opsPerSec, avgUs };
45+
}
46+
47+
type BenchResult = { name: string; totalMs: number; opsPerSec: number; avgUs: number };
48+
49+
function printTable(results: BenchResult[]): void {
50+
console.log(
51+
"\n" +
52+
[
53+
"Benchmark".padEnd(55),
54+
"avg".padStart(12),
55+
"ops/sec".padStart(12),
56+
"total".padStart(10),
57+
].join(" | ")
58+
);
59+
console.log("-".repeat(95));
60+
for (const r of results) {
61+
const avgStr = r.avgUs < 1000 ? `${r.avgUs.toFixed(1)}µs` : `${(r.avgUs / 1000).toFixed(2)}ms`;
62+
console.log(
63+
[
64+
r.name.padEnd(55),
65+
avgStr.padStart(12),
66+
r.opsPerSec.toLocaleString().padStart(12),
67+
`${r.totalMs.toFixed(0)}ms`.padStart(10),
68+
].join(" | ")
69+
);
70+
}
71+
console.log("-".repeat(95));
72+
}
73+
74+
// ---------------------------------------------------------------------------
75+
// 1. Null bitmap decode
76+
// ---------------------------------------------------------------------------
77+
78+
function benchBitmapDecode(): BenchResult[] {
79+
const results: BenchResult[] = [];
80+
const rowCount = 100_000;
81+
82+
// All valid (0xFF bytes) — fast path should fly
83+
const allValid = new ArrayBuffer(8 + Math.ceil(rowCount / 8) + rowCount * 8);
84+
new Uint8Array(allValid, 0, Math.ceil(rowCount / 8)).fill(0xFF);
85+
// Write int64 data after bitmap
86+
const dv = new DataView(allValid, Math.ceil(rowCount / 8));
87+
for (let i = 0; i < rowCount; i++) dv.setBigInt64(i * 8, BigInt(i), true);
88+
results.push(timeIt("bitmap: 100K rows, all valid (0xFF fast path)", () => {
89+
decodePage(allValid, "int64", 0, rowCount);
90+
}, 200));
91+
92+
// 50% null (alternating 0xAA bytes)
93+
const halfNull = new ArrayBuffer(Math.ceil(rowCount / 8) + rowCount * 8);
94+
new Uint8Array(halfNull, 0, Math.ceil(rowCount / 8)).fill(0xAA); // 10101010
95+
results.push(timeIt("bitmap: 100K rows, 50% null (bit-by-bit)", () => {
96+
decodePage(halfNull, "int64", rowCount / 2, rowCount);
97+
}, 200));
98+
99+
// All null (0x00 bytes) — fast path should batch-add
100+
const allNull = new ArrayBuffer(Math.ceil(rowCount / 8) + rowCount * 8);
101+
new Uint8Array(allNull, 0, Math.ceil(rowCount / 8)).fill(0x00);
102+
results.push(timeIt("bitmap: 100K rows, all null (0x00 fast path)", () => {
103+
decodePage(allNull, "int64", rowCount, rowCount);
104+
}, 200));
105+
106+
return results;
107+
}
108+
109+
// ---------------------------------------------------------------------------
110+
// 2. Coalesce ranges
111+
// ---------------------------------------------------------------------------
112+
113+
function benchCoalesce(): BenchResult[] {
114+
const results: BenchResult[] = [];
115+
116+
// Dense ranges (small gaps — should merge aggressively)
117+
const denseRanges: Range[] = [];
118+
for (let i = 0; i < 500; i++) {
119+
denseRanges.push({ column: `col${i % 5}`, offset: i * 8200, length: 8000 });
120+
}
121+
results.push(timeIt("coalesce: 500 dense ranges (200B gaps)", () => {
122+
const gap = autoCoalesceGap(denseRanges);
123+
coalesceRanges(denseRanges, gap);
124+
}, 5000));
125+
126+
// Sparse ranges (large gaps — should keep separate)
127+
const sparseRanges: Range[] = [];
128+
for (let i = 0; i < 500; i++) {
129+
sparseRanges.push({ column: `col${i % 5}`, offset: i * 1_000_000, length: 8000 });
130+
}
131+
results.push(timeIt("coalesce: 500 sparse ranges (992KB gaps)", () => {
132+
const gap = autoCoalesceGap(sparseRanges);
133+
coalesceRanges(sparseRanges, gap);
134+
}, 5000));
135+
136+
// Mixed — realistic scenario
137+
const mixedRanges: Range[] = [];
138+
for (let i = 0; i < 200; i++) {
139+
// Clustered in groups of 10
140+
const group = Math.floor(i / 10);
141+
const inGroup = i % 10;
142+
mixedRanges.push({ column: `col${i % 3}`, offset: group * 500_000 + inGroup * 10_000, length: 8000 });
143+
}
144+
results.push(timeIt("coalesce: 200 mixed ranges (clustered)", () => {
145+
const gap = autoCoalesceGap(mixedRanges);
146+
coalesceRanges(mixedRanges, gap);
147+
}, 5000));
148+
149+
return results;
150+
}
151+
152+
// ---------------------------------------------------------------------------
153+
// 3. canSkipPage (page-level filter pushdown)
154+
// ---------------------------------------------------------------------------
155+
156+
function benchCanSkipPage(): BenchResult[] {
157+
const results: BenchResult[] = [];
158+
const pages = Array.from({ length: 100 }, (_, i) => ({
159+
byteOffset: BigInt(i * 80000),
160+
byteLength: 80000,
161+
rowCount: 10000,
162+
minValue: i * 10000,
163+
maxValue: (i + 1) * 10000 - 1,
164+
}));
165+
166+
const filters = [{ column: "id", op: "gt" as const, value: 500000 }];
167+
168+
results.push(timeIt("canSkipPage: 100 pages × gt filter (50% skip)", () => {
169+
let skipped = 0;
170+
for (const page of pages) {
171+
if (canSkipPage(page, filters, "id")) skipped++;
172+
}
173+
}, 10000));
174+
175+
const rangeFilters = [
176+
{ column: "id", op: "gte" as const, value: 200000 },
177+
{ column: "id", op: "lt" as const, value: 800000 },
178+
];
179+
results.push(timeIt("canSkipPage: 100 pages × range filter (40% skip)", () => {
180+
let skipped = 0;
181+
for (const page of pages) {
182+
if (canSkipPage(page, rangeFilters, "id")) skipped++;
183+
}
184+
}, 10000));
185+
186+
return results;
187+
}
188+
189+
// ---------------------------------------------------------------------------
190+
// 4. In-memory query pipeline (MaterializedExecutor via fromJSON)
191+
// ---------------------------------------------------------------------------
192+
193+
async function benchInMemoryQuery(): Promise<BenchResult[]> {
194+
const results: BenchResult[] = [];
195+
196+
// Generate 10K rows
197+
const data = Array.from({ length: 10_000 }, (_, i) => ({
198+
id: i,
199+
value: Math.random() * 1000,
200+
category: ["alpha", "beta", "gamma", "delta", "epsilon"][i % 5],
201+
region: ["us", "eu", "asia"][i % 3],
202+
}));
203+
204+
const qm = QueryMode.fromJSON(data, "bench_data");
205+
206+
// Full scan
207+
results.push(await timeItAsync("query: 10K full scan", async () => {
208+
await qm.select("id", "value", "category").collect();
209+
}, 500));
210+
211+
// Filter + collect
212+
results.push(await timeItAsync("query: 10K filter id>5000 (50% sel)", async () => {
213+
await qm.filter("id", "gt", 5000).collect();
214+
}, 500));
215+
216+
// Filter + sort + limit (TopK)
217+
results.push(await timeItAsync("query: 10K filter+sort+limit(10)", async () => {
218+
await qm.filter("id", "gt", 5000).sort("value", "desc").limit(10).collect();
219+
}, 500));
220+
221+
// Projection only
222+
results.push(await timeItAsync("query: 10K project 1 of 4 cols", async () => {
223+
await qm.select("id").collect();
224+
}, 500));
225+
226+
// 100K rows
227+
const bigData = Array.from({ length: 100_000 }, (_, i) => ({
228+
id: i,
229+
amount: Math.random() * 10000,
230+
category: ["A", "B", "C", "D", "E"][i % 5],
231+
}));
232+
const bigQm = QueryMode.fromJSON(bigData, "big_bench");
233+
234+
results.push(await timeItAsync("query: 100K full scan", async () => {
235+
await bigQm.collect();
236+
}, 50));
237+
238+
results.push(await timeItAsync("query: 100K filter+sort+limit(100)", async () => {
239+
await bigQm.filter("id", "gt", 50000).sort("amount", "desc").limit(100).collect();
240+
}, 100));
241+
242+
results.push(await timeItAsync("query: 100K filter id>90000 (10% sel)", async () => {
243+
await bigQm.filter("id", "gt", 90000).collect();
244+
}, 100));
245+
246+
return results;
247+
}
248+
249+
// ---------------------------------------------------------------------------
250+
// 5. Int64 decode (pure decode path)
251+
// ---------------------------------------------------------------------------
252+
253+
function benchInt64Decode(): BenchResult[] {
254+
const results: BenchResult[] = [];
255+
256+
// 100K int64 values, no nulls
257+
const rowCount = 100_000;
258+
const buf = new ArrayBuffer(rowCount * 8);
259+
const dv = new DataView(buf);
260+
for (let i = 0; i < rowCount; i++) dv.setBigInt64(i * 8, BigInt(i), true);
261+
262+
results.push(timeIt("decode: 100K int64 values (no nulls)", () => {
263+
decodePage(buf, "int64", 0, rowCount);
264+
}, 200));
265+
266+
// 100K float64 values
267+
const fbuf = new ArrayBuffer(rowCount * 8);
268+
const fdv = new DataView(fbuf);
269+
for (let i = 0; i < rowCount; i++) fdv.setFloat64(i * 8, i * 1.5, true);
270+
271+
results.push(timeIt("decode: 100K float64 values (no nulls)", () => {
272+
decodePage(fbuf, "float64", 0, rowCount);
273+
}, 200));
274+
275+
return results;
276+
}
277+
278+
// ---------------------------------------------------------------------------
279+
// Main
280+
// ---------------------------------------------------------------------------
281+
282+
async function main(): Promise<void> {
283+
console.log("QueryMode Local Micro-Benchmark");
284+
console.log(`Node ${process.version} | ${process.platform} ${process.arch}`);
285+
console.log("=".repeat(95));
286+
287+
const allResults: BenchResult[] = [];
288+
289+
console.log("\n## Bitmap Decode");
290+
allResults.push(...benchBitmapDecode());
291+
292+
console.log("\n## Int64/Float64 Decode");
293+
allResults.push(...benchInt64Decode());
294+
295+
console.log("\n## Coalesce Ranges");
296+
allResults.push(...benchCoalesce());
297+
298+
console.log("\n## Page Skip (canSkipPage)");
299+
allResults.push(...benchCanSkipPage());
300+
301+
console.log("\n## In-Memory Query Pipeline");
302+
allResults.push(...await benchInMemoryQuery());
303+
304+
console.log("\n\n" + "=".repeat(95));
305+
console.log("FULL RESULTS");
306+
printTable(allResults);
307+
}
308+
309+
// Surface failures as a non-zero exit code so CI notices a broken benchmark
// (the original exited 0 even when main() rejected).
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)