Skip to content

Commit 8cf2de3

Browse files
committed
fix: Lance v2 nullable columns, column validation, cursor batching, BigInt export
- Rewrite Lance v2 parser to handle nullable columns with 64-byte aligned bitmap+data layout (all row slots stored, null bitmap as mask)
- Add column validation in dataset queries (filter, projection, sort)
- Fix cursor to respect batchSize via row-level chunking
- Export bigIntReplacer from both entry points for JSON serialization
- Fix CLAUDE.md API examples to match actual filter(col, op, val) syntax
- Add dataOffsetInPage to PageInfo for Lance v2 alignment padding
- Copy WASM binary in build:ts script
1 parent d8f98af commit 8cf2de3

File tree

10 files changed

+448
-101
lines changed

10 files changed

+448
-101
lines changed

.claude/CLAUDE.md

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,14 @@ import { QueryMode } from "querymode"
162162
const local = QueryMode.local()
163163
const results = await local
164164
.table("./data/users.lance")
165-
.filter(r => r.age > 25)
165+
.filter("age", "gt", 25)
166166
.exec()
167167

168168
// Edge mode (Durable Objects)
169169
const edge = QueryMode.remote(env.QUERY_DO)
170170
const results = await edge
171171
.table("users")
172-
.filter(r => r.age > 25)
172+
.filter("age", "gt", 25)
173173
.select("name", "email")
174174
.sort("age", "desc")
175175
.limit(100)
@@ -184,15 +184,13 @@ const similar = await edge
184184
// JOIN (code, not SQL)
185185
const orders = edge.table("orders")
186186
const users = edge.table("users")
187-
const results = await edge.query(async () => {
188-
const topOrders = await orders.filter(r => r.amount > 100).exec()
189-
const userIds = topOrders.map(r => r.user_id)
190-
const orderUsers = await users.filter(r => userIds.includes(r.id)).exec()
191-
return topOrders.map(o => ({
192-
...o,
193-
user: orderUsers.find(u => u.id === o.user_id)
194-
}))
195-
})
187+
const topOrders = await orders.filter("amount", "gt", 100).exec()
188+
const userIds = topOrders.rows.map(r => r.user_id)
189+
const orderUsers = await users.filter("id", "in", userIds).exec()
190+
const joined = topOrders.rows.map(o => ({
191+
...o,
192+
user: orderUsers.rows.find(u => u.id === o.user_id)
193+
}))
196194

197195
// Same API works everywhere:
198196
// - Browser (WASM + fetch)
@@ -202,7 +200,7 @@ const results = await edge.query(async () => {
202200

203201
API surface:
204202
- `.table(name)` — typed handle (schema from cached footer)
205-
- `.filter(predicate)` — pushdown to page-level skipping via min/max stats
203+
- `.filter(column, op, value)` — pushdown to page-level skipping via min/max stats
206204
- `.select(...columns)` — column projection (only fetch needed byte ranges)
207205
- `.sort(column, dir)` — use index if available, otherwise top-K heap
208206
- `.limit(n)` — early termination
@@ -309,7 +307,7 @@ tools: [{
309307

310308
// Agent generates code directly — no SQL to learn
311309
await query_data({
312-
code: `table.filter(r => r.embedding.similarity(queryVec) > 0.8)`
310+
code: `table.vector("embedding", queryVec, 10)`
313311
})
314312
```
315313

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
"scripts": {
1919
"dev": "wrangler dev",
2020
"prepublishOnly": "pnpm run build",
21-
"build": "pnpm run build:wasm && tsc",
21+
"build": "pnpm run build:wasm && pnpm run build:ts",
2222
"build:wasm": "cd wasm && zig build wasm && mkdir -p ../src/wasm && cp zig-out/bin/querymode.wasm ../src/wasm/",
23-
"build:ts": "tsc",
23+
"build:ts": "tsc && mkdir -p dist/wasm && cp src/wasm/querymode.wasm dist/wasm/",
2424
"test": "vitest run",
2525
"test:watch": "vitest",
2626
"deploy": "pnpm run build && wrangler deploy",

src/client.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import type {
1414
* Usage:
1515
* const results = await querymode
1616
* .table("users")
17-
* .filter(r => r.age > 25)
17+
* .filter("age", "gt", 25)
1818
* .select("name", "email")
1919
* .sort("age", "desc")
2020
* .limit(100)

src/decode.ts

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import type { ColumnMeta, PageInfo, Row, DataType } from "./types.js";
22
import type { QueryDescriptor } from "./client.js";
33
import type { WasmEngine } from "./wasm-engine.js";
44
import { decodeParquetColumnChunk, decodePlainValues } from "./parquet-decode.js";
5+
import { decodeLanceV2Utf8 } from "./lance-v2.js";
56

67
/** Check if a page can be skipped via min/max stats. */
78
export function canSkipPage(page: PageInfo, filters: QueryDescriptor["filters"], columnName: string): boolean {
@@ -94,7 +95,7 @@ function decodeAllColumns(
9495
const pi = col.pages[i];
9596
const decoded = pi?.encoding
9697
? decodeParquetColumnChunk(pages[i], pi.encoding, col.dtype, pi.rowCount, wasm)
97-
: decodePage(pages[i], col.dtype, pi?.nullCount ?? 0, pi?.rowCount ?? 0);
98+
: decodePage(pages[i], col.dtype, pi?.nullCount ?? 0, pi?.rowCount ?? 0, pi?.dataOffsetInPage);
9899
for (const v of decoded) {
99100
values.push(v);
100101
}
@@ -121,12 +122,15 @@ function decodeFixedSizeListPages(pages: ArrayBuffer[], dim: number): Float32Arr
121122
}
122123

123124

124-
/** Decode a raw page buffer into typed values. Handles null bitmaps when nullCount > 0. */
125+
/** Decode a raw page buffer into typed values. Handles null bitmaps when nullCount > 0.
126+
* @param dataOffsetInPage - For Lance v2 nullable pages: byte offset where data starts (after bitmap + alignment padding)
127+
*/
125128
export function decodePage(
126129
buf: ArrayBuffer,
127130
dtype: string,
128131
nullCount = 0,
129132
rowCount = 0,
133+
dataOffsetInPage?: number,
130134
): (number | bigint | string | null)[] {
131135
let nulls: Set<number> | null = null;
132136
if (nullCount > 0 && rowCount > 0) {
@@ -140,14 +144,55 @@ export function decodePage(
140144
if (((bytes[b] >> bit) & 1) === 0) nulls.add(idx);
141145
}
142146
}
143-
buf = buf.slice(bitmapBytes);
147+
// Lance v2 uses alignment padding between bitmap and data.
148+
// dataOffsetInPage gives the exact data start; otherwise strip only bitmap bytes.
149+
const stripBytes = dataOffsetInPage ?? bitmapBytes;
150+
buf = buf.slice(stripBytes);
144151
}
145152

146153
const bytes = new Uint8Array(buf);
147-
const numValues = rowCount > 0 ? rowCount - (nulls?.size ?? 0) : Number.MAX_SAFE_INTEGER;
154+
// Lance v2 stores ALL row slots (including zeros at null positions).
155+
// When dataOffsetInPage is set, decode all rowCount values and mask nulls.
156+
// Parquet-style packs only non-null values, so decode rowCount - nulls.size.
157+
const isLanceV2Nullable = dataOffsetInPage !== undefined;
158+
const numValues = rowCount > 0
159+
? (isLanceV2Nullable ? rowCount : rowCount - (nulls?.size ?? 0))
160+
: Number.MAX_SAFE_INTEGER;
161+
162+
// For utf8/binary with rowCount known and buffer large enough to contain an offsets array,
163+
// try Lance v2 format (i64 offsets + string data) first.
164+
if ((dtype === "utf8" || dtype === "binary") && rowCount > 0 && buf.byteLength >= rowCount * 8) {
165+
const decodeCount = isLanceV2Nullable ? rowCount : numValues;
166+
const v2Strings = decodeLanceV2Utf8(buf, decodeCount);
167+
if (v2Strings.length === decodeCount && v2Strings.every(s => typeof s === "string")) {
168+
const looksValid = v2Strings.every(s => s.length < buf.byteLength);
169+
if (looksValid) {
170+
if (nulls && nulls.size > 0) {
171+
if (isLanceV2Nullable) {
172+
// Lance v2: all slots present, just null-mask them
173+
return v2Strings.map((s, i) => nulls!.has(i) ? null : s);
174+
}
175+
// Parquet-style: packed non-null values, interleave with nulls
176+
const withNulls: (string | null)[] = [];
177+
let vi = 0;
178+
for (let i = 0; i < rowCount; i++) {
179+
withNulls.push(nulls.has(i) ? null : (vi < v2Strings.length ? v2Strings[vi++] : null));
180+
}
181+
return withNulls;
182+
}
183+
return v2Strings;
184+
}
185+
}
186+
}
187+
148188
const values = decodePlainValues(bytes, dtype as DataType, numValues) as (number | bigint | string | null)[];
149189

150190
if (nulls && nulls.size > 0) {
191+
if (isLanceV2Nullable) {
192+
// Lance v2: all row slots present, replace null positions with null
193+
return values.map((v, i) => nulls!.has(i) ? null : v);
194+
}
195+
// Parquet-style: packed non-null values, interleave with nulls
151196
const withNulls: (number | bigint | string | null)[] = [];
152197
let vi = 0;
153198
for (let i = 0; i < rowCount; i++) {

src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export { FragmentDO } from "./fragment-do.js";
1010
export { TableQuery } from "./client.js";
1111
export { QueryModeError } from "./errors.js";
1212
export { LocalExecutor } from "./local-executor.js";
13+
export { bigIntReplacer } from "./decode.js";
1314
export type { QueryExecutor, QueryDescriptor } from "./client.js";
1415
export type {
1516
Env,

0 commit comments

Comments
 (0)