Skip to content

Commit bf93a71

Browse files
committed
feat: add IS NULL/IS NOT NULL page skip via nullCount stats
- IS NULL skips pages with nullCount=0 (no nulls to find)
- IS NOT NULL skips pages with nullCount=rowCount (all nulls)
- Updated performance docs to list all 12 page-skip filter ops
1 parent 5006030 commit bf93a71

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

docs/src/content/docs/performance.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ This is automatic — no configuration needed. Supported filter ops:
106106
| `not_in` | Uniform page (min = max) appears in NOT IN list |
107107
| `between` | Page range and filter range don't overlap |
108108
| `not_between` | Entire page range inside the excluded range |
109+
| `is_null` | Page has zero null values (nullCount = 0) |
110+
| `is_not_null` | Page is entirely null (nullCount = rowCount) |
109111

110112
String columns also have min/max stats (lexicographic). `like` and `not_like` filters don't benefit from page skip.
111113

src/decode.test.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,25 @@ describe("canSkipPage", () => {
180180
it("does not skip range page for NOT IN", () => {
181181
expect(canSkipPage(page, [{ column: "x", op: "not_in", value: [10, 50, 90] }], "x")).toBe(false);
182182
});
183+
184+
it("skips page with IS NULL when nullCount is 0", () => {
185+
expect(canSkipPage(page, [{ column: "x", op: "is_null", value: 0 }], "x")).toBe(true);
186+
});
187+
188+
it("does not skip page with IS NULL when nullCount > 0", () => {
189+
const withNulls: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 5, minValue: 10, maxValue: 90 };
190+
expect(canSkipPage(withNulls, [{ column: "x", op: "is_null", value: 0 }], "x")).toBe(false);
191+
});
192+
193+
it("skips page with IS NOT NULL when all rows are null", () => {
194+
const allNull: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 50 };
195+
expect(canSkipPage(allNull, [{ column: "x", op: "is_not_null", value: 0 }], "x")).toBe(true);
196+
});
197+
198+
it("does not skip page with IS NOT NULL when some rows are non-null", () => {
199+
const withNulls: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 5, minValue: 10, maxValue: 90 };
200+
expect(canSkipPage(withNulls, [{ column: "x", op: "is_not_null", value: 0 }], "x")).toBe(false);
201+
});
183202
});
184203

185204
describe("assembleRows", () => {

src/decode.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import { decodeLanceV2Utf8 } from "./lance-v2.js";
99
export function canSkipPage(page: PageInfo, filters: QueryDescriptor["filters"], columnName: string): boolean {
1010
for (const filter of filters) {
1111
if (filter.column !== columnName) continue;
12-
if (filter.op === "is_null" || filter.op === "is_not_null") continue;
12+
// is_null: skip when page has no nulls; is_not_null: skip when page is all nulls
13+
if (filter.op === "is_null") { if (page.nullCount === 0) return true; continue; }
14+
if (filter.op === "is_not_null") { if (page.nullCount === page.rowCount) return true; continue; }
1315
if (page.minValue === undefined || page.maxValue === undefined) continue;
1416

1517
let { minValue: min, maxValue: max } = page;

0 commit comments

Comments
 (0)