Skip to content

Commit cb2a6f9

Browse files
committed
feat: add LIKE prefix page skip for string columns
Extracts the fixed prefix from a LIKE pattern (the literal text before the first wildcard) and skips pages whose min/max string range cannot contain any value starting with that prefix. Patterns that start with % or _ have no fixed prefix and fall through to row-level evaluation.
1 parent 31d46f2 commit cb2a6f9

3 files changed

Lines changed: 42 additions & 1 deletion

File tree

docs/src/content/docs/performance.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ This is automatic — no configuration needed. Supported filter ops:
109109
| `is_null` | Page has zero null values (nullCount = 0) |
110110
| `is_not_null` | Page is entirely null (nullCount = rowCount) |
111111

112-
String columns also have min/max stats (lexicographic). `like` and `not_like` filters don't benefit from page skip.
112+
String columns also have min/max stats (lexicographic). `like` patterns with a fixed prefix (e.g., `'abc%'`) can skip pages where the string range doesn't overlap the prefix. Patterns starting with a wildcard (e.g., `'%xyz'`) cannot be pruned. `not_like` does not benefit from page skip.
113113

114114
### OR filters and page skip
115115

src/decode.test.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,25 @@ describe("canSkipPage", () => {
199199
const withNulls: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 5, minValue: 10, maxValue: 90 };
200200
expect(canSkipPage(withNulls, [{ column: "x", op: "is_not_null", value: 0 }], "x")).toBe(false);
201201
});
202+
203+
it("skips page with LIKE prefix when range doesn't overlap", () => {
204+
const strPage: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 0, minValue: "apple", maxValue: "banana" };
205+
// Prefix "car" is entirely above [apple, banana]
206+
expect(canSkipPage(strPage, [{ column: "x", op: "like", value: "car%" }], "x")).toBe(true);
207+
// Prefix "aa" is entirely below [apple, banana]
208+
expect(canSkipPage(strPage, [{ column: "x", op: "like", value: "aa%" }], "x")).toBe(true);
209+
});
210+
211+
it("does not skip page with LIKE prefix when range overlaps", () => {
212+
const strPage: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 0, minValue: "apple", maxValue: "banana" };
213+
expect(canSkipPage(strPage, [{ column: "x", op: "like", value: "ba%" }], "x")).toBe(false);
214+
expect(canSkipPage(strPage, [{ column: "x", op: "like", value: "app%" }], "x")).toBe(false);
215+
});
216+
217+
it("does not skip page with LIKE pattern starting with wildcard", () => {
218+
const strPage: PageInfo = { byteOffset: 0n, byteLength: 100, rowCount: 50, nullCount: 0, minValue: "apple", maxValue: "banana" };
219+
expect(canSkipPage(strPage, [{ column: "x", op: "like", value: "%xyz%" }], "x")).toBe(false);
220+
});
202221
});
203222

204223
describe("assembleRows", () => {

src/decode.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,16 @@ export function canSkipPage(page: PageInfo, filters: QueryDescriptor["filters"],
7171
if (min >= lo && max <= hi) return true;
7272
break;
7373
}
74+
// like: skip when the fixed prefix doesn't overlap the page's string range
75+
case "like": {
76+
if (typeof filter.value !== "string" || typeof min !== "string") break;
77+
const prefix = extractLikePrefix(filter.value);
78+
if (!prefix) break;
79+
// Page max < prefix, or page min >= prefixEnd (the prefix with its last UTF-16 code unit incremented, an exclusive upper bound) → no match possible
80+
const prefixEnd = prefix.slice(0, -1) + String.fromCharCode(prefix.charCodeAt(prefix.length - 1) + 1);
81+
if ((max as string) < prefix || (min as string) >= prefixEnd) return true;
82+
break;
83+
}
7484
}
7585
}
7686
return false;
@@ -547,6 +557,18 @@ export function rowPassesFilters(row: Row, filters: FilterOp[], filterGroups?: F
547557
return true;
548558
}
549559

560+
/**
 * Extract the fixed (wildcard-free) prefix of a LIKE pattern.
 *
 * Scans the pattern from the left and collects literal characters until the
 * first unescaped wildcard (`%` or `_`). A backslash escapes the character
 * that follows it, so `\%` and `\_` contribute a literal `%` / `_` to the
 * prefix instead of ending it.
 *
 * A trailing backslash with nothing left to escape ends the prefix rather
 * than being added to it. A shorter prefix can only make prefix-based page
 * skipping more conservative, whereas a prefix that is too long could prune
 * pages that contain matching rows.
 * NOTE(review): confirm how the row-level LIKE matcher treats a dangling
 * escape; this function does not depend on the answer.
 *
 * @param pattern - The raw LIKE pattern, e.g. `'abc%'`.
 * @returns The literal prefix, or `null` when the pattern has no usable
 *   prefix (it is empty or starts with a wildcard).
 */
function extractLikePrefix(pattern: string): string | null {
  let prefix = "";
  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i];
    // First unescaped wildcard: everything after it is not a fixed prefix.
    if (ch === "%" || ch === "_") break;
    if (ch === "\\") {
      // Dangling escape at the end of the pattern: stop here (conservative).
      if (i + 1 >= pattern.length) break;
      // Escaped character: take it literally and skip past it.
      prefix += pattern[++i];
      continue;
    }
    prefix += ch;
  }
  return prefix.length > 0 ? prefix : null;
}
571+
550572
/** Cache compiled LIKE regexes — avoids re-compilation per row. */
551573
const likeRegexCache = new Map<string, RegExp>();
552574

0 commit comments

Comments
 (0)