Skip to content

Commit cea7292

Browse files
committed
feat: NOT BETWEEN WASM filter exports + scanFilterIndices WASM path
- Add filterFloat64NotRange, filterInt32NotRange, filterInt64NotRange Zig exports for inverted range filter (val < low OR val > high)
- wasmFilterNotRange() TS helper mirrors wasmFilterRange()
- scanFilterIndices dispatches NOT BETWEEN to WASM not-range exports
- WasmAggregateOperator.evalAndFilters handles BETWEEN/NOT BETWEEN
- canUseWasmAggregate now accepts NOT BETWEEN filters
1 parent 57e4f3d commit cea7292

File tree

4 files changed

+147
-11
lines changed

4 files changed

+147
-11
lines changed

research/zig-engine-roadmap.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,16 @@ Learnings from sibling Zig repos, prioritized by impact on QueryMode's WASM engi
7171
- Client API: `.whereOr(...groups: FilterOp[][])`
7272
- `canUseWasmAggregate` rejects queries with filterGroups (not optimized for OR yet)
7373

74+
### NOT BETWEEN WASM Filter — DONE
75+
- `filterFloat64NotRange`, `filterInt32NotRange`, `filterInt64NotRange` Zig exports
76+
- Returns indices where `val < low OR val > high`
77+
- `wasmFilterNotRange()` TS helper mirrors `wasmFilterRange()`
78+
- `scanFilterIndices`, `WasmAggregateOperator.evalAndFilters` dispatch BETWEEN/NOT BETWEEN to range/not-range exports
79+
- `canUseWasmAggregate` allows NOT BETWEEN filters
80+
7481
**Remaining:**
7582
- Full columnar pipeline: connect TS pipeline to Zig SelectionVector/DataChunk types (removes Row[] batch format)
7683
- WASM SIMD LIKE filter (comptime string pattern matching in Zig)
77-
- WasmAggregateOperator OR support (union index arrays in aggregate path)
7884

7985
### BETWEEN Support — DONE
8086
- `filterFloat64Range`, `filterInt32Range`, `filterInt64Range` Zig exports

src/operators.ts

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,8 @@ function applyAndFilters(
394394
return new Uint32Array(0);
395395
}
396396

397-
const isCompoundOp = filter.op === "in" || filter.op === "not_in" || filter.op === "between" || filter.op === "not_between" || filter.op === "like" || filter.op === "not_like";
397+
const isRangeOp = filter.op === "between" || filter.op === "not_between";
398+
const isCompoundOp = filter.op === "in" || filter.op === "not_in" || isRangeOp || filter.op === "like" || filter.op === "not_like";
398399
const wasmOp = !isCompoundOp ? filterOpToWasm(filter.op) : -1;
399400

400401
// Try WASM SIMD path for numeric scalar filters
@@ -410,11 +411,13 @@ function applyAndFilters(
410411
}
411412
}
412413

413-
// Try WASM BETWEEN path for numeric range filters
414-
if (filter.op === "between" && Array.isArray(filter.value) && filter.value.length === 2 &&
414+
// Try WASM BETWEEN/NOT BETWEEN path for numeric range filters
415+
if ((filter.op === "between" || filter.op === "not_between") &&
416+
Array.isArray(filter.value) && filter.value.length === 2 &&
415417
typeof filter.value[0] === "number" && typeof filter.value[1] === "number" &&
416418
(dtype === "float64" || dtype === "float32" || dtype === "int32" || dtype === "int64")) {
417-
const filterResult = wasmFilterRange(
419+
const filterFn = filter.op === "between" ? wasmFilterRange : wasmFilterNotRange;
420+
const filterResult = filterFn(
418421
values, dtype, filter.value[0], filter.value[1], rowCount, wasm,
419422
);
420423
if (filterResult) {
@@ -558,6 +561,60 @@ function wasmFilterRange(
558561
}
559562
}
560563

564+
/**
 * Run the WASM NOT BETWEEN (inverted range) filter on decoded numeric values.
 *
 * Copies the first `rowCount` entries of `values` into WASM memory, calls the
 * dtype-specific `filter*NotRange` export, and returns a copy of the indices
 * it reported (rows where `value < low || value > high`).
 *
 * @param values   Decoded column values. `null`/`undefined` entries are written as 0.
 *                 NOTE(review): a missing value therefore matches whenever 0 lies
 *                 outside [low, high] — confirm this agrees with the non-WASM path.
 * @param dtype    Column dtype. "float64" and "float32" both use the f64 export;
 *                 "int64" and "int32" use their own exports; anything else yields `null`.
 * @param low      Lower bound of the excluded range.
 * @param high     Upper bound of the excluded range.
 * @param rowCount Number of rows to scan.
 * @param wasm     Engine providing `resetHeap`, `alloc`, `memory` and the filter exports.
 * @returns The matching row indices, or `null` when the WASM path cannot produce a
 *          trustworthy result: unsupported dtype, failed allocation, bounds that the
 *          integer exports cannot represent exactly, or any error thrown along the way.
 */
function wasmFilterNotRange(
  values: DecodedValue[],
  dtype: string,
  low: number,
  high: number,
  rowCount: number,
  wasm: WasmEngine,
): Uint32Array | null {
  // True when x can be passed as an i32 argument without being altered.
  const fitsInt32 = (x: number): boolean =>
    Number.isInteger(x) && x >= -0x80000000 && x <= 0x7fffffff;

  try {
    const { exports } = wasm;
    exports.resetHeap();

    // Copy the result out of WASM memory. `memory.buffer` is read at call time
    // rather than cached, exactly as the per-branch code did before.
    const copyIndices = (outPtr: number, count: number): Uint32Array =>
      new Uint32Array(exports.memory.buffer.slice(outPtr, outPtr + count * 4));

    if (dtype === "float64" || dtype === "float32") {
      const dataPtr = exports.alloc(rowCount * 8);
      if (!dataPtr) return null;
      // The input view is created and filled before the next alloc call;
      // keep this order (presumably alloc may invalidate earlier views — TODO confirm).
      const dst = new Float64Array(exports.memory.buffer, dataPtr, rowCount);
      for (let i = 0; i < rowCount; i++) dst[i] = (values[i] as number) ?? 0;
      const outPtr = exports.alloc(rowCount * 4);
      if (!outPtr) return null;
      const count = exports.filterFloat64NotRange(dataPtr, rowCount, low, high, outPtr, rowCount);
      return copyIndices(outPtr, count);
    }

    if (dtype === "int64") {
      const dataPtr = exports.alloc(rowCount * 8);
      if (!dataPtr) return null;
      const dst = new BigInt64Array(exports.memory.buffer, dataPtr, rowCount);
      for (let i = 0; i < rowCount; i++) {
        const v = values[i];
        dst[i] = typeof v === "bigint" ? v : BigInt((v as number) ?? 0);
      }
      const outPtr = exports.alloc(rowCount * 4);
      if (!outPtr) return null;
      // BigInt() throws on non-integer bounds; the catch below turns that into null.
      const count = exports.filterInt64NotRange(dataPtr, rowCount, BigInt(low), BigInt(high), outPtr, rowCount);
      return copyIndices(outPtr, count);
    }

    if (dtype === "int32") {
      // i32 arguments are coerced with ToInt32 at the JS/WASM boundary: fractional
      // bounds are truncated and out-of-range bounds wrap (3e9 becomes negative),
      // which would silently select the wrong rows. Decline instead, matching what
      // the int64 branch already does for bounds it cannot represent.
      if (!fitsInt32(low) || !fitsInt32(high)) return null;

      const dataPtr = exports.alloc(rowCount * 4);
      if (!dataPtr) return null;
      const dst = new Int32Array(exports.memory.buffer, dataPtr, rowCount);
      for (let i = 0; i < rowCount; i++) dst[i] = (values[i] as number) ?? 0;
      const outPtr = exports.alloc(rowCount * 4);
      if (!outPtr) return null;
      const count = exports.filterInt32NotRange(dataPtr, rowCount, low, high, outPtr, rowCount);
      return copyIndices(outPtr, count);
    }

    return null;
  } catch {
    // Best-effort path: any failure is reported as "no WASM result".
    return null;
  }
}
617+
561618
/** Intersect two sorted index arrays using WASM. */
562619
function wasmIntersect(a: Uint32Array, b: Uint32Array, wasm: WasmEngine): Uint32Array {
563620
try {
@@ -1416,12 +1473,12 @@ export function canUseWasmAggregate(query: QueryDescriptor, columns: ColumnMeta[
14161473

14171474
// Validate filter is numeric with scalar/range op
14181475
const isValidFilter = (f: FilterOp): boolean => {
1419-
if (f.op === "in" || f.op === "not_in" || f.op === "like" || f.op === "not_like" || f.op === "not_between" || f.op === "is_null" || f.op === "is_not_null") return false;
1476+
if (f.op === "in" || f.op === "not_in" || f.op === "like" || f.op === "not_like" || f.op === "is_null" || f.op === "is_not_null") return false;
14201477
const fc = colMap.get(f.column);
14211478
if (!fc) return false;
14221479
if (fc.dtype !== "float64" && fc.dtype !== "int32" && fc.dtype !== "int64") return false;
14231480
if (fc.pages.some(p => p.encoding)) return false;
1424-
if (f.op === "between") {
1481+
if (f.op === "between" || f.op === "not_between") {
14251482
if (!Array.isArray(f.value) || f.value.length !== 2) return false;
14261483
if (typeof f.value[0] !== "number" || typeof f.value[1] !== "number") return false;
14271484
} else {
@@ -1659,13 +1716,20 @@ export class WasmAggregateOperator implements Operator {
16591716
if (!outPtr) return new Uint32Array(0);
16601717

16611718
let count: number;
1662-
if (f.op === "between" && Array.isArray(f.value) && f.value.length === 2) {
1719+
if ((f.op === "between" || f.op === "not_between") && Array.isArray(f.value) && f.value.length === 2) {
1720+
const isNotBetween = f.op === "not_between";
16631721
if (col.dtype === "float64") {
1664-
count = this.wasm.exports.filterFloat64Range(dataPtr, rowCount, f.value[0] as number, f.value[1] as number, outPtr, rowCount);
1722+
count = isNotBetween
1723+
? this.wasm.exports.filterFloat64NotRange(dataPtr, rowCount, f.value[0] as number, f.value[1] as number, outPtr, rowCount)
1724+
: this.wasm.exports.filterFloat64Range(dataPtr, rowCount, f.value[0] as number, f.value[1] as number, outPtr, rowCount);
16651725
} else if (col.dtype === "int64") {
1666-
count = this.wasm.exports.filterInt64Range(dataPtr, rowCount, BigInt(f.value[0] as number), BigInt(f.value[1] as number), outPtr, rowCount);
1726+
count = isNotBetween
1727+
? this.wasm.exports.filterInt64NotRange(dataPtr, rowCount, BigInt(f.value[0] as number), BigInt(f.value[1] as number), outPtr, rowCount)
1728+
: this.wasm.exports.filterInt64Range(dataPtr, rowCount, BigInt(f.value[0] as number), BigInt(f.value[1] as number), outPtr, rowCount);
16671729
} else {
1668-
count = this.wasm.exports.filterInt32Range(dataPtr, rowCount, f.value[0] as number, f.value[1] as number, outPtr, rowCount);
1730+
count = isNotBetween
1731+
? this.wasm.exports.filterInt32NotRange(dataPtr, rowCount, f.value[0] as number, f.value[1] as number, outPtr, rowCount)
1732+
: this.wasm.exports.filterInt32Range(dataPtr, rowCount, f.value[0] as number, f.value[1] as number, outPtr, rowCount);
16691733
}
16701734
} else {
16711735
const wasmOp = filterOpToWasm(f.op);

src/wasm-engine.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ export interface WasmExports {
7070
filterFloat64Range(dataPtr: number, len: number, low: number, high: number, outPtr: number, maxOut: number): number;
7171
filterInt32Range(dataPtr: number, len: number, low: number, high: number, outPtr: number, maxOut: number): number;
7272
filterInt64Range(dataPtr: number, len: number, low: bigint, high: bigint, outPtr: number, maxOut: number): number;
73+
filterFloat64NotRange(dataPtr: number, len: number, low: number, high: number, outPtr: number, maxOut: number): number;
74+
filterInt32NotRange(dataPtr: number, len: number, low: number, high: number, outPtr: number, maxOut: number): number;
75+
filterInt64NotRange(dataPtr: number, len: number, low: bigint, high: bigint, outPtr: number, maxOut: number): number;
7376
intersectIndices(aPtr: number, aLen: number, bPtr: number, bLen: number, outPtr: number, maxOut: number): number;
7477
unionIndices(aPtr: number, aLen: number, bPtr: number, bLen: number, outPtr: number, maxOut: number): number;
7578

wasm/src/wasm/aggregates.zig

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,69 @@ export fn filterInt64Range(
847847
return out_count;
848848
}
849849

850+
/// NOT BETWEEN filter over f64 values.
/// Writes the index of every element outside [low, high]
/// (val < low OR val > high) into `out_indices` in ascending order,
/// stopping once `max_indices` entries have been written.
/// Returns the number of indices written.
export fn filterFloat64NotRange(
    data_ptr: [*]const f64,
    len: usize,
    low: f64,
    high: f64,
    out_indices: [*]u32,
    max_indices: usize,
) usize {
    var written: usize = 0;
    var row: usize = 0;
    // Both limits are checked before each element is examined.
    while (row < len and written < max_indices) : (row += 1) {
        const value = data_ptr[row];
        const outside = value < low or value > high;
        if (outside) {
            out_indices[written] = @intCast(row);
            written += 1;
        }
    }
    return written;
}
870+
871+
/// NOT BETWEEN filter over i32 values.
/// Writes the index of every element outside [low, high]
/// (val < low OR val > high) into `out_indices` in ascending order,
/// stopping once `max_indices` entries have been written.
/// Returns the number of indices written.
export fn filterInt32NotRange(
    data_ptr: [*]const i32,
    len: usize,
    low: i32,
    high: i32,
    out_indices: [*]u32,
    max_indices: usize,
) usize {
    var written: usize = 0;
    var row: usize = 0;
    // Both limits are checked before each element is examined.
    while (row < len and written < max_indices) : (row += 1) {
        const value = data_ptr[row];
        const outside = value < low or value > high;
        if (outside) {
            out_indices[written] = @intCast(row);
            written += 1;
        }
    }
    return written;
}
891+
892+
/// NOT BETWEEN filter over i64 values.
/// Writes the index of every element outside [low, high]
/// (val < low OR val > high) into `out_indices` in ascending order,
/// stopping once `max_indices` entries have been written.
/// Returns the number of indices written.
export fn filterInt64NotRange(
    data_ptr: [*]const i64,
    len: usize,
    low: i64,
    high: i64,
    out_indices: [*]u32,
    max_indices: usize,
) usize {
    var written: usize = 0;
    var row: usize = 0;
    // Both limits are checked before each element is examined.
    while (row < len and written < max_indices) : (row += 1) {
        const value = data_ptr[row];
        const outside = value < low or value > high;
        if (outside) {
            out_indices[written] = @intCast(row);
            written += 1;
        }
    }
    return written;
}
912+
850913
/// AND two index arrays (intersection)
851914
/// Uses O(n+m) sorted merge since filter outputs are always in ascending order
852915
export fn intersectIndices(

0 commit comments

Comments
 (0)