@@ -10,15 +10,15 @@ import type { AppendResult, ColumnMeta, DataType, DiffResult, ExplainResult, Pag
1010import { parseFooter , parseColumnMetaFromProtobuf , FOOTER_SIZE } from "./footer.js" ;
1111import { parseManifest } from "./manifest.js" ;
1212import { detectFormat , getParquetFooterLength , parseParquetFooter , parquetMetaToTableMeta } from "./parquet.js" ;
13- import { assembleRows , canSkipPage } from "./decode.js" ;
13+ import { assembleRows } from "./decode.js" ;
1414import { coalesceRanges , autoCoalesceGap } from "./coalesce.js" ;
1515import { instantiateWasm , type WasmEngine } from "./wasm-engine.js" ;
1616
1717const textEncoder = new TextEncoder ( ) ;
1818import { VipCache } from "./vip-cache.js" ;
1919import { QueryModeError } from "./errors.js" ;
2020import { parseLanceV2Columns , lanceV2ToColumnMeta , computeLanceV2Stats } from "./lance-v2.js" ;
21- import { buildPipeline , drainPipeline , DEFAULT_MEMORY_BUDGET , type FragmentSource , type PipelineOptions } from "./operators.js" ;
21+ import { buildPipeline , drainPipeline , DEFAULT_MEMORY_BUDGET , canSkipPageMultiCol , type FragmentSource , type PipelineOptions } from "./operators.js" ;
2222
2323/**
2424 * Executor for local mode (Node/Bun).
@@ -219,22 +219,28 @@ export class LocalExecutor implements QueryExecutor {
219219 const ranges : { column : string ; offset : number ; length : number } [ ] = [ ] ;
220220 const colDetails : ExplainResult [ "columns" ] = [ ] ;
221221
222- // Use first column for row estimation (all columns have same page structure)
222+ // Uniform page-level skip across all columns to match actual query behavior
223+ const maxPages = projectedColumns . reduce ( ( m , c ) => Math . max ( m , c . pages . length ) , 0 ) ;
224+ const keptPages = new Set < number > ( ) ;
225+ for ( let pi = 0 ; pi < maxPages ; pi ++ ) {
226+ if ( ! query . vectorSearch && canSkipPageMultiCol ( projectedColumns , pi , query . filters , query . filterGroups ) ) {
227+ pagesSkipped += projectedColumns . length ;
228+ } else {
229+ keptPages . add ( pi ) ;
230+ }
231+ }
232+ pagesTotal = maxPages * projectedColumns . length ;
233+
223234 const firstCol = projectedColumns [ 0 ] ;
224235 for ( const col of projectedColumns ) {
225236 let colBytes = 0 ;
226237 let colPages = 0 ;
227238 for ( let pi = 0 ; pi < col . pages . length ; pi ++ ) {
239+ if ( ! keptPages . has ( pi ) ) continue ;
228240 const page = col . pages [ pi ] ;
229- pagesTotal ++ ;
230- if ( ! query . vectorSearch && canSkipPage ( page , query . filters , col . name ) ) {
231- pagesSkipped ++ ;
232- continue ;
233- }
234241 colPages ++ ;
235242 colBytes += page . byteLength ;
236243 ranges . push ( { column : col . name , offset : Number ( page . byteOffset ) , length : page . byteLength } ) ;
237- // Count estimated rows from first projected column's non-skipped pages
238244 if ( col === firstCol ) estimatedRows += page . rowCount ;
239245 }
240246 colDetails . push ( { name : col . name , dtype : col . dtype as DataType , pages : colPages , bytes : colBytes } ) ;
@@ -531,12 +537,20 @@ export class LocalExecutor implements QueryExecutor {
531537 const pageRanges : { column : string ; offset : bigint ; length : number } [ ] = [ ] ;
532538 let pagesSkipped = 0 ;
533539
540+ // Uniform page-level skip: decide once per page index across all columns to avoid row misalignment.
541+ const maxPages = projectedColumns . reduce ( ( m , c ) => Math . max ( m , c . pages . length ) , 0 ) ;
542+ const keptPageIndices : number [ ] = [ ] ;
543+ for ( let pi = 0 ; pi < maxPages ; pi ++ ) {
544+ if ( canSkipPageMultiCol ( projectedColumns , pi , query . filters , query . filterGroups ) ) {
545+ pagesSkipped += projectedColumns . length ;
546+ continue ;
547+ }
548+ keptPageIndices . push ( pi ) ;
549+ }
534550 for ( const col of projectedColumns ) {
535- for ( const page of col . pages ) {
536- if ( canSkipPage ( page , query . filters , col . name ) ) {
537- pagesSkipped ++ ;
538- continue ;
539- }
551+ for ( const pi of keptPageIndices ) {
552+ const page = col . pages [ pi ] ;
553+ if ( ! page ) continue ;
540554 pageRanges . push ( { column : col . name , offset : page . byteOffset , length : page . byteLength } ) ;
541555 }
542556 }
0 commit comments