11import { TableQuery } from "./client.js" ;
22import type { QueryDescriptor , QueryExecutor } from "./client.js" ;
3- import type { AppendResult , ColumnMeta , Env , QueryResult , Row , TableMeta , DatasetMeta } from "./types.js" ;
3+ import type { AppendResult , ColumnMeta , DataType , Env , ExplainResult , QueryResult , Row , TableMeta , DatasetMeta } from "./types.js" ;
44import { parseFooter , parseColumnMetaFromProtobuf , FOOTER_SIZE } from "./footer.js" ;
55import { parseManifest , type ManifestInfo } from "./manifest.js" ;
66import { detectFormat , getParquetFooterLength , parseParquetFooter , parquetMetaToTableMeta } from "./parquet.js" ;
77import { assembleRows , canSkipPage , bigIntReplacer } from "./decode.js" ;
8+ import { coalesceRanges } from "./coalesce.js" ;
89import { instantiateWasm , WasmEngine } from "./wasm-engine.js" ;
910
1011export { MasterDO } from "./master-do.js" ;
@@ -31,6 +32,7 @@ export type {
3132 IcebergSchema ,
3233 IcebergDatasetMeta ,
3334 AppendResult ,
35+ ExplainResult ,
3436 VectorIndexInfo ,
3537} from "./types.js" ;
3638
@@ -203,6 +205,39 @@ class RemoteExecutor implements QueryExecutor {
203205 } ,
204206 } ) ;
205207 }
208+
/**
 * POSTs a query descriptor to the Query DO and returns the decoded JSON body.
 *
 * The descriptor is serialized with `JSON.stringify`, using `bigIntReplacer`
 * as the replacer (presumably so bigint values survive serialization —
 * confirm against `decode.js`).
 *
 * @param path  Route on the Query DO; appended verbatim to `http://internal`.
 * @param query Descriptor sent as the JSON request body.
 * @returns The parsed JSON response. It is typed as `T` by assertion only;
 *          the payload is not validated at runtime.
 * @throws Error when the response status is not ok. The message contains the
 *         route and the response body text.
 */
private async postQuery<T>(path: string, query: QueryDescriptor): Promise<T> {
  const queryDo = this.getQueryDo();
  // No stray whitespace inside the URL or the error message: both are built
  // from the bare interpolated values.
  const request = new Request(`http://internal${path}`, {
    method: "POST",
    body: JSON.stringify(query, bigIntReplacer),
    headers: { "content-type": "application/json" },
  });
  const response = await queryDo.fetch(request);
  if (!response.ok) {
    const error = await response.text();
    throw new Error(`QueryMode ${path} failed: ${error}`);
  }
  return response.json() as Promise<T>;
}
222+
/**
 * Requests the matching-row count from the Query DO.
 *
 * @param query Descriptor posted to `/query/count`.
 * @returns The `count` field of the response body.
 */
async count(query: QueryDescriptor): Promise<number> {
  const { count } = await this.postQuery<{ count: number }>("/query/count", query);
  return count;
}
227+
/**
 * Asks the Query DO whether the query matches anything.
 *
 * @param query Descriptor posted to `/query/exists`.
 * @returns The `exists` field of the response body.
 */
async exists(query: QueryDescriptor): Promise<boolean> {
  const { exists } = await this.postQuery<{ exists: boolean }>("/query/exists", query);
  return exists;
}
232+
/**
 * Fetches the first matching row from the Query DO.
 *
 * @param query Descriptor posted to `/query/first`.
 * @returns The `row` field of the response body (a row or `null`).
 */
async first(query: QueryDescriptor): Promise<Row | null> {
  const { row } = await this.postQuery<{ row: Row | null }>("/query/first", query);
  return row;
}
237+
/**
 * Retrieves the execution plan for a query from the Query DO.
 *
 * @param query Descriptor posted to `/query/explain`.
 * @returns The response body, typed as `ExplainResult`.
 */
async explain(query: QueryDescriptor): Promise<ExplainResult> {
  const plan = await this.postQuery<ExplainResult>("/query/explain", query);
  return plan;
}
206241}
207242
208243/**
@@ -321,6 +356,181 @@ class LocalExecutor implements QueryExecutor {
321356 } ;
322357 }
323358
/**
 * Counts the rows matching a query.
 *
 * Without filters the answer comes from the loaded metadata alone: the page
 * row counts of the first column are summed (0 when the table has no columns).
 * With filters the query is re-issued through `execute()` with a `count(*)`
 * aggregate and the `count_*` field of the first result row is returned.
 *
 * @param query Descriptor naming the table and its filters.
 * @returns The row count, or 0 when no value is available.
 */
async count(query: QueryDescriptor): Promise<number> {
  const meta = await this.getOrLoadMeta(query.table);

  if (query.filters.length === 0) {
    const leadColumn = meta.columns[0];
    if (!leadColumn) return 0;
    let total = 0;
    for (const page of leadColumn.pages) {
      total += page.rowCount;
    }
    return total;
  }

  // Filters present: let the aggregate path do the counting.
  const countQuery = { ...query, aggregates: [{ fn: "count" as const, column: "*" }] };
  const { rows } = await this.execute(countQuery);
  const firstRow = rows[0];
  return (firstRow?.["count_*"] as number) ?? 0;
}
370+
/**
 * Reports whether the query yields at least one row.
 *
 * Runs the query through `execute()` with `limit` overridden to 1.
 *
 * @param query Descriptor to probe.
 * @returns `true` when the result's `rowCount` is greater than zero.
 */
async exists(query: QueryDescriptor): Promise<boolean> {
  const probe = { ...query, limit: 1 };
  const { rowCount } = await this.execute(probe);
  return rowCount > 0;
}
376+
/**
 * Returns the first row produced by the query.
 *
 * Runs the query through `execute()` with `limit` overridden to 1.
 *
 * @param query Descriptor to run.
 * @returns The first result row, or `null` when there is none.
 */
async first(query: QueryDescriptor): Promise<Row | null> {
  const probe = { ...query, limit: 1 };
  const { rows } = await this.execute(probe);
  const [head] = rows;
  return head ?? null;
}
382+
/**
 * Builds a plan describing what running `query` would touch.
 *
 * Works from table metadata only: it walks the pages of the projected columns,
 * applies `canSkipPage` to each page (unless the query has a vector search),
 * and records the byte ranges of the pages that remain. This method does not
 * fetch any page bytes itself.
 *
 * @param query Descriptor to analyse. An empty `projections` list means all
 *              columns.
 * @returns Per-column page/byte totals, page skip counts, the summed byte
 *          length of the surviving pages, the number of ranges left after
 *          `coalesceRanges`, a per-filter `pushable` flag, and whether the
 *          table metadata was already cached when this call started.
 */
async explain(query: QueryDescriptor): Promise<ExplainResult> {
  // Sample the cache BEFORE loading. getOrLoadMeta() always inserts the entry,
  // so checking afterwards would report `true` on every call.
  const metaCached = this.metaCache.has(query.table);

  const meta = await this.getOrLoadMeta(query.table);
  const { columns } = meta;
  const projectedColumns = query.projections.length > 0
    ? columns.filter(c => query.projections.includes(c.name))
    : columns;

  let pagesTotal = 0;
  let pagesSkipped = 0;
  const ranges: { column: string; offset: number; length: number }[] = [];
  const colDetails: ExplainResult["columns"] = [];

  for (const col of projectedColumns) {
    let colBytes = 0;
    let colPages = 0;
    for (const page of col.pages) {
      pagesTotal++;
      // Page skipping is disabled entirely for vector-search queries.
      if (!query.vectorSearch && canSkipPage(page, query.filters, col.name)) {
        pagesSkipped++;
        continue;
      }
      colPages++;
      colBytes += page.byteLength;
      ranges.push({ column: col.name, offset: Number(page.byteOffset), length: page.byteLength });
    }
    colDetails.push({ name: col.name, dtype: col.dtype as DataType, pages: colPages, bytes: colBytes });
  }

  // NOTE(review): 64 KiB is presumably the maximum gap merged between
  // neighbouring ranges — confirm against coalesce.js.
  const coalesced = coalesceRanges(ranges, 64 * 1024);
  const estimatedBytes = ranges.reduce((s, r) => s + r.length, 0);
  // Total rows come from the first column of the table, not the projection.
  const totalRows = columns[0]?.pages.reduce((s, p) => s + p.rowCount, 0) ?? 0;

  return {
    table: query.table,
    // `format` and `fragments` are fixed values on this path.
    format: "lance",
    totalRows,
    columns: colDetails,
    pagesTotal,
    pagesSkipped,
    pagesScanned: pagesTotal - pagesSkipped,
    estimatedBytes,
    estimatedR2Reads: coalesced.length,
    fragments: 1,
    filters: query.filters.map(f => ({
      column: f.column,
      op: f.op,
      pushable: f.op !== "in" && f.op !== "neq",
    })),
    metaCached,
  };
}
434+
/**
 * Streams the rows of a query as a sequence of batches.
 *
 * Sorted queries (`query.sortColumn` set) are run through `execute()` in full
 * and the result is sliced into chunks of `batchSize` rows. Unsorted queries
 * are read incrementally: pages of the first projected column are grouped
 * until their row counts reach `batchSize`, the same page indices are read for
 * every projected column (HTTP range request for URL tables, positional file
 * read otherwise), and the batch is decoded with `assembleRows`.
 *
 * @param query     Descriptor to run. An empty `projections` list means all
 *                  columns. A truthy `limit` caps the total rows yielded on
 *                  the unsorted path.
 * @param batchSize Target number of rows per batch; must be greater than 0.
 *                  Unsorted batches are page-aligned, so they may hold more
 *                  rows than this.
 * @returns An async iterable of row batches. Nothing is yielded when there
 *          are no projected columns.
 * @throws RangeError when `batchSize` is not a positive number.
 * @throws Error when a range request for a URL table returns a non-ok status.
 */
async *cursor(query: QueryDescriptor, batchSize: number): AsyncIterable<Row[]> {
  // A zero, negative or NaN batch size would never advance the loops below.
  if (!(batchSize > 0)) {
    throw new RangeError(`cursor batchSize must be a positive number, got ${batchSize}`);
  }

  const meta = await this.getOrLoadMeta(query.table);
  const { columns } = meta;
  const projectedColumns = query.projections.length > 0
    ? columns.filter(c => query.projections.includes(c.name))
    : columns;

  const isUrl = query.table.startsWith("http://") || query.table.startsWith("https://");
  const firstCol = projectedColumns[0];
  if (!firstCol) return;

  // If sorted, must buffer all rows then chunk.
  if (query.sortColumn) {
    const result = await this.execute(query);
    for (let i = 0; i < result.rows.length; i += batchSize) {
      yield result.rows.slice(i, i + batchSize);
    }
    return;
  }

  const totalPages = firstCol.pages.length;
  let pageIdx = 0;
  let totalYielded = 0;

  // Acquire the WASM engine before opening the file: nothing may fail between
  // open() and the try/finally below, otherwise the handle would leak.
  const wasm = await this.getWasm();
  const handle = isUrl
    ? null
    : await (await import("node:fs/promises")).open(query.table, "r");

  try {
    while (pageIdx < totalPages) {
      // Group pages of the first column until the batch holds enough rows.
      let batchRows = 0;
      const batchStartPage = pageIdx;
      while (pageIdx < totalPages && batchRows < batchSize) {
        const page = firstCol.pages[pageIdx];
        if (!query.vectorSearch && canSkipPage(page, query.filters, firstCol.name)) {
          pageIdx++;
          continue;
        }
        batchRows += page.rowCount;
        pageIdx++;
      }

      // Every remaining page was skippable; pageIdx is now at the end.
      if (batchRows === 0) continue;

      // NOTE(review): skip decisions are made per column. If canSkipPage
      // answers differently for different columns at the same page index, the
      // columns hand a different number of pages to assembleRows — confirm
      // that this is handled there.
      const columnData = new Map<string, ArrayBuffer[]>();
      for (const col of projectedColumns) {
        for (let pi = batchStartPage; pi < pageIdx; pi++) {
          const page = col.pages[pi];
          if (!page) continue;
          if (!query.vectorSearch && canSkipPage(page, query.filters, col.name)) continue;

          let ab: ArrayBuffer;
          if (handle === null) {
            // URL table: fetch exactly this page with an inclusive byte range.
            const start = Number(page.byteOffset);
            const end = start + page.byteLength - 1;
            const resp = await fetch(query.table, { headers: { Range: `bytes=${start}-${end}` } });
            if (!resp.ok) {
              throw new Error(`Range request for ${query.table} failed: ${resp.status} ${resp.statusText}`);
            }
            ab = await resp.arrayBuffer();
          } else {
            const buf = Buffer.alloc(page.byteLength);
            await handle.read(buf, 0, page.byteLength, Number(page.byteOffset));
            // Copy out only this Buffer's own bytes from its backing store.
            ab = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
          }

          const arr = columnData.get(col.name) ?? [];
          arr.push(ab);
          columnData.set(col.name, arr);
        }
      }

      const rows = assembleRows(columnData, projectedColumns, query, wasm);
      if (rows.length === 0) continue;

      // A falsy limit (undefined or 0) means "no cap", as before.
      const remaining = query.limit ? query.limit - totalYielded : Infinity;
      if (rows.length >= remaining) {
        // Limit reached: emit the final (possibly trimmed) batch and stop,
        // rather than reading another batch only to yield an empty array.
        yield rows.length > remaining ? rows.slice(0, remaining) : rows;
        return;
      }
      yield rows;
      totalYielded += rows.length;
    }
  } finally {
    if (handle) await handle.close();
  }
}
519+
/**
 * Returns the metadata (columns + file size) for a table, loading it on a
 * cache miss.
 *
 * Tables whose name starts with `http://` or `https://` are loaded with
 * `loadMetaFromUrl`; everything else goes through `loadMetaFromFile`. The
 * result is stored in `metaCache`. Once the cache holds more than 1000
 * entries, the first key in its iteration order is removed (the oldest
 * insertion, assuming `metaCache` is a `Map`).
 *
 * @param table Table path or URL; also used as the cache key.
 * @returns The cached or freshly loaded metadata.
 */
private async getOrLoadMeta(table: string): Promise<{ columns: ColumnMeta[]; fileSize: number }> {
  const hit = this.metaCache.get(table);
  if (hit) return hit;

  const remote = table.startsWith("http://") || table.startsWith("https://");
  let loaded: { columns: ColumnMeta[]; fileSize: number };
  if (remote) {
    loaded = await this.loadMetaFromUrl(table);
  } else {
    loaded = await this.loadMetaFromFile(table);
  }
  this.metaCache.set(table, loaded);

  // Keep the cache bounded: drop one entry once it grows past 1000.
  if (this.metaCache.size > 1000) {
    const oldest = this.metaCache.keys().next().value;
    if (oldest) this.metaCache.delete(oldest);
  }
  return loaded;
}
533+
324534 async execute ( query : QueryDescriptor ) : Promise < QueryResult > {
325535 const startTime = Date . now ( ) ;
326536 const isUrl = query . table . startsWith ( "http://" ) || query . table . startsWith ( "https://" ) ;
0 commit comments