@@ -814,9 +814,9 @@ export class WasmEngine {
814814 * Returns the raw Lance binary bytes ready to write to R2/disk.
815815 */
816816 buildFragment ( columns : FragmentColumn [ ] ) : Uint8Array {
817- // Estimate capacity: sum of all values plus overhead
817+ // Estimate capacity: sum of all values + null bitmaps + overhead
818818 let totalBytes = 0 ;
819- for ( const col of columns ) totalBytes += col . values . byteLength ;
819+ for ( const col of columns ) totalBytes += col . values . byteLength + ( col . nullBitmap ?. byteLength ?? 0 ) ;
820820 const capacity = totalBytes + columns . length * 256 + 4096 ; // metadata overhead
821821
822822 if ( ! this . exports . fragmentBegin ( capacity ) ) {
@@ -827,6 +827,15 @@ export class WasmEngine {
827827 const { ptr : namePtr , len : nameLen } = this . writeString ( col . name ) ;
828828 if ( ! namePtr ) throw new Error ( `WASM OOM writing column name "${ col . name } "` ) ;
829829
830+ // Copy the validity (null) bitmap into WASM memory if present; nullablePtr stays 0 when the column has no bitmap
831+ let nullablePtr = 0 ;
832+ if ( col . nullBitmap ) {
833+ nullablePtr = this . safeAlloc ( col . nullBitmap . byteLength ) ;
834+ if ( ! nullablePtr ) throw new Error ( "WASM OOM writing null bitmap" ) ;
835+ new Uint8Array ( this . exports . memory . buffer , nullablePtr , col . nullBitmap . byteLength )
836+ . set ( col . nullBitmap ) ;
837+ }
838+
830839 let result = 0 ;
831840 switch ( col . dtype ) {
832841 case "int64" : {
@@ -835,7 +844,7 @@ export class WasmEngine {
835844 if ( ! dataPtr ) throw new Error ( "WASM OOM" ) ;
836845 new Uint8Array ( this . exports . memory . buffer , dataPtr , col . values . byteLength )
837846 . set ( new Uint8Array ( col . values instanceof ArrayBuffer ? col . values : col . values . slice ( 0 ) ) ) ;
838- result = this . exports . fragmentAddInt64Column ( namePtr , nameLen , dataPtr , i64 . length , 0 ) ;
847+ result = this . exports . fragmentAddInt64Column ( namePtr , nameLen , dataPtr , i64 . length , nullablePtr ) ;
839848 break ;
840849 }
841850 case "int32" : {
@@ -844,7 +853,7 @@ export class WasmEngine {
844853 if ( ! dataPtr ) throw new Error ( "WASM OOM" ) ;
845854 new Uint8Array ( this . exports . memory . buffer , dataPtr , col . values . byteLength )
846855 . set ( new Uint8Array ( col . values instanceof ArrayBuffer ? col . values : col . values . slice ( 0 ) ) ) ;
847- result = this . exports . fragmentAddInt32Column ( namePtr , nameLen , dataPtr , i32 . length , 0 ) ;
856+ result = this . exports . fragmentAddInt32Column ( namePtr , nameLen , dataPtr , i32 . length , nullablePtr ) ;
848857 break ;
849858 }
850859 case "float64" : {
@@ -853,7 +862,7 @@ export class WasmEngine {
853862 if ( ! dataPtr ) throw new Error ( "WASM OOM" ) ;
854863 new Uint8Array ( this . exports . memory . buffer , dataPtr , col . values . byteLength )
855864 . set ( new Uint8Array ( col . values instanceof ArrayBuffer ? col . values : col . values . slice ( 0 ) ) ) ;
856- result = this . exports . fragmentAddFloat64Column ( namePtr , nameLen , dataPtr , f64 . length , 0 ) ;
865+ result = this . exports . fragmentAddFloat64Column ( namePtr , nameLen , dataPtr , f64 . length , nullablePtr ) ;
857866 break ;
858867 }
859868 case "float32" : {
@@ -862,7 +871,7 @@ export class WasmEngine {
862871 if ( ! dataPtr ) throw new Error ( "WASM OOM" ) ;
863872 new Uint8Array ( this . exports . memory . buffer , dataPtr , col . values . byteLength )
864873 . set ( new Uint8Array ( col . values instanceof ArrayBuffer ? col . values : col . values . slice ( 0 ) ) ) ;
865- result = this . exports . fragmentAddFloat32Column ( namePtr , nameLen , dataPtr , f32 . length , 0 ) ;
874+ result = this . exports . fragmentAddFloat32Column ( namePtr , nameLen , dataPtr , f32 . length , nullablePtr ) ;
866875 break ;
867876 }
868877 case "utf8" : case "string" : {
@@ -901,7 +910,7 @@ export class WasmEngine {
901910 new Uint8Array ( this . exports . memory . buffer , offsetsPtr , offsets . byteLength )
902911 . set ( new Uint8Array ( offsets . buffer ) ) ;
903912 result = this . exports . fragmentAddStringColumn (
904- namePtr , nameLen , dataPtr , totalStrBytes , offsetsPtr / 4 , count , 0 ,
913+ namePtr , nameLen , dataPtr , totalStrBytes , offsetsPtr / 4 , count , nullablePtr ,
905914 ) ;
906915 break ;
907916 }
@@ -913,7 +922,7 @@ export class WasmEngine {
913922 const byteCount = col . values . byteLength ;
914923 // rowCount must be provided for exact count; byteCount * 8 overestimates when rows % 8 != 0
915924 const rowCount = col . rowCount ?? byteCount * 8 ;
916- result = this . exports . fragmentAddBoolColumn ( namePtr , nameLen , dataPtr , byteCount , rowCount , 0 ) ;
925+ result = this . exports . fragmentAddBoolColumn ( namePtr , nameLen , dataPtr , byteCount , rowCount , nullablePtr ) ;
917926 break ;
918927 }
919928 default :
@@ -1041,6 +1050,8 @@ export interface FragmentColumn {
10411050 values : ArrayBufferLike ;
10421051 /** Required for bool columns (byteCount*8 overestimates when rows%8!=0). */
10431052 rowCount ?: number ;
1053+ /** Validity bitmap — bit i set means row i is valid (non-null). Absent = all valid. */
1054+ nullBitmap ?: Uint8Array ;
10441055}
10451056
10461057/**
@@ -1056,33 +1067,45 @@ export function rowsToColumnArrays(rows: Record<string, unknown>[]): FragmentCol
10561067 const sample = rows . find ( r => r [ colName ] != null ) ?. [ colName ] ;
10571068 if ( sample === undefined ) continue ;
10581069
1070+ // Build a validity bitmap (bit set = row is non-null); it is attached to the column only when some row is null/undefined
1071+ const bitmapBytes = Math . ceil ( rows . length / 8 ) ;
1072+ let hasNull = false ;
1073+ const nullBitmap = new Uint8Array ( bitmapBytes ) ;
1074+ for ( let i = 0 ; i < rows . length ; i ++ ) {
1075+ if ( rows [ i ] [ colName ] != null ) {
1076+ nullBitmap [ i >> 3 ] |= 1 << ( i & 7 ) ; // bit set = valid
1077+ } else {
1078+ hasNull = true ;
1079+ }
1080+ }
1081+
10591082 if ( typeof sample === "number" ) {
10601083 if ( Number . isInteger ( sample ) ) {
10611084 const arr = new BigInt64Array ( rows . length ) ;
1062- for ( let i = 0 ; i < rows . length ; i ++ ) { const v = rows [ i ] [ colName ] ; arr [ i ] = typeof v === "bigint" ? v as bigint : BigInt ( Math . trunc ( Number ( v ?? 0 ) ) ) ; }
1063- result . push ( { name : colName , dtype : "int64" , values : arr . buffer } ) ;
1085+ for ( let i = 0 ; i < rows . length ; i ++ ) { const v = rows [ i ] [ colName ] ; arr [ i ] = v != null ? ( typeof v === "bigint" ? v as bigint : BigInt ( Math . trunc ( Number ( v ) ) ) ) : 0n ; }
1086+ result . push ( { name : colName , dtype : "int64" , values : arr . buffer , ... ( hasNull && { nullBitmap } ) } ) ;
10641087 } else {
10651088 const arr = new Float64Array ( rows . length ) ;
10661089 for ( let i = 0 ; i < rows . length ; i ++ ) { const v = rows [ i ] [ colName ] ; arr [ i ] = v != null ? v as number : 0 ; }
1067- result . push ( { name : colName , dtype : "float64" , values : arr . buffer } ) ;
1090+ result . push ( { name : colName , dtype : "float64" , values : arr . buffer , ... ( hasNull && { nullBitmap } ) } ) ;
10681091 }
10691092 } else if ( typeof sample === "bigint" ) {
10701093 const arr = new BigInt64Array ( rows . length ) ;
10711094 for ( let i = 0 ; i < rows . length ; i ++ ) { const v = rows [ i ] [ colName ] ; arr [ i ] = v != null ? v as bigint : 0n ; }
1072- result . push ( { name : colName , dtype : "int64" , values : arr . buffer } ) ;
1095+ result . push ( { name : colName , dtype : "int64" , values : arr . buffer , ... ( hasNull && { nullBitmap } ) } ) ;
10731096 } else if ( typeof sample === "boolean" ) {
10741097 const byteCount = Math . ceil ( rows . length / 8 ) ;
10751098 const boolBuf = new Uint8Array ( byteCount ) ;
10761099 for ( let i = 0 ; i < rows . length ; i ++ ) {
10771100 if ( rows [ i ] [ colName ] ) boolBuf [ i >> 3 ] |= 1 << ( i & 7 ) ;
10781101 }
1079- result . push ( { name : colName , dtype : "bool" , values : boolBuf . buffer , rowCount : rows . length } ) ;
1102+ result . push ( { name : colName , dtype : "bool" , values : boolBuf . buffer , rowCount : rows . length , ... ( hasNull && { nullBitmap } ) } ) ;
10801103 } else if ( typeof sample === "string" ) {
10811104 const enc = textEncoder ;
10821105 const encoded : Uint8Array [ ] = new Array ( rows . length ) ;
10831106 let totalLen = 0 ;
10841107 for ( let i = 0 ; i < rows . length ; i ++ ) {
1085- encoded [ i ] = enc . encode ( String ( rows [ i ] [ colName ] ?? "" ) ) ;
1108+ encoded [ i ] = enc . encode ( rows [ i ] [ colName ] != null ? String ( rows [ i ] [ colName ] ) : "" ) ;
10861109 totalLen += 4 + encoded [ i ] . length ;
10871110 }
10881111 const buf = new Uint8Array ( totalLen ) ;
@@ -1094,7 +1117,7 @@ export function rowsToColumnArrays(rows: Record<string, unknown>[]): FragmentCol
10941117 buf . set ( encoded [ i ] , off ) ;
10951118 off += encoded [ i ] . length ;
10961119 }
1097- result . push ( { name : colName , dtype : "utf8" , values : buf . buffer } ) ;
1120+ result . push ( { name : colName , dtype : "utf8" , values : buf . buffer , ... ( hasNull && { nullBitmap } ) } ) ;
10981121 }
10991122 }
11001123 return result ;
0 commit comments