@@ -80,6 +80,7 @@ export class QueryDO implements DurableObject {
8080 case "/tables" : return this . handleListTables ( ) ;
8181 case "/meta" : return this . handleGetMeta ( request ) ;
8282 case "/diagnostics" : return this . handleDiagnostics ( ) ;
83+ case "/register-iceberg" : return this . handleRegisterIceberg ( request ) ;
8384 default : return new Response ( "Not found" , { status : 404 } ) ;
8485 }
8586 }
@@ -999,6 +1000,84 @@ export class QueryDO implements DurableObject {
9991000 return null ;
10001001 }
10011002
/**
 * Register an Iceberg table by explicit metadata path (bypasses R2 list()).
 *
 * Expects a JSON request body of the form `{ table, metadataKey }`, both
 * non-empty strings.
 *
 * @param request Incoming request whose body carries the registration payload.
 * @returns 400 when the body is not valid JSON or either field is missing or
 *   not a string; 500 when `loadIcebergByKey` yields no dataset; otherwise a
 *   JSON summary with `registered`, `table`, `totalRows` and `files`.
 */
private async handleRegisterIceberg(request: Request): Promise<Response> {
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    // A malformed body is a client error; report it instead of letting the parse failure escape.
    return this.json({ error: "Invalid JSON body" }, 400);
  }

  // `body` may legally be null or a primitive; reading properties off `{}` keeps the check below uniform.
  const { table, metadataKey } = (body ?? {}) as { table?: unknown; metadataKey?: unknown };
  if (typeof table !== "string" || typeof metadataKey !== "string" || !table || !metadataKey) {
    return this.json({ error: "Missing table or metadataKey" }, 400);
  }

  const result = await this.loadIcebergByKey(table, metadataKey);
  if (!result) return this.json({ error: "Failed to load Iceberg metadata" }, 500);

  return this.json({
    registered: true,
    table,
    totalRows: result.totalRows,
    files: result.parquetFiles.length,
  });
}
1012+
/**
 * Load an Iceberg table from an explicit metadata.json key (no R2 list() needed).
 *
 * Steps: read and parse the metadata JSON, read the manifest list it points to,
 * extract the Parquet paths, then read each Parquet file's footer to build
 * per-file metadata. Files whose head, trailer or footer cannot be read are
 * skipped rather than failing the whole load.
 *
 * Side effect: on success an entry for `tableName` is stored in
 * `this.datasetCache` and `this.evictDatasetCache()` is invoked.
 *
 * @param tableName Name under which the dataset is cached and returned.
 * @param metadataKey R2 key of the metadata JSON; must contain a `metadata/` segment.
 * @returns The dataset description, or `null` when the key has no `metadata/`
 *   segment, the metadata or manifest list is missing or unparseable, no
 *   Parquet paths are found, or no Parquet footer could be loaded.
 */
private async loadIcebergByKey(tableName: string, metadataKey: string): Promise<IcebergDatasetMeta | null> {
  // Derive prefix from metadataKey (e.g., "bench_iceberg_100k/metadata/v1.metadata.json" → "bench_iceberg_100k/").
  // Without a "metadata/" segment there is no table root to derive, so bail out before touching R2.
  const metadataDirMatch = /metadata\/.*$/.exec(metadataKey);
  if (!metadataDirMatch) return null;
  const prefix = metadataKey.slice(0, metadataDirMatch.index);

  // Paths may or may not already include the table prefix; only prepend it when absent.
  const toBucketKey = (path: string): string => (path.startsWith(prefix) ? path : `${prefix}${path}`);

  const metaObj = await this.env.DATA_BUCKET.get(metadataKey);
  if (!metaObj) return null;

  const icebergMeta = parseIcebergMetadata(await metaObj.text());
  if (!icebergMeta) return null;

  const manifestListObj = await this.env.DATA_BUCKET.get(toBucketKey(icebergMeta.manifestListPath));
  if (!manifestListObj) return null;

  const parquetPaths = extractParquetPathsFromManifest(await manifestListObj.arrayBuffer());
  if (parquetPaths.length === 0) return null;

  // Size in bytes of the trailer read from the end of each Parquet file.
  const TRAILER_BYTES = 8;

  // Load each Parquet file's metadata, keyed by its index in parquetPaths.
  const fragmentMetas = new Map<number, TableMeta>();
  let totalRows = 0;
  for (let i = 0; i < parquetPaths.length; i++) {
    const parquetPath = parquetPaths[i];
    const parquetKey = toBucketKey(parquetPath);

    const head = await this.env.DATA_BUCKET.head(parquetKey);
    if (!head) continue;

    const fileSize = BigInt(head.size);
    const sizeBytes = Number(fileSize);
    // A file shorter than the trailer cannot yield a footer length; skip it instead of issuing a degenerate range read.
    if (sizeBytes < TRAILER_BYTES) continue;

    const tailObj = await this.env.DATA_BUCKET.get(parquetKey, {
      range: { offset: sizeBytes - TRAILER_BYTES, length: TRAILER_BYTES },
    });
    if (!tailObj) continue;

    const footerLen = getParquetFooterLength(await tailObj.arrayBuffer());
    if (!footerLen) continue;

    // A footer length larger than the file would give a negative offset; treat the file as unreadable.
    const footerOffset = sizeBytes - footerLen - TRAILER_BYTES;
    if (footerOffset < 0) continue;

    const footerObj = await this.env.DATA_BUCKET.get(parquetKey, {
      range: { offset: footerOffset, length: footerLen },
    });
    if (!footerObj) continue;

    const parquetFileMeta = parseParquetFooter(await footerObj.arrayBuffer());
    if (!parquetFileMeta) continue;

    const meta = parquetMetaToTableMeta(parquetFileMeta, parquetKey, fileSize);
    meta.name = parquetPath;
    fragmentMetas.set(i, meta);
    totalRows += meta.totalRows;
  }

  if (fragmentMetas.size === 0) return null;

  // One timestamp for both the returned dataset and the cache entry.
  const updatedAt = Date.now();

  const dataset: IcebergDatasetMeta = {
    name: tableName,
    r2Prefix: prefix,
    schema: icebergMeta.schema,
    snapshotId: icebergMeta.currentSnapshotId,
    parquetFiles: parquetPaths,
    fragmentMetas,
    totalRows,
    updatedAt,
  };

  // The cache entry carries a synthetic manifest: one fragment per Parquet path
  // (id = index in parquetPaths), with physicalRows 0 and an empty schema.
  this.datasetCache.set(tableName, {
    name: tableName,
    r2Prefix: prefix,
    manifest: {
      version: 0,
      fragments: parquetPaths.map((p, idx) => ({ id: idx, filePath: p, physicalRows: 0 })),
      totalRows,
      schema: [],
    },
    fragmentMetas,
    totalRows,
    updatedAt,
  });
  this.evictDatasetCache();
  return dataset;
}
1080+
10021081 /** Discover an Iceberg table in R2 by listing metadata/ for .metadata.json files. */
10031082 private async loadIcebergFromR2 ( tableName : string ) : Promise < IcebergDatasetMeta | null > {
10041083 for ( const prefix of [ `${ tableName } /` , `data/${ tableName } /` ] ) {
0 commit comments