1+ /**
2+ * @import {DataReader, ThriftObject, ThriftType} from '../src/types.d.ts'
3+ */
4+
// TCompactProtocol type ids.
// Only the ids this parser decodes are declared; ids 10 (SET), 11 (MAP)
// and 13 (UUID) are intentionally absent.
const STOP = 0 // terminates the field list of a struct
const TRUE = 1 // boolean true, value carried by the type id
const FALSE = 2 // boolean false, value carried by the type id
const BYTE = 3 // signed 8-bit integer
const I16 = 4 // zigzag varint
const I32 = 5 // zigzag varint
const I64 = 6 // zigzag varint, decoded as bigint
const DOUBLE = 7 // 8-byte float
const BINARY = 8 // varint length followed by raw bytes
const LIST = 9 // header byte (size + element type) followed by elements
const STRUCT = 12 // nested field list ending with STOP
1817
1918/**
2019 * Parse TCompactProtocol
@@ -23,20 +22,17 @@ export const CompactType = {
2322 * @returns {{ [key: `field_${number}`]: any } }
2423 */
2524export function deserializeTCompactProtocol ( reader ) {
26- let lastFid = 0
2725 /** @type {ThriftObject } */
2826 const value = { }
27+ let fid = 0
2928
3029 while ( reader . offset < reader . view . byteLength ) {
3130 // Parse each field based on its type and add to the result object
32- const [ type , fid , newLastFid ] = readFieldBegin ( reader , lastFid )
33- lastFid = newLastFid
34-
35- if ( type === CompactType . STOP ) {
36- break
37- }
38-
39- // Handle the field based on its type
31+ const byte = reader . view . getUint8 ( reader . offset ++ )
32+ const type = byte & 0x0f
33+ if ( type === STOP ) break
34+ const delta = byte >> 4
35+ fid = delta ? fid + delta : readZigZag ( reader )
4036 value [ `field_${ fid } ` ] = readElement ( reader , type )
4137 }
4238
@@ -46,73 +42,59 @@ export function deserializeTCompactProtocol(reader) {
/**
 * Read a single thrift value of the given compact type.
 * Advances reader.offset past every byte that is consumed.
 *
 * @param {DataReader} reader
 * @param {number} type compact protocol type id
 * @returns {ThriftType}
 */
function readElement(reader, type) {
  // Booleans are carried by the type id itself and consume no bytes
  if (type === TRUE) return true
  if (type === FALSE) return false
  if (type === BYTE) return reader.view.getInt8(reader.offset++)
  if (type === I16 || type === I32) return readZigZag(reader)
  if (type === I64) return readZigZagBigInt(reader)
  if (type === DOUBLE) {
    // 8-byte little-endian float
    const double = reader.view.getFloat64(reader.offset, true)
    reader.offset += 8
    return double
  }
  if (type === BINARY) {
    // Varint length prefix, then the raw bytes.
    // The result is a view over the reader's buffer, not a copy.
    const byteLength = readVarInt(reader)
    const { buffer, byteOffset } = reader.view
    const bytes = new Uint8Array(buffer, byteOffset + reader.offset, byteLength)
    reader.offset += byteLength
    return bytes
  }
  if (type === LIST) {
    // Header byte: low nibble is the element type, high nibble is the size.
    // A size nibble of 15 means the actual size follows as a varint.
    const header = reader.view.getUint8(reader.offset++)
    const elemType = header & 0x0f
    const shortSize = header >> 4
    const size = shortSize === 15 ? readVarInt(reader) : shortSize
    const isBool = elemType === TRUE || elemType === FALSE
    const list = new Array(size)
    for (let i = 0; i < size; i++) {
      // Booleans inside a list take one byte each, where 1 means true
      list[i] = isBool
        ? reader.view.getInt8(reader.offset++) === 1
        : readElement(reader, elemType)
    }
    return list
  }
  // The top-level parser handles struct parsing
  if (type === STRUCT) return deserializeTCompactProtocol(reader)
  // MAP, SET, UUID not used by parquet
  throw new Error(`thrift unhandled type: ${type}`)
}
11295
11396/**
114- * Var int aka Unsigned LEB128.
115- * Reads groups of 7 low bits until high bit is 0.
97+ * Read varint aka Unsigned LEB128.
11698 *
11799 * @param {DataReader } reader
118100 * @returns {number }
@@ -121,6 +103,7 @@ export function readVarInt(reader) {
121103 let result = 0
122104 let shift = 0
123105 while ( true ) {
106+ // Read groups of 7 low bits until high bit is 0
124107 const byte = reader . view . getUint8 ( reader . offset ++ )
125108 result |= ( byte & 0x7f ) << shift
126109 if ( ! ( byte & 0x80 ) ) {
@@ -150,46 +133,24 @@ function readVarBigInt(reader) {
150133}
151134
/**
 * Read a zigzag-encoded number.
 * Zigzag folds positive and negative numbers into the positive number space:
 * the lowest bit carries the sign and the remaining bits the magnitude.
 *
 * @param {DataReader} reader
 * @returns {number}
 */
export function readZigZag(reader) {
  const encoded = readVarInt(reader)
  // Unsigned shift drops the sign bit without smearing the top bit
  const magnitude = encoded >>> 1
  // 0 when the sign bit is clear, -1 (all ones) when it is set
  const signMask = -(encoded & 1)
  return magnitude ^ signMask
}
164146
/**
 * Read a zigzag-encoded bigint.
 * The lowest bit carries the sign and the remaining bits the magnitude.
 *
 * @param {DataReader} reader
 * @returns {bigint}
 */
export function readZigZagBigInt(reader) {
  const encoded = readVarBigInt(reader)
  const magnitude = encoded >> 1n
  // 0n when the sign bit is clear, -1n (all ones) when it is set
  const signMask = -(encoded & 1n)
  return magnitude ^ signMask
}
177-
178- /**
179- * Read field type and field id
180- *
181- * @param {DataReader } reader
182- * @param {number } lastFid
183- * @returns {[number, number, number] } [type, fid, newLastFid]
184- */
185- function readFieldBegin ( reader , lastFid ) {
186- const byte = reader . view . getUint8 ( reader . offset ++ )
187- const type = byte & 0x0f
188- if ( type === CompactType . STOP ) {
189- // STOP also ends a struct
190- return [ 0 , 0 , lastFid ]
191- }
192- const delta = byte >> 4
193- const fid = delta ? lastFid + delta : readZigZag ( reader )
194- return [ type , fid , fid ]
195- }
0 commit comments