Skip to content

Commit df7a0ee

Browse files
committed
address comments
1 parent 528f04c commit df7a0ee

File tree

5 files changed

+47
-135
lines changed

5 files changed

+47
-135
lines changed

parquet-variant-json/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,5 @@
3434
mod from_json;
3535
mod to_json;
3636

37-
pub use from_json::JsonToVariant;
38-
pub use from_json::append_json;
37+
pub use from_json::{JsonToVariant, append_json};
3938
pub use to_json::VariantToJson;

parquet-variant/src/builder.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
// KIND, either express or implied. See the License for the
1515
// specific language governing permissions and limitations
1616
// under the License.
17-
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
17+
use crate::decoder::{OffsetSizeBytes, VariantBasicType, VariantPrimitiveType};
1818
use crate::{
1919
ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantList,
2020
VariantMetadata, VariantObject,
@@ -43,12 +43,12 @@ fn short_string_header(len: usize) -> u8 {
4343
(len as u8) << 2 | VariantBasicType::ShortString as u8
4444
}
4545

46-
pub(crate) fn int_size(v: usize) -> u8 {
46+
pub(crate) fn int_size(v: usize) -> OffsetSizeBytes {
4747
match v {
48-
0..=0xFF => 1,
49-
0x100..=0xFFFF => 2,
50-
0x10000..=0xFFFFFF => 3,
51-
_ => 4,
48+
0..=0xFF => OffsetSizeBytes::One,
49+
0x100..=0xFFFF => OffsetSizeBytes::Two,
50+
0x10000..=0xFFFFFF => OffsetSizeBytes::Three,
51+
_ => OffsetSizeBytes::Four,
5252
}
5353
}
5454

parquet-variant/src/builder/list.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ impl<'a, S: BuilderSpecificState> ListBuilder<'a, S> {
174174
// Make sure to reserve enough capacity to handle the extra bytes we'll truncate.
175175
let mut bytes_to_splice = Vec::with_capacity(header_size + 3);
176176
// Write header
177-
let header = array_header(is_large, offset_size);
177+
let header = array_header(is_large, offset_size as _);
178178
bytes_to_splice.push(header);
179179

180180
append_packed_u32(&mut bytes_to_splice, num_elements as u32, num_elements_size);

parquet-variant/src/builder/metadata.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ impl WritableMetadataBuilder {
206206

207207
// Determine appropriate offset size based on the larger of dict size or total string size
208208
let max_offset = std::cmp::max(total_dict_size, nkeys);
209-
let offset_size = int_size(max_offset);
209+
let offset_size = int_size(max_offset) as u8;
210210

211211
let offset_start = 1 + offset_size as usize;
212212
let string_start = offset_start + (nkeys + 1) * offset_size as usize;

parquet-variant/src/builder/object.rs

Lines changed: 38 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,14 @@ impl<const OFFSET_SIZE: u8, const ID_SIZE: u8> ObjectHeaderWriter<OFFSET_SIZE, I
4242
data_size: usize,
4343
) {
4444
let is_large = num_fields > u8::MAX as usize;
45-
match is_large {
46-
true => {
47-
dst.push(object_header::<1, { ID_SIZE }, { OFFSET_SIZE }>());
48-
// num_fields will consume 4 bytes when it is larger than u8::MAX
49-
append_packed_u32::<4>(dst, num_fields);
50-
}
51-
false => {
52-
dst.push(object_header::<0, { ID_SIZE }, { OFFSET_SIZE }>());
53-
append_packed_u32::<1>(dst, num_fields);
54-
}
55-
};
45+
// num_fields will consume 4 bytes when it is larger than u8::MAX
46+
if is_large {
47+
dst.push(object_header::<1, { ID_SIZE }, { OFFSET_SIZE }>());
48+
append_packed_u32::<4>(dst, num_fields);
49+
} else {
50+
dst.push(object_header::<0, { ID_SIZE }, { OFFSET_SIZE }>());
51+
append_packed_u32::<1>(dst, num_fields);
52+
}
5653

5754
for id in field_ids {
5855
append_packed_u32::<ID_SIZE>(dst, id as usize);
@@ -267,7 +264,7 @@ impl<'a, S: BuilderSpecificState> ObjectBuilder<'a, S> {
267264
});
268265

269266
let max_id = self.fields.iter().map(|(i, _)| *i).max().unwrap_or(0);
270-
let id_size: u8 = int_size(max_id as usize);
267+
let id_size = int_size(max_id as usize);
271268

272269
let starting_offset = self.parent_state.saved_value_builder_offset;
273270
let value_builder = self.parent_state.value_builder();
@@ -286,120 +283,36 @@ impl<'a, S: BuilderSpecificState> ObjectBuilder<'a, S> {
286283

287284
let mut bytes_to_splice = Vec::with_capacity(header_size);
288285

286+
macro_rules! write_header {
287+
($offset_size:expr, $id_size:expr) => {
288+
ObjectHeaderWriter::<{ $offset_size as u8 }, { $id_size as u8 }>::write(
289+
&mut bytes_to_splice,
290+
num_fields,
291+
self.fields.keys().copied(),
292+
self.fields.values().copied(),
293+
data_size,
294+
)
295+
};
296+
}
297+
298+
use crate::decoder::OffsetSizeBytes::*;
289299
match (offset_size, id_size) {
290-
(1, 1) => ObjectHeaderWriter::<1, 1>::write(
291-
&mut bytes_to_splice,
292-
num_fields,
293-
self.fields.keys().copied(),
294-
self.fields.values().copied(),
295-
data_size,
296-
),
297-
(1, 2) => ObjectHeaderWriter::<1, 2>::write(
298-
&mut bytes_to_splice,
299-
num_fields,
300-
self.fields.keys().copied(),
301-
self.fields.values().copied(),
302-
data_size,
303-
),
304-
(1, 3) => ObjectHeaderWriter::<1, 3>::write(
305-
&mut bytes_to_splice,
306-
num_fields,
307-
self.fields.keys().copied(),
308-
self.fields.values().copied(),
309-
data_size,
310-
),
311-
(1, 4) => ObjectHeaderWriter::<1, 4>::write(
312-
&mut bytes_to_splice,
313-
num_fields,
314-
self.fields.keys().copied(),
315-
self.fields.values().copied(),
316-
data_size,
317-
),
318-
(2, 1) => ObjectHeaderWriter::<2, 1>::write(
319-
&mut bytes_to_splice,
320-
num_fields,
321-
self.fields.keys().copied(),
322-
self.fields.values().copied(),
323-
data_size,
324-
),
325-
(2, 2) => ObjectHeaderWriter::<2, 2>::write(
326-
&mut bytes_to_splice,
327-
num_fields,
328-
self.fields.keys().copied(),
329-
self.fields.values().copied(),
330-
data_size,
331-
),
332-
(2, 3) => ObjectHeaderWriter::<2, 3>::write(
333-
&mut bytes_to_splice,
334-
num_fields,
335-
self.fields.keys().copied(),
336-
self.fields.values().copied(),
337-
data_size,
338-
),
339-
(2, 4) => ObjectHeaderWriter::<2, 4>::write(
340-
&mut bytes_to_splice,
341-
num_fields,
342-
self.fields.keys().copied(),
343-
self.fields.values().copied(),
344-
data_size,
345-
),
346-
(3, 1) => ObjectHeaderWriter::<3, 1>::write(
347-
&mut bytes_to_splice,
348-
num_fields,
349-
self.fields.keys().copied(),
350-
self.fields.values().copied(),
351-
data_size,
352-
),
353-
(3, 2) => ObjectHeaderWriter::<3, 2>::write(
354-
&mut bytes_to_splice,
355-
num_fields,
356-
self.fields.keys().copied(),
357-
self.fields.values().copied(),
358-
data_size,
359-
),
360-
(3, 3) => ObjectHeaderWriter::<3, 3>::write(
361-
&mut bytes_to_splice,
362-
num_fields,
363-
self.fields.keys().copied(),
364-
self.fields.values().copied(),
365-
data_size,
366-
),
367-
(3, 4) => ObjectHeaderWriter::<3, 4>::write(
368-
&mut bytes_to_splice,
369-
num_fields,
370-
self.fields.keys().copied(),
371-
self.fields.values().copied(),
372-
data_size,
373-
),
374-
(4, 1) => ObjectHeaderWriter::<4, 1>::write(
375-
&mut bytes_to_splice,
376-
num_fields,
377-
self.fields.keys().copied(),
378-
self.fields.values().copied(),
379-
data_size,
380-
),
381-
(4, 2) => ObjectHeaderWriter::<4, 2>::write(
382-
&mut bytes_to_splice,
383-
num_fields,
384-
self.fields.keys().copied(),
385-
self.fields.values().copied(),
386-
data_size,
387-
),
388-
(4, 3) => ObjectHeaderWriter::<4, 3>::write(
389-
&mut bytes_to_splice,
390-
num_fields,
391-
self.fields.keys().copied(),
392-
self.fields.values().copied(),
393-
data_size,
394-
),
395-
(4, 4) => ObjectHeaderWriter::<4, 4>::write(
396-
&mut bytes_to_splice,
397-
num_fields,
398-
self.fields.keys().copied(),
399-
self.fields.values().copied(),
400-
data_size,
401-
),
402-
_ => panic!("Unsupported offset_size/id_size combination"),
300+
(One, One) => write_header!(One, One),
301+
(One, Two) => write_header!(One, Two),
302+
(One, Three) => write_header!(One, Three),
303+
(One, Four) => write_header!(One, Four),
304+
(Two, One) => write_header!(Two, One),
305+
(Two, Two) => write_header!(Two, Two),
306+
(Two, Three) => write_header!(Two, Three),
307+
(Two, Four) => write_header!(Two, Four),
308+
(Three, One) => write_header!(Three, One),
309+
(Three, Two) => write_header!(Three, Two),
310+
(Three, Three) => write_header!(Three, Three),
311+
(Three, Four) => write_header!(Three, Four),
312+
(Four, One) => write_header!(Four, One),
313+
(Four, Two) => write_header!(Four, Two),
314+
(Four, Three) => write_header!(Four, Three),
315+
(Four, Four) => write_header!(Four, Four),
403316
}
404317

405318
// Shift existing data to make room for the header

0 commit comments

Comments
 (0)