Skip to content

Commit acdbbe0

Browse files
Dandandanclaude
andcommitted
Introduce FromBitpacked trait and use debug_assert in BitReader
Replace unreachable!() stubs for from_u64 on Int96/ByteArray/FixedLenByteArray with a separate FromBitpacked trait that is only implemented for types that can actually be converted from u64 (primitives, floats, bool). Also convert assert! to debug_assert! for num_bits bounds checks in get_value/get_batch. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0c1ec22 commit acdbbe0

File tree

4 files changed

+37
-23
lines changed

4 files changed

+37
-23
lines changed

parquet/src/arrow/array_reader/byte_array_dictionary.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use crate::column::reader::decoder::ColumnValueDecoder;
3434
use crate::encodings::rle::RleDecoder;
3535
use crate::errors::{ParquetError, Result};
3636
use crate::schema::types::ColumnDescPtr;
37-
use crate::util::bit_util::FromBytes;
37+
use crate::util::bit_util::FromBitpacked;
3838

3939
/// A macro to reduce verbosity of [`make_byte_array_dictionary_reader`]
4040
macro_rules! make_reader {
@@ -128,7 +128,7 @@ struct ByteArrayDictionaryReader<K: ArrowNativeType, V: OffsetSizeTrait> {
128128

129129
impl<K, V> ByteArrayDictionaryReader<K, V>
130130
where
131-
K: FromBytes + Ord + ArrowNativeType,
131+
K: FromBitpacked + Ord + ArrowNativeType,
132132
V: OffsetSizeTrait,
133133
{
134134
fn new(
@@ -148,7 +148,7 @@ where
148148

149149
impl<K, V> ArrayReader for ByteArrayDictionaryReader<K, V>
150150
where
151-
K: FromBytes + Ord + ArrowNativeType,
151+
K: FromBitpacked + Ord + ArrowNativeType,
152152
V: OffsetSizeTrait,
153153
{
154154
fn as_any(&self) -> &dyn Any {
@@ -226,7 +226,7 @@ struct DictionaryDecoder<K, V> {
226226

227227
impl<K, V> ColumnValueDecoder for DictionaryDecoder<K, V>
228228
where
229-
K: FromBytes + Ord + ArrowNativeType,
229+
K: FromBitpacked + Ord + ArrowNativeType,
230230
V: OffsetSizeTrait,
231231
{
232232
type Buffer = DictionaryBuffer<K, V>;

parquet/src/encodings/decoding.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use crate::encodings::decoding::byte_stream_split_decoder::{
3131
};
3232
use crate::errors::{ParquetError, Result};
3333
use crate::schema::types::ColumnDescPtr;
34-
use crate::util::bit_util::{self, BitReader};
34+
use crate::util::bit_util::{self, BitReader, FromBitpacked};
3535

3636
mod byte_stream_split_decoder;
3737

@@ -455,7 +455,10 @@ impl<T: DataType> RleValueDecoder<T> {
455455
}
456456
}
457457

458-
impl<T: DataType> Decoder<T> for RleValueDecoder<T> {
458+
impl<T: DataType> Decoder<T> for RleValueDecoder<T>
459+
where
460+
T::T: FromBitpacked,
461+
{
459462
#[inline]
460463
fn set_data(&mut self, data: Bytes, num_values: usize) -> Result<()> {
461464
// Only support RLE value reader for boolean values with bit width of 1.
@@ -658,7 +661,7 @@ where
658661

659662
impl<T: DataType> Decoder<T> for DeltaBitPackDecoder<T>
660663
where
661-
T::T: Default + FromPrimitive + WrappingAdd + Copy,
664+
T::T: Default + FromPrimitive + FromBitpacked + WrappingAdd + Copy,
662665
{
663666
// # of total values is derived from encoding
664667
#[inline]

parquet/src/encodings/rle.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ use std::{cmp, mem::size_of};
3939
use bytes::Bytes;
4040

4141
use crate::errors::{ParquetError, Result};
42-
use crate::util::bit_util::{self, BitReader, BitWriter, FromBytes};
42+
use crate::util::bit_util::{self, BitReader, BitWriter, FromBitpacked};
4343

4444
/// Maximum groups of 8 values per bit-packed run. Current value is 64.
4545
const MAX_GROUPS_PER_BIT_PACKED_RUN: usize = 1 << 6;
@@ -352,7 +352,7 @@ impl RleDecoder {
352352
// that damage L1d-cache occupancy. This results in a ~18% performance drop
353353
#[inline(never)]
354354
#[allow(unused)]
355-
pub fn get<T: FromBytes>(&mut self) -> Result<Option<T>> {
355+
pub fn get<T: FromBitpacked>(&mut self) -> Result<Option<T>> {
356356
assert!(size_of::<T>() <= 8);
357357

358358
while self.rle_left == 0 && self.bit_packed_left == 0 {
@@ -388,7 +388,7 @@ impl RleDecoder {
388388
}
389389

390390
#[inline(never)]
391-
pub fn get_batch<T: FromBytes + Clone>(&mut self, buffer: &mut [T]) -> Result<usize> {
391+
pub fn get_batch<T: FromBitpacked + Clone>(&mut self, buffer: &mut [T]) -> Result<usize> {
392392
assert!(size_of::<T>() <= 8);
393393

394394
let mut values_read = 0;

parquet/src/util/bit_util.rs

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ pub unsafe trait FromBytes: Sized {
4444
type Buffer: AsMut<[u8]> + Default;
4545
fn try_from_le_slice(b: &[u8]) -> Result<Self>;
4646
fn from_le_bytes(bs: Self::Buffer) -> Self;
47+
}
48+
49+
/// Types that can be decoded from bitpacked representations.
50+
///
51+
/// This is implemented for primitive types and bool that can be
52+
/// directly converted from a u64 value. Types like Int96, ByteArray,
53+
/// and FixedLenByteArray that cannot be represented in 64 bits do not
54+
/// implement this trait.
55+
pub trait FromBitpacked: FromBytes {
4756
/// Convert directly from a u64 value by truncation, avoiding byte slice copies.
4857
fn from_u64(v: u64) -> Self;
4958
}
@@ -61,6 +70,8 @@ macro_rules! from_le_bytes {
6170
fn from_le_bytes(bs: Self::Buffer) -> Self {
6271
<$ty>::from_le_bytes(bs)
6372
}
73+
}
74+
impl FromBitpacked for $ty {
6475
#[inline]
6576
fn from_u64(v: u64) -> Self {
6677
v as Self
@@ -82,6 +93,9 @@ unsafe impl FromBytes for f32 {
8293
fn from_le_bytes(bs: Self::Buffer) -> Self {
8394
f32::from_le_bytes(bs)
8495
}
96+
}
97+
98+
impl FromBitpacked for f32 {
8599
#[inline]
86100
fn from_u64(v: u64) -> Self {
87101
f32::from_bits(v as u32)
@@ -98,6 +112,9 @@ unsafe impl FromBytes for f64 {
98112
fn from_le_bytes(bs: Self::Buffer) -> Self {
99113
f64::from_le_bytes(bs)
100114
}
115+
}
116+
117+
impl FromBitpacked for f64 {
101118
#[inline]
102119
fn from_u64(v: u64) -> Self {
103120
f64::from_bits(v)
@@ -115,6 +132,9 @@ unsafe impl FromBytes for bool {
115132
fn from_le_bytes(bs: Self::Buffer) -> Self {
116133
bs[0] != 0
117134
}
135+
}
136+
137+
impl FromBitpacked for bool {
118138
#[inline]
119139
fn from_u64(v: u64) -> Self {
120140
v != 0
@@ -146,9 +166,6 @@ unsafe impl FromBytes for Int96 {
146166
);
147167
i
148168
}
149-
fn from_u64(_v: u64) -> Self {
150-
unreachable!("Int96 does not support from_u64")
151-
}
152169
}
153170

154171
// SAFETY: BIT_CAPACITY is 0.
@@ -162,9 +179,6 @@ unsafe impl FromBytes for ByteArray {
162179
fn from_le_bytes(bs: Self::Buffer) -> Self {
163180
bs.into()
164181
}
165-
fn from_u64(_v: u64) -> Self {
166-
unreachable!("ByteArray does not support from_u64")
167-
}
168182
}
169183

170184
// SAFETY: BIT_CAPACITY is 0.
@@ -178,9 +192,6 @@ unsafe impl FromBytes for FixedLenByteArray {
178192
fn from_le_bytes(bs: Self::Buffer) -> Self {
179193
bs.into()
180194
}
181-
fn from_u64(_v: u64) -> Self {
182-
unreachable!("FixedLenByteArray does not support from_u64")
183-
}
184195
}
185196

186197
/// Reads `size` of bytes from `src`, and reinterprets them as type `ty`, in
@@ -464,7 +475,7 @@ impl BitReader {
464475
/// Reads a value of type `T` and of size `num_bits`.
465476
///
466477
/// Returns `None` if there's not enough data available. `Some` otherwise.
467-
pub fn get_value<T: FromBytes>(&mut self, num_bits: usize) -> Option<T> {
478+
pub fn get_value<T: FromBitpacked>(&mut self, num_bits: usize) -> Option<T> {
468479
assert!(num_bits <= 64);
469480
assert!(num_bits <= size_of::<T>() * 8);
470481

@@ -507,8 +518,8 @@ impl BitReader {
507518
/// This function panics if
508519
/// - `num_bits` is larger than the bit-capacity of `T`
509520
///
510-
pub fn get_batch<T: FromBytes>(&mut self, batch: &mut [T], num_bits: usize) -> usize {
511-
assert!(num_bits <= size_of::<T>() * 8);
521+
pub fn get_batch<T: FromBitpacked>(&mut self, batch: &mut [T], num_bits: usize) -> usize {
522+
debug_assert!(num_bits <= size_of::<T>() * 8);
512523

513524
let mut values_to_read = batch.len();
514525
let needed_bits = num_bits * values_to_read;
@@ -1074,7 +1085,7 @@ mod tests {
10741085

10751086
fn test_get_batch_helper<T>(total: usize, num_bits: usize)
10761087
where
1077-
T: FromBytes + Default + Clone + Debug + Eq,
1088+
T: FromBitpacked + Default + Clone + Debug + Eq,
10781089
{
10791090
assert!(num_bits <= 64);
10801091
let num_bytes = ceil(num_bits, 8);

0 commit comments

Comments
 (0)