Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4cb1014
copy all unordered row code and update tests
rluvaton Jan 4, 2026
50700ba
fix one test and set the other to ignore
rluvaton Jan 4, 2026
9e5ea8b
added more tests with nullability
rluvaton Jan 4, 2026
7fa72b8
fix variable length parsing
rluvaton Jan 4, 2026
931ee03
improve variable encoding by avoiding blocks and improve boolean by u…
rluvaton Jan 4, 2026
7cf64c9
change do bench to compare both impl
rluvaton Jan 4, 2026
d356454
add bench large and move small strings before empty strings
rluvaton Jan 4, 2026
7fad286
tried special impl for 4 of the same type
rluvaton Jan 4, 2026
15adf69
slower
rluvaton Jan 5, 2026
00a92e1
make some faster and broke others
rluvaton Jan 5, 2026
f0b6568
over complicated to see something
rluvaton Jan 5, 2026
79f0f7f
add null encoding
rluvaton Jan 7, 2026
3f3349e
fix some
rluvaton Jan 7, 2026
e3367c1
fix some more
rluvaton Jan 7, 2026
266edcf
fix encoding
rluvaton Jan 7, 2026
b19d02f
fix and add test
rluvaton Jan 7, 2026
49edf84
for single column encode nulls as before
rluvaton Jan 7, 2026
de6e6e6
wip
rluvaton Jan 11, 2026
25c9bfc
Merge branch 'main' into unordered-row-convertor
rluvaton Jan 12, 2026
9575090
add encode with null and fix
rluvaton Jan 12, 2026
a166272
fix nulls
rluvaton Jan 12, 2026
6568cba
Merge branch 'main' into unordered-row-convertor
rluvaton Jan 13, 2026
d22b872
revert row format benchmark
rluvaton Jan 13, 2026
15e694c
change row format benchmark to use the unordered row converter
rluvaton Jan 13, 2026
ca7d701
improve list encoding
rluvaton Jan 14, 2026
527efa1
cleanup and comment
rluvaton Jan 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ mod fixed;
mod list;
mod run;
mod variable;
pub mod unordered_row;

/// Converts [`ArrayRef`] columns into a [row-oriented](self) format.
///
Expand Down
118 changes: 118 additions & 0 deletions arrow-row/src/unordered_row/boolean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use arrow_array::BooleanArray;
use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType};
use super::fixed::{FixedLengthEncoding, split_off};

/// Size in bytes of one encoded boolean cell; the encoders in this module
/// write exactly one byte per row.
// NOTE(review): the users of this constant are outside this file — confirm it
// is consumed as the per-row encoded width.
pub(super) const FIXED_SIZE: usize = 1;

/// Packs a boolean and its validity into a single byte.
///
/// Bit 1 carries `valid`, bit 0 carries `value`. A null (`valid == false`)
/// always encodes as `0`, regardless of `value`.
///
/// Resulting bytes: `0` = null, `2` = valid `false`, `3` = valid `true`.
// Forced inlining lets callers that pass a constant `valid` have the validity
// handling folded away at the call site.
#[inline(always)]
fn encode_bool_and_null(value: bool, valid: bool) -> u8 {
    // Only the validity flag is shifted, so when `valid` is a compile-time
    // constant the shift disappears entirely.
    let validity_flag = u8::from(valid) << 1;

    // Mask the value with the validity so that nulls never carry a set value bit.
    let payload = u8::from(value & valid);

    validity_flag | payload
}

/// Encodes `value` using the nullable byte layout for a row that is known to
/// be valid, i.e. the validity flag is always set.
fn encode_bool_and_nullable_with_no_nulls(value: bool) -> u8 {
    let always_valid = true;
    encode_bool_and_null(value, always_valid)
}

/// Unpacks a byte produced by `encode_bool_and_null` into `(is_valid, value)`.
///
/// Nulls are encoded with the value bit cleared, so the whole byte is `0`
/// exactly when the row is null; any non-zero byte is treated as valid.
/// The value is taken from the lowest bit.
fn decode_null_and_bool(encoded: u8) -> (bool, bool) {
    let value = (encoded & 0b01) != 0;
    let is_valid = encoded > 0;

    (is_valid, value)
}

/// Encodes a nullable boolean column, one byte per row.
///
/// Each byte combines validity and value as produced by
/// `encode_bool_and_null`: bit 1 is the validity flag, bit 0 is the value,
/// and a null row is written as `0`.
///
/// For row `idx` the byte is stored at `data[offsets[idx + 1]]`, after which
/// `offsets[idx + 1]` is advanced by one.
///
/// Rows are taken pairwise from `values` and `nulls`; iteration stops at the
/// shorter of the two.
///
/// # Panics
///
/// Panics if `offsets[idx + 1]` or the position it points to in `data` is out
/// of bounds.
pub fn encode_boolean(
    data: &mut [u8],
    offsets: &mut [usize],
    values: &BooleanBuffer,
    nulls: &NullBuffer,
) {
    let cells = values.iter().zip(nulls.iter());
    for (row, (bit, valid)) in cells.enumerate() {
        // Slot 0 of `offsets` is skipped: row `n` uses `offsets[n + 1]`.
        let slot = row + 1;
        let cursor = offsets[slot];
        data[cursor] = encode_bool_and_null(bit, valid);
        offsets[slot] = cursor + 1;
    }
}

/// Encodes a boolean column that has no nulls, one byte per row.
///
/// Only `values` is walked; every row is written with the validity flag set
/// (see `encode_bool_and_nullable_with_no_nulls`), so no null check is done.
///
/// For row `idx` the byte is stored at `data[offsets[idx + 1]]`, after which
/// `offsets[idx + 1]` is advanced by one.
///
/// # Panics
///
/// Panics if `offsets[idx + 1]` or the position it points to in `data` is out
/// of bounds.
pub fn encode_boolean_not_null(
    data: &mut [u8],
    offsets: &mut [usize],
    values: &BooleanBuffer,
) {
    for (row, bit) in values.iter().enumerate() {
        // Slot 0 of `offsets` is skipped: row `n` uses `offsets[n + 1]`.
        let slot = row + 1;
        let cursor = offsets[slot];
        data[cursor] = encode_bool_and_nullable_with_no_nulls(bit);
        offsets[slot] = cursor + 1;
    }
}

/// Decodes a `BooleanArray` from rows
///
/// Takes one encoded byte from every row via `split_off` and unpacks it with
/// `decode_null_and_bool` into a validity bit and a value bit. Bits are
/// gathered 64 rows at a time into `u64` words, which are appended to the
/// null bitmap and the values bitmap respectively. The number of nulls is
/// counted along the way and stored in the resulting array data.
///
/// NOTE(review): `split_off` is defined outside this file; it presumably
/// advances each row past the consumed byte and fails on an empty row —
/// confirm against its definition.
pub fn decode_bool(rows: &mut [&[u8]]) -> BooleanArray {
    let len = rows.len();

    // Both bitmaps are sized in whole 64-bit words (8 bytes per word).
    let bitmap_bytes = bit_util::ceil(len, 64) * 8;

    let mut null_count = 0;
    let mut nulls = MutableBuffer::new(bitmap_bytes);
    let mut values = MutableBuffer::new(bitmap_bytes);

    // Full words first: `chunks_exact_mut` hands out slices of exactly 64
    // rows, so the per-word loop keeps a fixed trip count.
    let mut full_words = rows.chunks_exact_mut(64);
    for word_rows in full_words.by_ref() {
        let (null_packed, values_packed) = decode_bool_word(word_rows, &mut null_count);
        nulls.push(null_packed);
        values.push(values_packed);
    }

    // Trailing partial word (fewer than 64 rows); its unused high bits stay zero.
    let tail_rows = full_words.into_remainder();
    if !tail_rows.is_empty() {
        let (null_packed, values_packed) = decode_bool_word(tail_rows, &mut null_count);
        nulls.push(null_packed);
        values.push(values_packed);
    }

    let builder = ArrayDataBuilder::new(DataType::Boolean)
        .len(len)
        .null_count(null_count)
        .add_buffer(values.into())
        .null_bit_buffer(Some(nulls.into()));

    // SAFETY:
    // Buffers are the correct length
    unsafe { BooleanArray::from(builder.build_unchecked()) }
}

/// Decodes up to 64 rows into one `(validity_word, values_word)` pair, where
/// bit `i` of each word belongs to `rows[i]`. Adds the number of null rows
/// seen to `null_count`.
fn decode_bool_word(rows: &mut [&[u8]], null_count: &mut usize) -> (u64, u64) {
    let mut null_packed = 0u64;
    let mut values_packed = 0u64;

    for (bit_idx, row) in rows.iter_mut().enumerate() {
        let encoded = split_off(row, 1);
        let (is_valid, value) = decode_null_and_bool(encoded[0]);
        *null_count += !is_valid as usize;
        null_packed |= (is_valid as u64) << bit_idx;
        values_packed |= (value as u64) << bit_idx;
    }

    (null_packed, values_packed)
}
Loading
Loading