Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,37 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
}
}

/// Returns the combined byte length of every non-null value in this array.
///
/// In contrast to [`Self::total_buffer_bytes_used`], values that are stored
/// inline (length ≤ [`MAX_INLINE_VIEW_LEN`]) are counted as well, so the
/// result can be used as a capacity hint when pre-allocating an output
/// buffer for the array's contents.
///
/// Null slots contribute nothing to the total.
///
/// # Example
///
/// ```rust
/// # use arrow_array::StringViewArray;
/// let array = StringViewArray::from_iter(vec![
///     Some("hello"),                       // 5 bytes, inlined
///     None,                                // excluded
///     Some("large payload over 12 bytes"), // 27 bytes, non-inlined
/// ]);
/// assert_eq!(array.total_bytes_len(), 5 + 27);
/// ```
pub fn total_bytes_len(&self) -> usize {
    // The truncating cast to `u32` extracts the length field of each view.
    let lengths = self.views().iter().map(|view| (*view as u32) as usize);

    if let Some(validity) = self.nulls() {
        // Pair each length with its validity bit and count only valid slots.
        lengths
            .zip(validity.iter())
            .map(|(len, valid)| if valid { len } else { 0 })
            .sum()
    } else {
        // No null buffer: every slot holds a value.
        lengths.sum()
    }
}

/// Returns the total number of bytes used by all non inlined views in all
/// buffers.
///
Expand Down Expand Up @@ -1809,4 +1840,41 @@ mod tests {
assert!(from_utf8(array.value(2)).is_ok());
array
}

#[test]
fn test_total_bytes_len() {
    // Three inlined values (5 + 5 + 4 bytes), one null that must not be
    // counted, and one 27-byte value that is too long to be inlined.
    let inlined = ["hello", "world", "lulu"];
    let non_inlined = "large payload over 12 bytes";

    let mut builder = StringViewBuilder::new();
    for value in inlined {
        builder.append_value(value);
    }
    builder.append_null();
    builder.append_value(non_inlined);
    let array = builder.finish();

    let inlined_bytes: usize = inlined.iter().map(|value| value.len()).sum();
    let expected = inlined_bytes + non_inlined.len();
    assert_eq!(array.total_bytes_len(), expected);
}

#[test]
fn test_total_bytes_len_empty() {
    // An array with no slots at all has nothing to sum.
    let no_values: Vec<Option<&str>> = Vec::new();
    let array = StringViewArray::from_iter(no_values);
    assert_eq!(array.total_bytes_len(), 0);
}

#[test]
fn test_total_bytes_len_all_nulls() {
    // Every slot is null, so no slot may contribute to the total.
    let slot_count = 5;
    let array = StringViewArray::new_null(slot_count);
    assert_eq!(array.total_bytes_len(), 0);
}

#[test]
fn test_total_bytes_len_binary_view() {
    // Same check for binary views: one inlined value, one null, and one
    // value too long to be inlined.
    let short: &[u8] = b"hi";
    let long: &[u8] = b"large payload over 12 bytes";

    let array = BinaryViewArray::from_iter(vec![Some(short), None, Some(long)]);

    assert_eq!(array.total_bytes_len(), short.len() + long.len());
}
}
Loading