Skip to content

Commit ac81eb2

Browse files
alamb and etseidl
authored
Improve docs and add build() method to {Null,Boolean,}BufferBuilder (#9155)
# Which issue does this PR close? - Part of #9128 - Follow on to #9120 # Rationale for this change I am trying to encourage people to avoid using ArrayData when constructing arrays (as it is slower than just creating the arrays directly). Part of doing so is ensuring that the APIs to create the necessary pieces (NullBuffers in particular) are easy to use / well documented. As pointed out by @scovich on #9120 (comment), it is not obvious 1. How `finish` works (it resets the builder) 2. Why there is no `build` method (when there is a From impl). Thus, let's add `build` methods to `NullBufferBuilder` and document the difference between `finish` and `build`. While I was working on this change, I noticed the same issue with `BufferBuilder` and `BooleanBufferBuilder`, so I also made them consistent. # What changes are included in this PR? 1. Improve docs and add build() method to {Null,Boolean,}BufferBuilder # Are these changes tested? Yes, by CI and new doc examples. # Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. If there are any breaking changes to public APIs, please call them out. --> --------- Co-authored-by: Ed Seidl <etseidl@users.noreply.github.com>
1 parent 7a85e90 commit ac81eb2

File tree

4 files changed

+87
-24
lines changed

4 files changed

+87
-24
lines changed

arrow-buffer/src/buffer/null.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@ use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
1919
use crate::buffer::BooleanBuffer;
2020
use crate::{Buffer, MutableBuffer};
2121

22-
/// A [`BooleanBuffer`] used to encode validity for Arrow arrays
22+
/// A [`BooleanBuffer`] used to encode validity (null values) for Arrow arrays
2323
///
2424
/// In the [Arrow specification], array validity is encoded in a packed bitmask with a
2525
/// `true` value indicating the corresponding slot is not null, and `false` indicating
2626
/// that it is null.
2727
///
28-
/// `NullBuffer`s can be creating using [`NullBufferBuilder`]
28+
/// # See also
29+
/// * [`NullBufferBuilder`] for creating `NullBuffer`s
2930
///
3031
/// [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps
3132
/// [`NullBufferBuilder`]: crate::NullBufferBuilder

arrow-buffer/src/builder/boolean.rs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,28 @@ use std::ops::Range;
2121

2222
/// Builder for [`BooleanBuffer`]
2323
///
24+
/// Builds a packed buffer of bits representing boolean values. Each bit in the
25+
/// buffer corresponds to a boolean value.
26+
///
2427
/// # See Also
2528
///
26-
/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
29+
/// * [`NullBufferBuilder`] for building [`BooleanBuffer`]s for representing nulls
30+
/// * [`BufferBuilder`] for building [`Buffer`]s
31+
///
32+
/// # Example
33+
/// ```
34+
/// # use arrow_buffer::builder::BooleanBufferBuilder;
35+
/// let mut builder = BooleanBufferBuilder::new(10);
36+
/// builder.append(true);
37+
/// builder.append(false);
38+
/// builder.append_n(3, true); // append 3 trues
39+
/// let buffer = builder.build();
40+
/// assert_eq!(buffer.len(), 5); // 5 bits appended
41+
/// assert_eq!(buffer.values(), &[0b00011101_u8]); // packed bits
42+
/// ```
2743
///
28-
/// [`NullBuffer`]: crate::NullBuffer
44+
/// [`BufferBuilder`]: crate::builder::BufferBuilder
45+
/// [`NullBufferBuilder`]: crate::builder::NullBufferBuilder
2946
#[derive(Debug)]
3047
pub struct BooleanBufferBuilder {
3148
buffer: MutableBuffer,
@@ -247,14 +264,24 @@ impl BooleanBufferBuilder {
247264
self.buffer.as_slice_mut()
248265
}
249266

250-
/// Creates a [`BooleanBuffer`]
267+
/// Resets this builder and returns a [`BooleanBuffer`].
268+
///
269+
/// Use [`Self::build`] when you don't need to reuse this builder.
251270
#[inline]
252271
pub fn finish(&mut self) -> BooleanBuffer {
253272
let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
254273
let len = std::mem::replace(&mut self.len, 0);
255274
BooleanBuffer::new(buf.into(), 0, len)
256275
}
257276

277+
/// Builds a [`BooleanBuffer`] without resetting the builder.
278+
///
279+
/// This consumes the builder. Use [`Self::finish`] to reuse it.
280+
#[inline]
281+
pub fn build(self) -> BooleanBuffer {
282+
BooleanBuffer::new(self.buffer.into(), 0, self.len)
283+
}
284+
258285
/// Builds the [BooleanBuffer] without resetting the builder.
259286
pub fn finish_cloned(&self) -> BooleanBuffer {
260287
BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
@@ -285,7 +312,7 @@ impl From<BooleanBufferBuilder> for Buffer {
285312
impl From<BooleanBufferBuilder> for BooleanBuffer {
286313
#[inline]
287314
fn from(builder: BooleanBufferBuilder) -> Self {
288-
BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
315+
builder.build()
289316
}
290317
}
291318

arrow-buffer/src/builder/mod.rs

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,23 +28,31 @@ pub use offset::*;
2828
use crate::{ArrowNativeType, Buffer, MutableBuffer};
2929
use std::marker::PhantomData;
3030

31-
/// Builder for creating a [Buffer] object.
31+
/// Builder for creating Arrow [`Buffer`] objects
3232
///
33-
/// A [Buffer] is the underlying data structure of Arrow's Arrays.
33+
/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
3434
///
3535
/// For all supported types, there are type definitions for the
3636
/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
3737
///
38+
/// **Note it is typically faster to create buffers directly from `Vec`**.
39+
/// See example on [`Buffer`].
40+
///
41+
/// # See Also
42+
/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
43+
/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
44+
///
45+
/// [`BooleanBuffer`]: crate::BooleanBuffer
46+
/// [`NullBuffer`]: crate::NullBuffer
47+
///
3848
/// # Example:
3949
///
4050
/// ```
4151
/// # use arrow_buffer::builder::BufferBuilder;
42-
///
4352
/// let mut builder = BufferBuilder::<u8>::new(100);
4453
/// builder.append_slice(&[42, 43, 44]);
4554
/// builder.append(45);
4655
/// let buffer = builder.finish();
47-
///
4856
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
4957
/// ```
5058
#[derive(Debug)]
@@ -341,16 +349,15 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
341349

342350
/// Resets this builder and returns an immutable [Buffer].
343351
///
352+
/// Use [`Self::build`] when you don't need to reuse this builder.
353+
///
344354
/// # Example:
345355
///
346356
/// ```
347357
/// # use arrow_buffer::builder::BufferBuilder;
348-
///
349358
/// let mut builder = BufferBuilder::<u8>::new(10);
350359
/// builder.append_slice(&[42, 44, 46]);
351-
///
352360
/// let buffer = builder.finish();
353-
///
354361
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
355362
/// ```
356363
#[inline]
@@ -359,6 +366,24 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
359366
self.len = 0;
360367
buf.into()
361368
}
369+
370+
/// Builds an immutable [Buffer] without resetting the builder.
371+
///
372+
/// This consumes the builder. Use [`Self::finish`] to reuse it.
373+
///
374+
/// # Example:
375+
///
376+
/// ```
377+
/// # use arrow_buffer::builder::BufferBuilder;
378+
/// let mut builder = BufferBuilder::<u8>::new(10);
379+
/// builder.append_slice(&[42, 44, 46]);
380+
/// let buffer = builder.build();
381+
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
382+
/// ```
383+
#[inline]
384+
pub fn build(self) -> Buffer {
385+
self.buffer.into()
386+
}
362387
}
363388

364389
impl<T: ArrowNativeType> Default for BufferBuilder<T> {

arrow-buffer/src/builder/null.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,22 @@
1717

1818
use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
1919

20-
/// Builder for creating [`NullBuffer`]
20+
/// Builder for creating [`NullBuffer`]s (bitmaps indicating validity/nulls).
21+
///
22+
/// # See also
23+
/// * [`BooleanBufferBuilder`] for a lower-level bitmap builder.
24+
/// * [`Self::allocated_size`] for the current memory allocated by the builder.
2125
///
2226
/// # Performance
2327
///
24-
/// This builder only materializes the buffer when we append `false`.
25-
/// If you only append `true`s to the builder, what you get will be
26-
/// `None` when calling [`finish`](#method.finish).
28+
/// This builder only materializes the buffer when null values (`false`) are
29+
/// appended. If you only append non-null (`true`) values to the builder, no buffer is
30+
/// allocated and [`build`](#method.build) or [`finish`](#method.finish) return
31+
/// `None`.
2732
///
2833
/// This optimization is **very** important for the performance as it avoids
2934
/// allocating memory for the null buffer when there are no nulls.
3035
///
31-
/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
32-
///
3336
/// # Example
3437
/// ```
3538
/// # use arrow_buffer::NullBufferBuilder;
@@ -193,11 +196,20 @@ impl NullBufferBuilder {
193196
}
194197
}
195198

196-
/// Builds the null buffer and resets the builder.
197-
/// Returns `None` if the builder only contains `true`s.
199+
/// Builds the [`NullBuffer`] and resets the builder.
200+
///
201+
/// Returns `None` if the builder only contains `true`s. Use [`Self::build`]
202+
/// when you don't need to reuse this builder.
198203
pub fn finish(&mut self) -> Option<NullBuffer> {
199204
self.len = 0;
200-
Some(NullBuffer::new(self.bitmap_builder.take()?.finish()))
205+
Some(NullBuffer::new(self.bitmap_builder.take()?.build()))
206+
}
207+
208+
/// Builds the [`NullBuffer`] without resetting the builder.
209+
///
210+
/// This consumes the builder. Use [`Self::finish`] to reuse it.
211+
pub fn build(self) -> Option<NullBuffer> {
212+
self.bitmap_builder.map(NullBuffer::from)
201213
}
202214

203215
/// Builds the [NullBuffer] without resetting the builder.
@@ -238,9 +250,7 @@ impl NullBufferBuilder {
238250
.map(|b| b.capacity() / 8)
239251
.unwrap_or(0)
240252
}
241-
}
242253

243-
impl NullBufferBuilder {
244254
/// Return the number of bits in the buffer.
245255
pub fn len(&self) -> usize {
246256
self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())

0 commit comments

Comments
 (0)