Skip to content

Commit e5d6ea0

Browse files
committed
replace builder with bulk take in naive_rebuild
Signed-off-by: Baris Palaska <barispalaska@gmail.com>
1 parent e0de842 commit e5d6ea0

File tree

3 files changed

+90
-21
lines changed

3 files changed

+90
-21
lines changed

vortex-array/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,5 +169,9 @@ harness = false
169169
name = "take_fsl"
170170
harness = false
171171

172+
[[bench]]
173+
name = "listview_rebuild"
174+
harness = false
175+
172176
[package.metadata.cargo-machete]
173177
ignored = ["getrandom_v03"]
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks for ListView rebuild with primitive and VarBinView elements.
5+
6+
#![allow(clippy::unwrap_used)]
7+
#![allow(clippy::cast_possible_truncation)]
8+
9+
use divan::Bencher;
10+
use vortex_array::IntoArray;
11+
use vortex_array::arrays::ListViewArray;
12+
use vortex_array::arrays::ListViewRebuildMode;
13+
use vortex_array::arrays::PrimitiveArray;
14+
use vortex_array::arrays::VarBinViewArray;
15+
use vortex_array::validity::Validity;
16+
use vortex_buffer::Buffer;
17+
18+
fn main() {
19+
divan::main();
20+
}
21+
22+
const NUM_LISTS: &[usize] = &[100, 1_000, 10_000];
23+
24+
/// Build a ListViewArray of primitives with overlapping views.
25+
fn make_primitive_listview(num_lists: usize) -> ListViewArray {
26+
let element_count = num_lists * 4;
27+
let elements = PrimitiveArray::from_iter(0..element_count as i32).into_array();
28+
29+
let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * 2) as u32).collect();
30+
let sizes: Buffer<u32> = std::iter::repeat_n(4u32, num_lists).collect();
31+
32+
ListViewArray::new(
33+
elements,
34+
offsets.into_array(),
35+
sizes.into_array(),
36+
Validity::NonNullable,
37+
)
38+
}
39+
40+
/// Build a ListViewArray of strings with overlapping views.
41+
fn make_varbinview_listview(num_lists: usize) -> ListViewArray {
42+
let element_count = num_lists * 4;
43+
let strings: Vec<String> = (0..element_count)
44+
.map(|i| {
45+
if i % 3 == 0 {
46+
format!("long-string-value-{i:06}")
47+
} else {
48+
format!("s{i}")
49+
}
50+
})
51+
.collect();
52+
let elements = VarBinViewArray::from_iter_str(strings.iter().map(|s| s.as_str())).into_array();
53+
54+
let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * 2) as u32).collect();
55+
let sizes: Buffer<u32> = std::iter::repeat_n(4u32, num_lists).collect();
56+
57+
ListViewArray::new(
58+
elements,
59+
offsets.into_array(),
60+
sizes.into_array(),
61+
Validity::NonNullable,
62+
)
63+
}
64+
65+
#[divan::bench(args = NUM_LISTS)]
66+
fn rebuild_primitive(bencher: Bencher, num_lists: usize) {
67+
let listview = make_primitive_listview(num_lists);
68+
bencher
69+
.with_inputs(|| &listview)
70+
.bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
71+
}
72+
73+
#[divan::bench(args = NUM_LISTS)]
74+
fn rebuild_varbinview(bencher: Bencher, num_lists: usize) {
75+
let listview = make_varbinview_listview(num_lists);
76+
bencher
77+
.with_inputs(|| &listview)
78+
.bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
79+
}

vortex-array/src/arrays/listview/rebuild.rs

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use crate::Array;
1414
use crate::IntoArray;
1515
use crate::ToCanonical;
1616
use crate::arrays::ListViewArray;
17-
use crate::builders::builder_with_capacity;
1817
use crate::compute;
1918
use crate::vtable::ValidityHelper;
2019

@@ -103,18 +102,14 @@ impl ListViewArray {
103102
})
104103
}
105104

106-
// TODO(connor)[ListView]: We should benchmark if it is faster to use `take` on the elements
107-
// instead of using a builder.
108105
/// The inner function for `rebuild_zero_copy_to_list`, which rebuilds a `ListViewArray` piece
109106
/// by piece.
107+
///
108+
/// Collects all element indices into a flat `BufferMut<u64>` and performs a single bulk
109+
/// `take` to produce the new elements array.
110110
fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
111111
&self,
112112
) -> VortexResult<ListViewArray> {
113-
let element_dtype = self
114-
.dtype()
115-
.as_list_element_opt()
116-
.vortex_expect("somehow had a canonical list that was not a list");
117-
118113
let offsets_canonical = self.offsets().to_primitive();
119114
let offsets_slice = offsets_canonical.as_slice::<O>();
120115
let sizes_canonical = self.sizes().to_primitive();
@@ -129,17 +124,8 @@ impl ListViewArray {
129124
// null lists to 0.
130125
let mut new_sizes = BufferMut::<S>::with_capacity(len);
131126

132-
// Canonicalize the elements up front as we will be slicing the elements quite a lot.
133-
let elements_canonical = self
134-
.elements()
135-
.to_canonical()
136-
.vortex_expect("canonicalize elements for rebuild")
137-
.into_array();
138-
139-
// Note that we do not know what the exact capacity should be of the new elements since
140-
// there could be overlaps in the existing `ListViewArray`.
141-
let mut new_elements_builder =
142-
builder_with_capacity(element_dtype.as_ref(), self.elements().len());
127+
// Collect all element indices for a single bulk take.
128+
let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
143129

144130
let mut n_elements = NewOffset::zero();
145131
for index in 0..len {
@@ -159,14 +145,14 @@ impl ListViewArray {
159145

160146
new_offsets.push(n_elements);
161147
new_sizes.push(size);
162-
new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
148+
take_indices.extend(start as u64..stop as u64);
163149

164150
n_elements += num_traits::cast(size).vortex_expect("Cast failed");
165151
}
166152

167153
let offsets = new_offsets.into_array();
168154
let sizes = new_sizes.into_array();
169-
let elements = new_elements_builder.finish();
155+
let elements = self.elements().take(take_indices.into_array())?;
170156

171157
debug_assert_eq!(
172158
n_elements.as_(),

0 commit comments

Comments
 (0)