use num_traits::Zero;
use vortex_error::VortexResult;

use super::REBUILD_DENSITY_THRESHOLD;
use crate::ArrayRef;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::array::ArrayView;
use crate::arrays::ListView;
use crate::arrays::ListViewArray;
use crate::arrays::dict::TakeExecute;
use crate::arrays::dict::TakeReduce;
use crate::arrays::listview::ListViewArrayExt;
use crate::arrays::listview::ListViewRebuildMode;
use crate::dtype::Nullability;
use crate::match_each_integer_ptype;
use crate::scalar::Scalar;
20- // TODO(connor)[ListView]: Make use of this threshold after we start migrating operators.
21- /// The threshold for triggering a rebuild of the [`ListViewArray`].
22- ///
23- /// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it
24- /// can be potentially expensive to reorganize the array based on what views we have into it.
25- ///
26- /// However, we also do not want to carry around a large amount of garbage data. Below this
27- /// threshold of the density of the selection mask, we will rebuild the [`ListViewArray`], removing
28- /// any garbage data.
29- #[ expect( unused) ]
30- const REBUILD_DENSITY_THRESHOLD : f64 = 0.1 ;
31-
32- /// [`ListViewArray`] take implementation.
33- ///
34- /// This implementation is deliberately simple and read-optimized. We just take the `offsets` and
35- /// `sizes` at the requested indices and reuse the original `elements` array. This works because
36- /// `ListView` (unlike `List`) allows non-contiguous and out-of-order lists.
37- ///
38- /// We don't slice the `elements` array because it would require computing min/max offsets and
39- /// adjusting all offsets accordingly, which is not really worth the small potential memory we would
40- /// be able to get back.
41- ///
42- /// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable since
43- /// we're optimizing for read performance and the data isn't being copied.
23+ /// Metadata-only take for [`ListViewArray`].
4424impl TakeReduce for ListView {
4525 fn take ( array : ArrayView < ' _ , ListView > , indices : & ArrayRef ) -> VortexResult < Option < ArrayRef > > {
46- let elements = array. elements ( ) ;
47- let offsets = array. offsets ( ) ;
48- let sizes = array. sizes ( ) ;
26+ // Approximate element density by the fraction of list rows retained. Assumes roughly
27+ // uniform list sizes; good enough to decide whether dragging along the full `elements`
28+ // buffer is worth avoiding a rebuild.
29+ let kept_row_fraction = indices. len ( ) as f32 / array. sizes ( ) . len ( ) as f32 ;
30+ if kept_row_fraction < REBUILD_DENSITY_THRESHOLD {
31+ return Ok ( None ) ;
32+ }
4933
50- // Compute the new validity by combining the array's validity with the indices' validity.
51- let new_validity = array. validity ( ) ?. take ( indices) ?;
34+ Ok ( Some ( apply_take ( array, indices) ?. into_array ( ) ) )
35+ }
36+ }
5237
53- // Take the offsets and sizes arrays at the requested indices.
54- // Take can reorder offsets, create gaps, and may introduce overlaps if the `indices`
55- // contain duplicates.
56- let nullable_new_offsets = offsets. take ( indices. clone ( ) ) ?;
57- let nullable_new_sizes = sizes. take ( indices. clone ( ) ) ?;
38+ /// Execution-path take for [`ListViewArray`].
39+ ///
40+ /// This does the same metadata-only take as [`TakeReduce`], but also rebuilds the array if the
41+ /// resulting array will be less dense than `REBUILD_DENSITY_THRESHOLD`.
42+ impl TakeExecute for ListView {
43+ fn take (
44+ array : ArrayView < ' _ , ListView > ,
45+ indices : & ArrayRef ,
46+ _ctx : & mut ExecutionCtx ,
47+ ) -> VortexResult < Option < ArrayRef > > {
48+ let kept_row_fraction = indices. len ( ) as f32 / array. sizes ( ) . len ( ) as f32 ;
49+ let taken = apply_take ( array, indices) ?;
5850
59- // Since `take` returns nullable arrays, we simply cast it back to non-nullable (filled with
60- // zeros to represent null lists).
61- let new_offsets = match_each_integer_ptype ! ( nullable_new_offsets. dtype( ) . as_ptype( ) , |O | {
62- nullable_new_offsets
63- . fill_null( Scalar :: primitive( O :: zero( ) , Nullability :: NonNullable ) ) ?
64- } ) ;
65- let new_sizes = match_each_integer_ptype ! ( nullable_new_sizes. dtype( ) . as_ptype( ) , |S | {
66- nullable_new_sizes. fill_null( Scalar :: primitive( S :: zero( ) , Nullability :: NonNullable ) ) ?
67- } ) ;
68- // SAFETY: Take operation maintains all `ListViewArray` invariants:
69- // - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
70- // - `new_offsets` and `new_sizes` are non-nullable.
71- // - `new_offsets` and `new_sizes` have the same length (both taken with the same
72- // `indices`).
73- // - Validity correctly reflects the combination of array and indices validity.
74- let new_array = unsafe {
75- ListViewArray :: new_unchecked ( elements. clone ( ) , new_offsets, new_sizes, new_validity)
76- } ;
51+ if kept_row_fraction < REBUILD_DENSITY_THRESHOLD {
52+ // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
53+ // compute functions have run, at the "top" of the operator tree. However, we cannot do
54+ // this right now, so we will just rebuild every time (similar to `ListArray`).
55+ Ok ( Some (
56+ taken
57+ . rebuild ( ListViewRebuildMode :: MakeZeroCopyToList ) ?
58+ . into_array ( ) ,
59+ ) )
60+ } else {
61+ Ok ( Some ( taken. into_array ( ) ) )
62+ }
63+ }
64+ }
7765
78- // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
79- // compute functions have run, at the "top" of the operator tree. However, we cannot do this
80- // right now, so we will just rebuild every time (similar to `ListArray`).
66+ /// Shared metadata-only take: take `offsets`, `sizes` and `validity` at `indices` while reusing
67+ /// the original `elements` buffer as-is.
68+ fn apply_take ( array : ArrayView < ' _ , ListView > , indices : & ArrayRef ) -> VortexResult < ListViewArray > {
69+ let elements = array. elements ( ) ;
70+ let offsets = array. offsets ( ) ;
71+ let sizes = array. sizes ( ) ;
8172
82- Ok ( Some (
83- new_array
84- . rebuild ( ListViewRebuildMode :: MakeZeroCopyToList ) ?
85- . into_array ( ) ,
86- ) )
87- }
73+ // Combine the array's validity with the indices' validity.
74+ let new_validity = array. validity ( ) ?. take ( indices) ?;
75+
76+ // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain
77+ // duplicates.
78+ let nullable_new_offsets = offsets. take ( indices. clone ( ) ) ?;
79+ let nullable_new_sizes = sizes. take ( indices. clone ( ) ) ?;
80+
81+ // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to represent
82+ // the null lists — the validity mask tracks nullness separately).
83+ let new_offsets = match_each_integer_ptype ! ( nullable_new_offsets. dtype( ) . as_ptype( ) , |O | {
84+ nullable_new_offsets. fill_null( Scalar :: primitive( O :: zero( ) , Nullability :: NonNullable ) ) ?
85+ } ) ;
86+ let new_sizes = match_each_integer_ptype ! ( nullable_new_sizes. dtype( ) . as_ptype( ) , |S | {
87+ nullable_new_sizes. fill_null( Scalar :: primitive( S :: zero( ) , Nullability :: NonNullable ) ) ?
88+ } ) ;
89+
90+ // SAFETY: Take operation maintains all `ListViewArray` invariants:
91+ // - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
92+ // - `new_offsets` and `new_sizes` are non-nullable.
93+ // - `new_offsets` and `new_sizes` have the same length (both taken with the same `indices`).
94+ // - Validity correctly reflects the combination of array and indices validity.
95+ Ok ( unsafe {
96+ ListViewArray :: new_unchecked ( elements. clone ( ) , new_offsets, new_sizes, new_validity)
97+ } )
8898}