Skip to content

Commit c8ed14d

Browse files
authored
Merge branch 'main' into optimize-json-binary-parse
2 parents 924588b + b2aeab1 commit c8ed14d

File tree

67 files changed

+3097
-1253
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+3097
-1253
lines changed

CHANGELOG-old.md

Lines changed: 167 additions & 0 deletions
Large diffs are not rendered by default.

CHANGELOG.md

Lines changed: 147 additions & 145 deletions
Large diffs are not rendered by default.

Cargo.toml

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ exclude = [
6868
]
6969

7070
[workspace.package]
71-
version = "57.1.0"
71+
version = "57.2.0"
7272
homepage = "https://github.com/apache/arrow-rs"
7373
repository = "https://github.com/apache/arrow-rs"
7474
authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -85,26 +85,26 @@ edition = "2024"
8585
rust-version = "1.85"
8686

8787
[workspace.dependencies]
88-
arrow = { version = "57.1.0", path = "./arrow", default-features = false }
89-
arrow-arith = { version = "57.1.0", path = "./arrow-arith" }
90-
arrow-array = { version = "57.1.0", path = "./arrow-array" }
91-
arrow-buffer = { version = "57.1.0", path = "./arrow-buffer" }
92-
arrow-cast = { version = "57.1.0", path = "./arrow-cast" }
93-
arrow-csv = { version = "57.1.0", path = "./arrow-csv" }
94-
arrow-data = { version = "57.1.0", path = "./arrow-data" }
95-
arrow-ipc = { version = "57.1.0", path = "./arrow-ipc" }
96-
arrow-json = { version = "57.1.0", path = "./arrow-json" }
97-
arrow-ord = { version = "57.1.0", path = "./arrow-ord" }
98-
arrow-pyarrow = { version = "57.1.0", path = "./arrow-pyarrow" }
99-
arrow-row = { version = "57.1.0", path = "./arrow-row" }
100-
arrow-schema = { version = "57.1.0", path = "./arrow-schema" }
101-
arrow-select = { version = "57.1.0", path = "./arrow-select" }
102-
arrow-string = { version = "57.1.0", path = "./arrow-string" }
103-
parquet = { version = "57.1.0", path = "./parquet", default-features = false }
104-
parquet-geospatial = { version = "57.1.0", path = "./parquet-geospatial" }
105-
parquet-variant = { version = "57.1.0", path = "./parquet-variant" }
106-
parquet-variant-json = { version = "57.1.0", path = "./parquet-variant-json" }
107-
parquet-variant-compute = { version = "57.1.0", path = "./parquet-variant-compute" }
88+
arrow = { version = "57.2.0", path = "./arrow", default-features = false }
89+
arrow-arith = { version = "57.2.0", path = "./arrow-arith" }
90+
arrow-array = { version = "57.2.0", path = "./arrow-array" }
91+
arrow-buffer = { version = "57.2.0", path = "./arrow-buffer" }
92+
arrow-cast = { version = "57.2.0", path = "./arrow-cast" }
93+
arrow-csv = { version = "57.2.0", path = "./arrow-csv" }
94+
arrow-data = { version = "57.2.0", path = "./arrow-data" }
95+
arrow-ipc = { version = "57.2.0", path = "./arrow-ipc" }
96+
arrow-json = { version = "57.2.0", path = "./arrow-json" }
97+
arrow-ord = { version = "57.2.0", path = "./arrow-ord" }
98+
arrow-pyarrow = { version = "57.2.0", path = "./arrow-pyarrow" }
99+
arrow-row = { version = "57.2.0", path = "./arrow-row" }
100+
arrow-schema = { version = "57.2.0", path = "./arrow-schema" }
101+
arrow-select = { version = "57.2.0", path = "./arrow-select" }
102+
arrow-string = { version = "57.2.0", path = "./arrow-string" }
103+
parquet = { version = "57.2.0", path = "./parquet", default-features = false }
104+
parquet-geospatial = { version = "57.2.0", path = "./parquet-geospatial" }
105+
parquet-variant = { version = "57.2.0", path = "./parquet-variant" }
106+
parquet-variant-json = { version = "57.2.0", path = "./parquet-variant-json" }
107+
parquet-variant-compute = { version = "57.2.0", path = "./parquet-variant-compute" }
108108

109109
chrono = { version = "0.4.40", default-features = false, features = ["clock"] }
110110

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,17 @@ Planned Release Schedule
6565

6666
| Approximate Date | Version | Notes |
6767
| ---------------- | ---------- | --------------------------------------- |
68-
| October 2025 | [`57.0.0`] | Major, potentially breaking API changes |
69-
| November 2025 | [`57.1.0`] | Minor, NO breaking API changes |
7068
| December 2025 | [`57.2.0`] | Minor, NO breaking API changes |
7169
| January 2026 | [`58.0.0`] | Major, potentially breaking API changes |
70+
| February 2026 | [`58.1.0`] | Minor, NO breaking API changes |
71+
| March 2026 | [`58.2.0`] | Minor, NO breaking API changes |
72+
| April 2026 | [`59.0.0`] | Major, potentially breaking API changes |
7273

73-
[`57.0.0`]: https://github.com/apache/arrow-rs/issues/7835
74-
[`57.1.0`]: https://github.com/apache/arrow-rs/milestone/3
7574
[`57.2.0`]: https://github.com/apache/arrow-rs/milestone/5
7675
[`58.0.0`]: https://github.com/apache/arrow-rs/milestone/6
76+
[`58.1.0`]: https://github.com/apache/arrow-rs/issues/9108
77+
[`58.2.0`]: https://github.com/apache/arrow-rs/issues/9109
78+
[`59.0.0`]: https://github.com/apache/arrow-rs/issues/9110
7779
[ticket #5368]: https://github.com/apache/arrow-rs/issues/5368
7880
[semantic versioning]: https://semver.org/
7981

arrow-arith/src/boolean.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
2424
2525
use arrow_array::*;
26-
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
26+
use arrow_buffer::buffer::bitwise_quaternary_op_helper;
2727
use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
2828
use arrow_schema::ArrowError;
2929

@@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
7474
// The final null bit is set only if:
7575
// 1. left null bit is set, or
7676
// 2. right data bit is false (because null AND false = false).
77-
Some(bitwise_bin_op_helper(
77+
Some(BooleanBuffer::from_bitwise_binary_op(
7878
left_null_buffer.buffer(),
7979
left_null_buffer.offset(),
8080
right_values.inner(),
@@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
8585
}
8686
(None, Some(right_null_buffer)) => {
8787
// Same as above
88-
Some(bitwise_bin_op_helper(
88+
Some(BooleanBuffer::from_bitwise_binary_op(
8989
right_null_buffer.buffer(),
9090
right_null_buffer.offset(),
9191
left_values.inner(),
@@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
100100
// d is right data bits.
101101
// The final null bits are:
102102
// (a | (c & !d)) & (c | (a & !b))
103-
Some(bitwise_quaternary_op_helper(
103+
let buffer = bitwise_quaternary_op_helper(
104104
[
105105
left_null_buffer.buffer(),
106106
left_values.inner(),
@@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
115115
],
116116
left.len(),
117117
|a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
118-
))
118+
);
119+
Some(BooleanBuffer::new(buffer, 0, left.len()))
119120
}
120121
};
121-
let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
122+
let nulls = buffer.map(NullBuffer::new);
122123
Ok(BooleanArray::new(left_values & right_values, nulls))
123124
}
124125

@@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
169170
// The final null bit is set only if:
170171
// 1. left null bit is set, or
171172
// 2. right data bit is true (because null OR true = true).
172-
Some(bitwise_bin_op_helper(
173+
Some(BooleanBuffer::from_bitwise_binary_op(
173174
left_nulls.buffer(),
174175
left_nulls.offset(),
175176
right_values.inner(),
@@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
180181
}
181182
(None, Some(right_nulls)) => {
182183
// Same as above
183-
Some(bitwise_bin_op_helper(
184+
Some(BooleanBuffer::from_bitwise_binary_op(
184185
right_nulls.buffer(),
185186
right_nulls.offset(),
186187
left_values.inner(),
@@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
195196
// d is right data bits.
196197
// The final null bits are:
197198
// (a | (c & d)) & (c | (a & b))
198-
Some(bitwise_quaternary_op_helper(
199+
let buffer = bitwise_quaternary_op_helper(
199200
[
200201
left_nulls.buffer(),
201202
left_values.inner(),
@@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
210211
],
211212
left.len(),
212213
|a, b, c, d| (a | (c & d)) & (c | (a & b)),
213-
))
214+
);
215+
Some(BooleanBuffer::new(buffer, 0, left.len()))
214216
}
215217
};
216218

217-
let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
219+
let nulls = buffer.map(NullBuffer::new);
218220
Ok(BooleanArray::new(left_values | right_values, nulls))
219221
}
220222

arrow-array/src/array/boolean_array.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ impl BooleanArray {
286286
}
287287
}
288288

289+
impl super::private::Sealed for BooleanArray {}
290+
289291
impl Array for BooleanArray {
290292
fn as_any(&self) -> &dyn Any {
291293
self

arrow-array/src/array/byte_array.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,8 @@ impl<T: ByteArrayType> std::fmt::Debug for GenericByteArray<T> {
462462
}
463463
}
464464

465+
impl<T: ByteArrayType> super::private::Sealed for GenericByteArray<T> {}
466+
465467
impl<T: ByteArrayType> Array for GenericByteArray<T> {
466468
fn as_any(&self) -> &dyn Any {
467469
self

arrow-array/src/array/byte_view_array.rs

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ use super::ByteArrayType;
165165
pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
166166
data_type: DataType,
167167
views: ScalarBuffer<u128>,
168-
buffers: Vec<Buffer>,
168+
buffers: Arc<[Buffer]>,
169169
phantom: PhantomData<T>,
170170
nulls: Option<NullBuffer>,
171171
}
@@ -188,7 +188,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
188188
/// # Panics
189189
///
190190
/// Panics if [`GenericByteViewArray::try_new`] returns an error
191-
pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
191+
pub fn new<U>(views: ScalarBuffer<u128>, buffers: U, nulls: Option<NullBuffer>) -> Self
192+
where
193+
U: Into<Arc<[Buffer]>>,
194+
{
192195
Self::try_new(views, buffers, nulls).unwrap()
193196
}
194197

@@ -198,11 +201,16 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
198201
///
199202
/// * `views.len() != nulls.len()`
200203
/// * [ByteViewType::validate] fails
201-
pub fn try_new(
204+
pub fn try_new<U>(
202205
views: ScalarBuffer<u128>,
203-
buffers: Vec<Buffer>,
206+
buffers: U,
204207
nulls: Option<NullBuffer>,
205-
) -> Result<Self, ArrowError> {
208+
) -> Result<Self, ArrowError>
209+
where
210+
U: Into<Arc<[Buffer]>>,
211+
{
212+
let buffers: Arc<[Buffer]> = buffers.into();
213+
206214
T::validate(&views, &buffers)?;
207215

208216
if let Some(n) = nulls.as_ref() {
@@ -230,11 +238,14 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
230238
/// # Safety
231239
///
232240
/// Safe if [`Self::try_new`] would not error
233-
pub unsafe fn new_unchecked(
241+
pub unsafe fn new_unchecked<U>(
234242
views: ScalarBuffer<u128>,
235-
buffers: Vec<Buffer>,
243+
buffers: U,
236244
nulls: Option<NullBuffer>,
237-
) -> Self {
245+
) -> Self
246+
where
247+
U: Into<Arc<[Buffer]>>,
248+
{
238249
if cfg!(feature = "force_validate") {
239250
return Self::new(views, buffers, nulls);
240251
}
@@ -243,7 +254,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
243254
data_type: T::DATA_TYPE,
244255
phantom: Default::default(),
245256
views,
246-
buffers,
257+
buffers: buffers.into(),
247258
nulls,
248259
}
249260
}
@@ -253,7 +264,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
253264
Self {
254265
data_type: T::DATA_TYPE,
255266
views: vec![0; len].into(),
256-
buffers: vec![],
267+
buffers: vec![].into(),
257268
nulls: Some(NullBuffer::new_null(len)),
258269
phantom: Default::default(),
259270
}
@@ -279,7 +290,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
279290
}
280291

281292
/// Deconstruct this array into its constituent parts
282-
pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
293+
pub fn into_parts(self) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
283294
(self.views, self.buffers, self.nulls)
284295
}
285296

@@ -854,6 +865,8 @@ impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
854865
}
855866
}
856867

868+
impl<T: ByteViewType + ?Sized> super::private::Sealed for GenericByteViewArray<T> {}
869+
857870
impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
858871
fn as_any(&self) -> &dyn Any {
859872
self
@@ -885,8 +898,21 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
885898

886899
fn shrink_to_fit(&mut self) {
887900
self.views.shrink_to_fit();
888-
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
889-
self.buffers.shrink_to_fit();
901+
902+
// The goal of `shrink_to_fit` is to minimize the space used by any of
903+
// its allocations. The use of `Arc::get_mut` over `Arc::make_mut` is
904+
// because if the reference count is greater than 1, `Arc::make_mut`
905+
// will first clone its contents. So, any large allocations will first
906+
// be cloned before being shrunk, leaving the pre-cloned allocations
907+
// intact, before adding the extra (used) space of the new clones.
908+
if let Some(buffers) = Arc::get_mut(&mut self.buffers) {
909+
buffers.iter_mut().for_each(|b| b.shrink_to_fit());
910+
}
911+
912+
// With the assumption that this is a best-effort function, no attempt
913+
// is made to shrink `self.buffers`, which it can't because it's type
914+
// does not expose a `shrink_to_fit` method.
915+
890916
if let Some(nulls) = &mut self.nulls {
891917
nulls.shrink_to_fit();
892918
}
@@ -944,7 +970,7 @@ impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
944970
fn from(value: ArrayData) -> Self {
945971
let views = value.buffers()[0].clone();
946972
let views = ScalarBuffer::new(views, value.offset(), value.len());
947-
let buffers = value.buffers()[1..].to_vec();
973+
let buffers = value.buffers()[1..].to_vec().into();
948974
Self {
949975
data_type: T::DATA_TYPE,
950976
views,
@@ -1012,12 +1038,15 @@ where
10121038
}
10131039

10141040
impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
1015-
fn from(mut array: GenericByteViewArray<T>) -> Self {
1041+
fn from(array: GenericByteViewArray<T>) -> Self {
10161042
let len = array.len();
1017-
array.buffers.insert(0, array.views.into_inner());
1043+
1044+
let mut buffers = array.buffers.to_vec();
1045+
buffers.insert(0, array.views.into_inner());
1046+
10181047
let builder = ArrayDataBuilder::new(T::DATA_TYPE)
10191048
.len(len)
1020-
.buffers(array.buffers)
1049+
.buffers(buffers)
10211050
.nulls(array.nulls);
10221051

10231052
unsafe { builder.build_unchecked() }

arrow-array/src/array/dictionary_array.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,8 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T>
697697
}
698698
}
699699

700+
impl<T: ArrowDictionaryKeyType> super::private::Sealed for DictionaryArray<T> {}
701+
700702
impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
701703
fn as_any(&self) -> &dyn Any {
702704
self
@@ -856,6 +858,8 @@ impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
856858
}
857859
}
858860

861+
impl<K: ArrowDictionaryKeyType, V: Sync> super::private::Sealed for TypedDictionaryArray<'_, K, V> {}
862+
859863
impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
860864
fn as_any(&self) -> &dyn Any {
861865
self.dictionary

arrow-array/src/array/fixed_size_binary_array.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,8 @@ impl std::fmt::Debug for FixedSizeBinaryArray {
602602
}
603603
}
604604

605+
impl super::private::Sealed for FixedSizeBinaryArray {}
606+
605607
impl Array for FixedSizeBinaryArray {
606608
fn as_any(&self) -> &dyn Any {
607609
self

0 commit comments

Comments
 (0)