Skip to content

Commit cd723b7

Browse files
authored
Merge branch 'main' into zip-string-view-improv
2 parents 86db64d + 814ee42 commit cd723b7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+5141
-927
lines changed

.github/pull_request_template.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,38 @@
11
# Which issue does this PR close?
22

3+
<!--
34
We generally require a GitHub issue to be filed for all bug fixes and enhancements, as this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.
5+
-->
46

57
- Closes #NNN.
68

79
# Rationale for this change
810

11+
<!--
912
Why are you proposing this change? If this is already explained clearly in the issue, then this section is not needed.
1013
Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes.
14+
-->
1115

1216
# What changes are included in this PR?
1317

18+
<!--
1419
There is no need to duplicate the description in the issue here, but it is sometimes worth providing a summary of the individual changes in this PR.
20+
-->
1521

1622
# Are these changes tested?
1723

24+
<!--
1825
We typically require tests for all PRs in order to:
1926
1. Prevent the code from being accidentally broken by subsequent changes
2027
2. Serve as another way to document the expected behavior of the code
2128
2229
If tests are not included in your PR, please explain why (for example, are they covered by existing tests?).
30+
-->
2331

2432
# Are there any user-facing changes?
2533

34+
<!--
2635
If there are user-facing changes then we may require documentation to be updated before approving the PR.
2736
2837
If there are any breaking changes to public APIs, please call them out.
38+
-->

.github/workflows/docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
steps:
7272
- uses: actions/checkout@v6
7373
- name: Download crate docs
74-
uses: actions/download-artifact@v6
74+
uses: actions/download-artifact@v7
7575
with:
7676
name: crate-docs
7777
path: website/build

.github/workflows/integration.yml

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,58 +78,112 @@ jobs:
7878
run:
7979
shell: bash
8080
steps:
81+
- name: Monitor disk usage - Initial
82+
run: |
83+
echo "=== Initial Disk Usage ==="
84+
df -h /
85+
echo ""
86+
87+
- name: Remove unnecessary preinstalled software
88+
run: |
89+
echo "=== Cleaning up host disk space ==="
90+
echo "Disk space before cleanup:"
91+
df -h /
92+
93+
# Clean apt cache
94+
apt-get clean || true
95+
96+
# Remove GitHub Actions tool cache
97+
rm -rf /__t/* || true
98+
99+
# Remove large packages from host filesystem (mounted at /host/)
100+
rm -rf /host/usr/share/dotnet || true
101+
rm -rf /host/usr/local/lib/android || true
102+
rm -rf /host/usr/local/.ghcup || true
103+
rm -rf /host/opt/hostedtoolcache/CodeQL || true
104+
105+
echo ""
106+
echo "Disk space after cleanup:"
107+
df -h /
108+
echo ""
109+
81110
# This is necessary so that actions/checkout can find git
82111
- name: Export conda path
83112
run: echo "/opt/conda/envs/arrow/bin" >> $GITHUB_PATH
84113
# This is necessary so that Rust can find cargo
85114
- name: Export cargo path
86115
run: echo "/root/.cargo/bin" >> $GITHUB_PATH
87-
- name: Check rustup
88-
run: which rustup
89-
- name: Check cmake
90-
run: which cmake
116+
117+
# Checkout repos (using shallow clones with fetch-depth: 1)
91118
- name: Checkout Arrow
92119
uses: actions/checkout@v6
93120
with:
94121
repository: apache/arrow
95122
submodules: true
96-
fetch-depth: 0
123+
fetch-depth: 1
97124
- name: Checkout Arrow Rust
98125
uses: actions/checkout@v6
99126
with:
100127
path: rust
101128
submodules: true
102-
fetch-depth: 0
129+
fetch-depth: 1
103130
- name: Checkout Arrow .NET
104131
uses: actions/checkout@v6
105132
with:
106133
repository: apache/arrow-dotnet
107134
path: dotnet
135+
fetch-depth: 1
108136
- name: Checkout Arrow Go
109137
uses: actions/checkout@v6
110138
with:
111139
repository: apache/arrow-go
112140
path: go
141+
fetch-depth: 1
113142
- name: Checkout Arrow Java
114143
uses: actions/checkout@v6
115144
with:
116145
repository: apache/arrow-java
117146
path: java
147+
fetch-depth: 1
118148
- name: Checkout Arrow JavaScript
119149
uses: actions/checkout@v6
120150
with:
121151
repository: apache/arrow-js
122152
path: js
153+
fetch-depth: 1
123154
- name: Checkout Arrow nanoarrow
124155
uses: actions/checkout@v6
125156
with:
126157
repository: apache/arrow-nanoarrow
127158
path: nanoarrow
159+
fetch-depth: 1
160+
161+
- name: Monitor disk usage - After checkouts
162+
run: |
163+
echo "=== After Checkouts ==="
164+
df -h /
165+
echo ""
166+
128167
- name: Build
129168
run: conda run --no-capture-output ci/scripts/integration_arrow_build.sh $PWD /build
169+
170+
- name: Monitor disk usage - After build
171+
if: always()
172+
run: |
173+
echo "=== After Build ==="
174+
df -h /
175+
echo ""
176+
130177
- name: Run
131178
run: conda run --no-capture-output ci/scripts/integration_arrow.sh $PWD /build
132179

180+
- name: Monitor disk usage - After tests
181+
if: always()
182+
run: |
183+
echo "=== After Tests ==="
184+
df -h /
185+
echo ""
186+
133187
# test FFI against the C-Data interface exposed by pyarrow
134188
pyarrow-integration-test:
135189
name: Pyarrow C Data Interface

arrow-array/benches/union_array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ fn criterion_benchmark(c: &mut Criterion) {
5454
|b| {
5555
let type_ids = 0..with_nulls+without_nulls;
5656

57-
let fields = UnionFields::new(
57+
let fields = UnionFields::try_new(
5858
type_ids.clone(),
5959
type_ids.clone().map(|i| Field::new(format!("f{i}"), DataType::Int32, true)),
60-
);
60+
).unwrap();
6161

6262
let array = UnionArray::try_new(
6363
fields,

arrow-array/src/array/boolean_array.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,4 +829,32 @@ mod tests {
829829
assert_eq!(values.values(), &[0b1000_0000]);
830830
assert!(nulls.is_none());
831831
}
832+
833+
#[test]
834+
fn test_new_null_array() {
835+
let arr = BooleanArray::new_null(5);
836+
837+
assert_eq!(arr.len(), 5);
838+
assert_eq!(arr.null_count(), 5);
839+
assert_eq!(arr.true_count(), 0);
840+
assert_eq!(arr.false_count(), 0);
841+
842+
for i in 0..5 {
843+
assert!(arr.is_null(i));
844+
assert!(!arr.is_valid(i));
845+
}
846+
}
847+
848+
#[test]
849+
fn test_slice_with_nulls() {
850+
let arr = BooleanArray::from(vec![Some(true), None, Some(false)]);
851+
let sliced = arr.slice(1, 2);
852+
853+
assert_eq!(sliced.len(), 2);
854+
assert_eq!(sliced.null_count(), 1);
855+
856+
assert!(sliced.is_null(0));
857+
assert!(sliced.is_valid(1));
858+
assert!(!sliced.value(1));
859+
}
832860
}

arrow-array/src/array/list_view_array.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use crate::array::{make_array, print_long_array};
2626
use crate::builder::{GenericListViewBuilder, PrimitiveBuilder};
2727
use crate::iterator::GenericListViewArrayIter;
2828
use crate::{
29-
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, OffsetSizeTrait,
30-
new_empty_array,
29+
Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, GenericListArray,
30+
OffsetSizeTrait, new_empty_array,
3131
};
3232

3333
/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
@@ -498,6 +498,29 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<Offse
498498
}
499499
}
500500

501+
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
502+
for GenericListViewArray<OffsetSize>
503+
{
504+
fn from(value: GenericListArray<OffsetSize>) -> Self {
505+
let (field, offsets, values, nulls) = value.into_parts();
506+
let len = offsets.len() - 1;
507+
let mut sizes = Vec::with_capacity(len);
508+
let mut view_offsets = Vec::with_capacity(len);
509+
for (i, offset) in offsets.iter().enumerate().take(len) {
510+
view_offsets.push(*offset);
511+
sizes.push(offsets[i + 1] - offsets[i]);
512+
}
513+
514+
Self::new(
515+
field,
516+
ScalarBuffer::from(view_offsets),
517+
ScalarBuffer::from(sizes),
518+
values,
519+
nulls,
520+
)
521+
}
522+
}
523+
501524
impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
502525
fn from(array: GenericListViewArray<OffsetSize>) -> Self {
503526
let len = array.len();

arrow-array/src/array/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,11 @@ impl<'a> StringArrayType<'a> for &'a StringViewArray {
620620
}
621621
}
622622

623-
/// A trait for Arrow String Arrays, currently three types are supported:
623+
/// A trait for Arrow Binary Arrays. Currently four types are supported:
624624
/// - `BinaryArray`
625625
/// - `LargeBinaryArray`
626626
/// - `BinaryViewArray`
627+
/// - `FixedSizeBinaryArray`
627628
///
628629
/// This trait helps to abstract over the different types of binary arrays
629630
/// so that we don't need to duplicate the implementation for each type.
@@ -642,6 +643,11 @@ impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
642643
BinaryViewArray::iter(self)
643644
}
644645
}
646+
impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
647+
fn iter(&self) -> ArrayIter<Self> {
648+
FixedSizeBinaryArray::iter(self)
649+
}
650+
}
645651

646652
impl PartialEq for dyn Array + '_ {
647653
fn eq(&self, other: &Self) -> bool {
@@ -1067,13 +1073,14 @@ mod tests {
10671073
fn test_null_union() {
10681074
for mode in [UnionMode::Sparse, UnionMode::Dense] {
10691075
let data_type = DataType::Union(
1070-
UnionFields::new(
1076+
UnionFields::try_new(
10711077
vec![2, 1],
10721078
vec![
10731079
Field::new("foo", DataType::Int32, true),
10741080
Field::new("bar", DataType::Int64, true),
10751081
],
1076-
),
1082+
)
1083+
.unwrap(),
10771084
mode,
10781085
);
10791086
let array = new_null_array(&data_type, 4);

0 commit comments

Comments
 (0)