Skip to content

Commit 3a5296d

Browse files
Merge branch 'apache:main' into main
2 parents d5841a1 + 9b773f3 commit 3a5296d

37 files changed

Lines changed: 2184 additions & 811 deletions

.github/workflows/iceberg_spark_test.yml

Lines changed: 0 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -102,138 +102,6 @@ jobs:
102102
path: native/target/ci/libcomet.so
103103
retention-days: 1
104104

105-
iceberg-spark:
106-
needs: build-native
107-
if: contains(github.event.pull_request.title, '[iceberg]')
108-
strategy:
109-
matrix:
110-
os: [ubuntu-24.04]
111-
java-version: [11, 17]
112-
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
113-
spark-version: [{short: '3.5', full: '3.5.8'}]
114-
scala-version: ['2.13']
115-
fail-fast: false
116-
name: iceberg-spark/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
117-
runs-on: ${{ matrix.os }}
118-
container:
119-
image: amd64/rust
120-
env:
121-
SPARK_LOCAL_IP: localhost
122-
steps:
123-
- uses: actions/checkout@v6
124-
- name: Setup Rust & Java toolchain
125-
uses: ./.github/actions/setup-builder
126-
with:
127-
rust-version: ${{env.RUST_VERSION}}
128-
jdk-version: ${{ matrix.java-version }}
129-
- name: Download native library
130-
uses: actions/download-artifact@v8
131-
with:
132-
name: native-lib-iceberg
133-
path: native/target/release/
134-
- name: Build Comet
135-
run: |
136-
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
137-
- name: Setup Iceberg
138-
uses: ./.github/actions/setup-iceberg-builder
139-
with:
140-
iceberg-version: ${{ matrix.iceberg-version.full }}
141-
- name: Run Iceberg Spark tests
142-
run: |
143-
cd apache-iceberg
144-
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
145-
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
146-
:iceberg-spark:iceberg-spark-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
147-
-Pquick=true -x javadoc
148-
149-
iceberg-spark-extensions:
150-
needs: build-native
151-
if: contains(github.event.pull_request.title, '[iceberg]')
152-
strategy:
153-
matrix:
154-
os: [ubuntu-24.04]
155-
java-version: [11, 17]
156-
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
157-
spark-version: [{short: '3.5', full: '3.5.8'}]
158-
scala-version: ['2.13']
159-
fail-fast: false
160-
name: iceberg-spark-extensions/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
161-
runs-on: ${{ matrix.os }}
162-
container:
163-
image: amd64/rust
164-
env:
165-
SPARK_LOCAL_IP: localhost
166-
steps:
167-
- uses: actions/checkout@v6
168-
- name: Setup Rust & Java toolchain
169-
uses: ./.github/actions/setup-builder
170-
with:
171-
rust-version: ${{env.RUST_VERSION}}
172-
jdk-version: ${{ matrix.java-version }}
173-
- name: Download native library
174-
uses: actions/download-artifact@v8
175-
with:
176-
name: native-lib-iceberg
177-
path: native/target/release/
178-
- name: Build Comet
179-
run: |
180-
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
181-
- name: Setup Iceberg
182-
uses: ./.github/actions/setup-iceberg-builder
183-
with:
184-
iceberg-version: ${{ matrix.iceberg-version.full }}
185-
- name: Run Iceberg Spark extensions tests
186-
run: |
187-
cd apache-iceberg
188-
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
189-
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
190-
:iceberg-spark:iceberg-spark-extensions-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
191-
-Pquick=true -x javadoc
192-
193-
iceberg-spark-runtime:
194-
needs: build-native
195-
if: contains(github.event.pull_request.title, '[iceberg]')
196-
strategy:
197-
matrix:
198-
os: [ubuntu-24.04]
199-
java-version: [11, 17]
200-
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
201-
spark-version: [{short: '3.5', full: '3.5.8'}]
202-
scala-version: ['2.13']
203-
fail-fast: false
204-
name: iceberg-spark-runtime/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
205-
runs-on: ${{ matrix.os }}
206-
container:
207-
image: amd64/rust
208-
env:
209-
SPARK_LOCAL_IP: localhost
210-
steps:
211-
- uses: actions/checkout@v6
212-
- name: Setup Rust & Java toolchain
213-
uses: ./.github/actions/setup-builder
214-
with:
215-
rust-version: ${{env.RUST_VERSION}}
216-
jdk-version: ${{ matrix.java-version }}
217-
- name: Download native library
218-
uses: actions/download-artifact@v8
219-
with:
220-
name: native-lib-iceberg
221-
path: native/target/release/
222-
- name: Build Comet
223-
run: |
224-
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
225-
- name: Setup Iceberg
226-
uses: ./.github/actions/setup-iceberg-builder
227-
with:
228-
iceberg-version: ${{ matrix.iceberg-version.full }}
229-
- name: Run Iceberg Spark runtime tests
230-
run: |
231-
cd apache-iceberg
232-
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
233-
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
234-
:iceberg-spark:iceberg-spark-runtime-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:integrationTest \
235-
-Pquick=true -x javadoc
236-
237105
iceberg-spark-rust:
238106
needs: build-native
239107
if: contains(github.event.pull_request.title, '[iceberg]')

.github/workflows/pr_build_linux.yml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ jobs:
7474
container:
7575
image: amd64/rust
7676
steps:
77-
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
77+
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
7878
- uses: actions/checkout@v6
7979
- name: Setup Rust toolchain
8080
uses: ./.github/actions/setup-builder
@@ -127,7 +127,7 @@ jobs:
127127
container:
128128
image: amd64/rust
129129
steps:
130-
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
130+
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
131131

132132
- uses: actions/checkout@v6
133133

@@ -285,7 +285,7 @@ jobs:
285285
JAVA_TOOL_OPTIONS: ${{ matrix.profile.java_version == '17' && '--add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED' || '' }}
286286

287287
steps:
288-
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
288+
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
289289
- uses: actions/checkout@v6
290290

291291
- name: Setup Rust & Java toolchain
@@ -330,7 +330,7 @@ jobs:
330330
container:
331331
image: amd64/rust
332332
steps:
333-
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
333+
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
334334

335335
- uses: actions/checkout@v6
336336

@@ -388,7 +388,7 @@ jobs:
388388
join: [sort_merge, broadcast, hash]
389389
fail-fast: false
390390
steps:
391-
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
391+
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
392392

393393
- uses: actions/checkout@v6
394394

.github/workflows/pr_rat_check.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,9 +37,9 @@ jobs:
3737
name: RAT License Check
3838
runs-on: ubuntu-slim
3939
steps:
40-
- uses: actions/checkout@v4
40+
- uses: actions/checkout@v6
4141
- name: Set up Java
42-
uses: actions/setup-java@v4
42+
uses: actions/setup-java@v5
4343
with:
4444
distribution: temurin
4545
java-version: 11

docs/source/user-guide/latest/compatibility.md

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,51 @@ Expressions that are not 100% Spark-compatible will fall back to Spark by defaul
5858
`spark.comet.expression.EXPRNAME.allowIncompatible=true`, where `EXPRNAME` is the Spark expression class name. See
5959
the [Comet Supported Expressions Guide](expressions.md) for more information on this configuration setting.
6060

61+
### Array Expressions
62+
63+
- **ArrayContains**: Returns null instead of false for empty arrays with literal values.
64+
[#3346](https://github.com/apache/datafusion-comet/issues/3346)
65+
- **ArrayRemove**: Returns null when the element to remove is null, instead of removing null elements from the array.
66+
[#3173](https://github.com/apache/datafusion-comet/issues/3173)
67+
- **GetArrayItem**: Known correctness issues with index handling, including off-by-one errors and incorrect results
68+
with dynamic (non-literal) index values.
69+
[#3330](https://github.com/apache/datafusion-comet/issues/3330),
70+
[#3332](https://github.com/apache/datafusion-comet/issues/3332)
71+
- **ArraysOverlap**: Inconsistent behavior when arrays contain NULL values.
72+
[#3645](https://github.com/apache/datafusion-comet/issues/3645),
73+
[#2036](https://github.com/apache/datafusion-comet/issues/2036)
74+
- **ArrayUnion**: Sorts input arrays before performing the union, while Spark preserves the order of the first array
75+
and appends unique elements from the second.
76+
[#3644](https://github.com/apache/datafusion-comet/issues/3644)
77+
78+
### Date/Time Expressions
79+
80+
- **Hour, Minute, Second**: Incorrectly apply timezone conversion to TimestampNTZ inputs. TimestampNTZ stores local
81+
time without a timezone, so no conversion should be applied. These expressions work correctly with Timestamp inputs.
82+
[#3180](https://github.com/apache/datafusion-comet/issues/3180)
83+
- **TruncTimestamp (date_trunc)**: Produces incorrect results when used with non-UTC timezones. Compatible when
84+
timezone is UTC.
85+
[#2649](https://github.com/apache/datafusion-comet/issues/2649)
86+
87+
### Math Expressions
88+
89+
- **Ceil, Floor**: Incorrect results for Decimal type inputs.
90+
[#1729](https://github.com/apache/datafusion-comet/issues/1729)
91+
- **Tan**: `tan(-0.0)` produces `0.0` instead of `-0.0`.
92+
[#1897](https://github.com/apache/datafusion-comet/issues/1897)
93+
94+
### Aggregate Expressions
95+
96+
- **Corr**: Returns null instead of NaN in some edge cases.
97+
[#2646](https://github.com/apache/datafusion-comet/issues/2646)
98+
- **First, Last**: These functions are not deterministic. When `ignoreNulls` is set, results may not match Spark.
99+
[#1630](https://github.com/apache/datafusion-comet/issues/1630)
100+
101+
### Struct Expressions
102+
103+
- **StructsToJson (to_json)**: Does not support `+Infinity` and `-Infinity` for numeric types (float, double).
104+
[#3016](https://github.com/apache/datafusion-comet/issues/3016)
105+
61106
## Regular Expressions
62107

63108
Comet uses the Rust `regex` crate for evaluating regular expressions, and this has different behavior from Java's

0 commit comments

Comments
 (0)