From 45bb3e3f0e5578118c80d8f0f9f8fa5514f3512b Mon Sep 17 00:00:00 2001 From: Chang chen Date: Sat, 7 Mar 2026 20:33:26 +0800 Subject: [PATCH 1/2] Update builddep-veloxbe-inc.sh to build specific CMake targets Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dev/builddep-veloxbe-inc.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/builddep-veloxbe-inc.sh b/dev/builddep-veloxbe-inc.sh index 9fafdf327718..5fff1a3e766f 100755 --- a/dev/builddep-veloxbe-inc.sh +++ b/dev/builddep-veloxbe-inc.sh @@ -147,12 +147,12 @@ fi # Step 2: Build Velox step 2 "Building Velox (incremental)" -cmake --build "$VELOX_BUILD_DIR" -j $NUM_THREADS +cmake --build "$VELOX_BUILD_DIR" --target velox -j $NUM_THREADS echo "[Step 2/4] Velox build complete." # Step 3: Build Gluten C++ step 3 "Building Gluten C++ (incremental)" -cmake --build "$GLUTEN_BUILD_DIR" -j $NUM_THREADS +cmake --build "$GLUTEN_BUILD_DIR" --target gluten velox -j $NUM_THREADS echo "[Step 3/4] Gluten C++ build complete." # Step 4: Copy libraries From 4f0f7a97618bb4e2ca7684b80e91f32b6f014d65 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Sat, 7 Mar 2026 15:08:00 +0800 Subject: [PATCH 2/2] Enable GlutenParquetTypeWideningSuite for Spark 4.0 and 4.1 Add translateException() to ClosableIterator as a virtual hook for backend-specific exception translation. Override in ColumnarBatchOutIterator to translate Velox type conversion errors into Spark's SchemaColumnConvertNotSupportedException. Exclude 63 tests: Velox native reader always rejects incompatible type conversions (no parquet-mr fallback), and does not support DELTA_BYTE_ARRAY encoding for FIXED_LEN_BYTE_ARRAY decimals. Test results: 21 pass / 63 ignored (spark40 and spark41). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../vectorized/ColumnarBatchOutIterator.java | 37 +++++++++++ .../gluten/iterator/ClosableIterator.java | 12 +++- .../utils/velox/VeloxTestSettings.scala | 65 ++++++++++++++++++- .../utils/velox/VeloxTestSettings.scala | 65 ++++++++++++++++++- 4 files changed, 175 insertions(+), 4 deletions(-) diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java index 27162a800f07..d25137184112 100644 --- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java +++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java @@ -17,10 +17,12 @@ package org.apache.gluten.vectorized; import org.apache.gluten.columnarbatch.ColumnarBatches; +import org.apache.gluten.exception.GlutenException; import org.apache.gluten.iterator.ClosableIterator; import org.apache.gluten.runtime.Runtime; import org.apache.gluten.runtime.RuntimeAware; +import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException; import org.apache.spark.sql.vectorized.ColumnarBatch; import java.io.IOException; @@ -130,6 +132,41 @@ public void requestBarrier() { nativeRequestBarrier(iterHandle); } + /** + * Translates a Velox type conversion error into a SchemaColumnConvertNotSupportedException. + * Returns null if the message does not indicate a type conversion error. + */ + private static RuntimeException translateToSchemaException(String msg) { + if (msg.contains("not allowed for requested type") || msg.contains("Not a valid type for")) { + return new SchemaColumnConvertNotSupportedException("unknown", msg, "unknown"); + } + return null; + } + + @Override + protected RuntimeException translateException(Exception e) { + String msg = findFirstNonNullMessage(e); + if (msg != null) { + RuntimeException schemaEx = translateToSchemaException(msg); + if (schemaEx != null) { + schemaEx.initCause(e); + return schemaEx; + } + } + return new GlutenException(e); + } + + private static String findFirstNonNullMessage(Throwable t) { + while (t != null) { + String msg = t.getMessage(); + if (msg != null) { + return msg; + } + t = t.getCause(); + } + return null; + } + @Override public void close0() { // To make sure the outputted batches are still accessible after the iterator is closed. diff --git a/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java b/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java index 7947b09af9b7..38764ec02429 100644 --- a/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java +++ b/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java @@ -35,7 +35,7 @@ public final boolean hasNext() { try { return hasNext0(); } catch (Exception e) { - throw new GlutenException(e); + throw translateException(e); } } @@ -47,7 +47,7 @@ public final T next() { try { return next0(); } catch (Exception e) { - throw new GlutenException(e); + throw translateException(e); } } @@ -63,4 +63,12 @@ public final void close() { protected abstract boolean hasNext0() throws Exception; protected abstract T next0() throws Exception; + + /** + * Translates a native exception into an appropriate Java exception. Subclasses can override this + * to translate backend-specific exceptions into Spark-compatible exceptions. + */ + protected RuntimeException translateException(Exception e) { + return new GlutenException(e); + } } diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 399661654ff6..4f7c67daaad6 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -322,7 +322,70 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetAvroCompatibilitySuite] enableSuite[GlutenParquetCommitterSuite] enableSuite[GlutenParquetFieldIdSchemaSuite] - // TODO: 4.x enableSuite[GlutenParquetTypeWideningSuite] // 74 failures - MAJOR ISSUE + enableSuite[GlutenParquetTypeWideningSuite] + .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(22, 2)") + .exclude("parquet decimal precision and scale change Decimal(20, 7) -> Decimal(22, 5)") + .exclude("parquet decimal precision and scale change Decimal(20, 5) -> Decimal(22, 8)") + .exclude("parquet decimal precision and scale change Decimal(20, 2) -> Decimal(22, 4)") + .exclude("parquet decimal precision and scale change Decimal(10, 4) -> Decimal(12, 7)") + .exclude("parquet decimal precision and scale change Decimal(10, 6) -> Decimal(12, 4)") + .exclude("parquet decimal precision and scale change Decimal(10, 7) -> Decimal(5, 2)") + .exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 2)") + .exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 6)") + .exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(10, 2)") + .exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(5, 2)") + .exclude("parquet decimal precision and scale change Decimal(22, 4) -> Decimal(20, 2)") + .exclude("parquet decimal precision and scale change Decimal(22, 5) -> Decimal(20, 7)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(6, 4)") + .exclude("parquet decimal precision and scale change Decimal(7, 4) -> Decimal(5, 2)") + .exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(12, 4)") + .exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(20, 12)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(10, 7)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(20, 17)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(7, 4)") + .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(5, 2)") + .exclude("parquet decimal precision change Decimal(12, 2) -> Decimal(10, 2)") + .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(10, 2)") + .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(5, 2)") + .exclude("parquet decimal precision change Decimal(22, 2) -> Decimal(20, 2)") + .exclude("parquet decimal precision change Decimal(7, 2) -> Decimal(5, 2)") + .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(12, 2)") + .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(20, 2)") + .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(10, 2)") + .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(20, 2)") + .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(7, 2)") + .exclude("parquet decimal type change Decimal(5, 2) -> Decimal(3, 2) overflows with parquet-mr") + .exclude("unsupported parquet conversion ByteType -> DecimalType(1,0)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(2,0)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(3,0)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(3,1)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(4,1)") + .exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)") + .exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)") + .exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)") + .exclude("unsupported parquet conversion LongType -> DateType") + .exclude("unsupported parquet conversion LongType -> DecimalType(10,0)") + .exclude("unsupported parquet conversion LongType -> DecimalType(19,0)") + .exclude("unsupported parquet conversion LongType -> DecimalType(20,1)") + .exclude("unsupported parquet conversion LongType -> IntegerType") + .exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(5,1)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(6,1)") + .exclude("parquet widening conversion ByteType -> DecimalType(11,1)") + .exclude("parquet widening conversion ByteType -> DecimalType(20,0)") + .exclude("parquet widening conversion IntegerType -> DecimalType(11,1)") + .exclude("parquet widening conversion IntegerType -> DecimalType(20,0)") + .exclude("parquet widening conversion IntegerType -> DecimalType(38,0)") + .exclude("parquet widening conversion IntegerType -> DoubleType") + .exclude("parquet widening conversion LongType -> DecimalType(20,0)") + .exclude("parquet widening conversion LongType -> DecimalType(21,1)") + .exclude("parquet widening conversion LongType -> DecimalType(38,0)") + .exclude("parquet widening conversion ShortType -> DecimalType(11,1)") + .exclude("parquet widening conversion ShortType -> DecimalType(20,0)") + .exclude("parquet widening conversion ShortType -> DecimalType(38,0)") + .exclude("parquet widening conversion ShortType -> DoubleType") enableSuite[GlutenParquetVariantShreddingSuite] // Generated suites for org.apache.spark.sql.execution.datasources.text // TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 10802c889295..0dadfa1d0bd8 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -333,7 +333,70 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetAvroCompatibilitySuite] enableSuite[GlutenParquetCommitterSuite] enableSuite[GlutenParquetFieldIdSchemaSuite] - // TODO: 4.x enableSuite[GlutenParquetTypeWideningSuite] // 74 failures - MAJOR ISSUE + enableSuite[GlutenParquetTypeWideningSuite] + .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(22, 2)") + .exclude("parquet decimal precision and scale change Decimal(20, 7) -> Decimal(22, 5)") + .exclude("parquet decimal precision and scale change Decimal(20, 5) -> Decimal(22, 8)") + .exclude("parquet decimal precision and scale change Decimal(20, 2) -> Decimal(22, 4)") + .exclude("parquet decimal precision and scale change Decimal(10, 4) -> Decimal(12, 7)") + .exclude("parquet decimal precision and scale change Decimal(10, 6) -> Decimal(12, 4)") + .exclude("parquet decimal precision and scale change Decimal(10, 7) -> Decimal(5, 2)") + .exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 2)") + .exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 6)") + .exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(10, 2)") + .exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(5, 2)") + .exclude("parquet decimal precision and scale change Decimal(22, 4) -> Decimal(20, 2)") + .exclude("parquet decimal precision and scale change Decimal(22, 5) -> Decimal(20, 7)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(6, 4)") + .exclude("parquet decimal precision and scale change Decimal(7, 4) -> Decimal(5, 2)") + .exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(12, 4)") + .exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(20, 12)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(10, 7)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(20, 17)") + .exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(7, 4)") + .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(5, 2)") + .exclude("parquet decimal precision change Decimal(12, 2) -> Decimal(10, 2)") + .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(10, 2)") + .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(5, 2)") + .exclude("parquet decimal precision change Decimal(22, 2) -> Decimal(20, 2)") + .exclude("parquet decimal precision change Decimal(7, 2) -> Decimal(5, 2)") + .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(12, 2)") + .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(20, 2)") + .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(10, 2)") + .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(20, 2)") + .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(7, 2)") + .exclude("parquet decimal type change Decimal(5, 2) -> Decimal(3, 2) overflows with parquet-mr") + .exclude("unsupported parquet conversion ByteType -> DecimalType(1,0)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(2,0)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(3,0)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(3,1)") + .exclude("unsupported parquet conversion ByteType -> DecimalType(4,1)") + .exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)") + .exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)") + .exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)") + .exclude("unsupported parquet conversion LongType -> DateType") + .exclude("unsupported parquet conversion LongType -> DecimalType(10,0)") + .exclude("unsupported parquet conversion LongType -> DecimalType(19,0)") + .exclude("unsupported parquet conversion LongType -> DecimalType(20,1)") + .exclude("unsupported parquet conversion LongType -> IntegerType") + .exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(5,1)") + .exclude("unsupported parquet conversion ShortType -> DecimalType(6,1)") + .exclude("parquet widening conversion ByteType -> DecimalType(11,1)") + .exclude("parquet widening conversion ByteType -> DecimalType(20,0)") + .exclude("parquet widening conversion IntegerType -> DecimalType(11,1)") + .exclude("parquet widening conversion IntegerType -> DecimalType(20,0)") + .exclude("parquet widening conversion IntegerType -> DecimalType(38,0)") + .exclude("parquet widening conversion IntegerType -> DoubleType") + .exclude("parquet widening conversion LongType -> DecimalType(20,0)") + .exclude("parquet widening conversion LongType -> DecimalType(21,1)") + .exclude("parquet widening conversion LongType -> DecimalType(38,0)") + .exclude("parquet widening conversion ShortType -> DecimalType(11,1)") + .exclude("parquet widening conversion ShortType -> DecimalType(20,0)") + .exclude("parquet widening conversion ShortType -> DecimalType(38,0)") + .exclude("parquet widening conversion ShortType -> DoubleType") // TODO: 4.x enableSuite[GlutenParquetVariantShreddingSuite] // 1 failure // Generated suites for org.apache.spark.sql.execution.datasources.text // TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure