Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,16 @@ object VeloxBackendSettings extends BackendSettingsApi {
hadoopConf: Configuration,
partitionFileFormats: Set[ReadFileFormat]): ValidationResult = {

// When parquet vectorized reader is disabled, fallback to Spark's vanilla reader
// (parquet-mr) to preserve its behavior (e.g., allowing decimal precision narrowing).
if (
format == ReadFileFormat.ParquetReadFormat &&
!SQLConf.get.getConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED)
) {
return ValidationResult.failed(
"Fallback to vanilla reader when parquet vectorized reader is disabled.")
}

def validateScheme(): Option[String] = {
val filteredRootPaths = distinctRootPaths(rootPaths)
if (
Expand Down
27 changes: 26 additions & 1 deletion cpp/velox/substrait/SubstraitToVeloxPlan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,31 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
// The columns present in the table, if not available default to the baseSchema.
auto tableSchema = splitInfo->tableSchema ? splitInfo->tableSchema : baseSchema;

// Build dataColumns from tableSchema, excluding partition columns.
// HiveTableHandle::dataColumns() is used as fileSchema for the reader.
// Partition columns should not be validated against the file's physical types
// (their values come from the partition path, not from the file).
std::unordered_set<std::string> partitionColNames;
for (int idx = 0; idx < colNameList.size(); idx++) {
if (columnTypes[idx] == ColumnType::kPartitionKey) {
partitionColNames.insert(colNameList[idx]);
}
}
RowTypePtr dataColumns;
if (partitionColNames.empty()) {
dataColumns = tableSchema;
} else {
std::vector<std::string> dataColNames;
std::vector<TypePtr> dataColTypes;
for (int idx = 0; idx < tableSchema->size(); idx++) {
if (partitionColNames.find(tableSchema->nameOf(idx)) == partitionColNames.end()) {
dataColNames.push_back(tableSchema->nameOf(idx));
dataColTypes.push_back(tableSchema->childAt(idx));
}
}
dataColumns = ROW(std::move(dataColNames), std::move(dataColTypes));
}

connector::ConnectorTableHandlePtr tableHandle;
auto remainingFilter = readRel.has_filter() ? exprConverter_->toVeloxExpr(readRel.filter(), baseSchema) : nullptr;
auto connectorId = kHiveConnectorId;
Expand All @@ -1506,7 +1531,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
}
common::SubfieldFilters subfieldFilters;
tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
connectorId, "hive_table", std::move(subfieldFilters), remainingFilter, tableSchema);
connectorId, "hive_table", std::move(subfieldFilters), remainingFilter, dataColumns);

// Get assignments and out names.
std::vector<std::string> outNames;
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get-velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
set -exu

# Directory containing this script, resolved even when the script is sourced.
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)

# Velox sources to fetch.
# NOTE(review): this points at a personal fork and a feature branch
# (parquet type-widening work); switch back to the canonical repo/branch
# before merging.
VELOX_REPO=https://github.com/baibaichen/velox.git
VELOX_BRANCH=pr3/parquet-type-widening
VELOX_ENHANCED_BRANCH=ibm-2026_03_08

# Empty means "clone fresh"; callers may override to reuse a local checkout.
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -323,69 +323,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetCommitterSuite]
enableSuite[GlutenParquetFieldIdSchemaSuite]
enableSuite[GlutenParquetTypeWideningSuite]
// Velox does not support DELTA_BYTE_ARRAY encoding for FIXED_LEN_BYTE_ARRAY decimals.
.exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(22, 2)")
.exclude("parquet decimal precision and scale change Decimal(20, 7) -> Decimal(22, 5)")
.exclude("parquet decimal precision and scale change Decimal(20, 5) -> Decimal(22, 8)")
.exclude("parquet decimal precision and scale change Decimal(20, 2) -> Decimal(22, 4)")
.exclude("parquet decimal precision and scale change Decimal(10, 4) -> Decimal(12, 7)")
.exclude("parquet decimal precision and scale change Decimal(10, 6) -> Decimal(12, 4)")
.exclude("parquet decimal precision and scale change Decimal(10, 7) -> Decimal(5, 2)")
.exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 2)")
.exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 6)")
.exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(10, 2)")
.exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(5, 2)")
.exclude("parquet decimal precision and scale change Decimal(22, 4) -> Decimal(20, 2)")
.exclude("parquet decimal precision and scale change Decimal(22, 5) -> Decimal(20, 7)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(6, 4)")
.exclude("parquet decimal precision and scale change Decimal(7, 4) -> Decimal(5, 2)")
.exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(12, 4)")
.exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(20, 12)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(10, 7)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(20, 17)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(7, 4)")
.exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(5, 2)")
.exclude("parquet decimal precision change Decimal(12, 2) -> Decimal(10, 2)")
.exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(10, 2)")
.exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(5, 2)")
.exclude("parquet decimal precision change Decimal(22, 2) -> Decimal(20, 2)")
.exclude("parquet decimal precision change Decimal(7, 2) -> Decimal(5, 2)")
.exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(12, 2)")
.exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(20, 2)")
.exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(10, 2)")
.exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(20, 2)")
.exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(7, 2)")
.exclude("parquet decimal type change Decimal(5, 2) -> Decimal(3, 2) overflows with parquet-mr")
.exclude("unsupported parquet conversion ByteType -> DecimalType(1,0)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(2,0)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(3,0)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(3,1)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(4,1)")
.exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)")
.exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)")
.exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)")
.exclude("unsupported parquet conversion LongType -> DateType")
.exclude("unsupported parquet conversion LongType -> DecimalType(10,0)")
.exclude("unsupported parquet conversion LongType -> DecimalType(19,0)")
.exclude("unsupported parquet conversion LongType -> DecimalType(20,1)")
.exclude("unsupported parquet conversion LongType -> IntegerType")
.exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(5,1)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(6,1)")
.exclude("parquet widening conversion ByteType -> DecimalType(11,1)")
.exclude("parquet widening conversion ByteType -> DecimalType(20,0)")
.exclude("parquet widening conversion IntegerType -> DecimalType(11,1)")
.exclude("parquet widening conversion IntegerType -> DecimalType(20,0)")
.exclude("parquet widening conversion IntegerType -> DecimalType(38,0)")
.exclude("parquet widening conversion IntegerType -> DoubleType")
.exclude("parquet widening conversion LongType -> DecimalType(20,0)")
.exclude("parquet widening conversion LongType -> DecimalType(21,1)")
.exclude("parquet widening conversion LongType -> DecimalType(38,0)")
.exclude("parquet widening conversion ShortType -> DecimalType(11,1)")
.exclude("parquet widening conversion ShortType -> DecimalType(20,0)")
.exclude("parquet widening conversion ShortType -> DecimalType(38,0)")
.exclude("parquet widening conversion ShortType -> DoubleType")
enableSuite[GlutenParquetVariantShreddingSuite]
// Generated suites for org.apache.spark.sql.execution.datasources.text
// TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@
*/
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.gluten.config.GlutenConfig

import org.apache.spark.SparkConf
import org.apache.spark.sql.GlutenSQLTestsTrait

/**
 * Gluten variant of Spark's [[ParquetTypeWideningSuite]], run against the native backend.
 *
 * (Removed the stale empty-body declaration of the same class that preceded this one;
 * two declarations of `GlutenParquetTypeWideningSuite` in one file do not compile.)
 */
class GlutenParquetTypeWideningSuite extends ParquetTypeWideningSuite with GlutenSQLTestsTrait {

  // Disable the native writer so that writeParquetFiles() in the parent suite uses
  // Spark's Parquet (parquet-mr) writer. This suite exercises the READ path only,
  // and the parent tests assert on V2 encodings (DELTA_BINARY_PACKED /
  // DELTA_BYTE_ARRAY) that the native writer does not produce.
  override def sparkConf: SparkConf =
    super.sparkConf.set(GlutenConfig.NATIVE_WRITER_ENABLED.key, "false")
}
Original file line number Diff line number Diff line change
Expand Up @@ -334,69 +334,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetCommitterSuite]
enableSuite[GlutenParquetFieldIdSchemaSuite]
enableSuite[GlutenParquetTypeWideningSuite]
// Velox does not support DELTA_BYTE_ARRAY encoding for FIXED_LEN_BYTE_ARRAY decimals.
.exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(22, 2)")
.exclude("parquet decimal precision and scale change Decimal(20, 7) -> Decimal(22, 5)")
.exclude("parquet decimal precision and scale change Decimal(20, 5) -> Decimal(22, 8)")
.exclude("parquet decimal precision and scale change Decimal(20, 2) -> Decimal(22, 4)")
.exclude("parquet decimal precision and scale change Decimal(10, 4) -> Decimal(12, 7)")
.exclude("parquet decimal precision and scale change Decimal(10, 6) -> Decimal(12, 4)")
.exclude("parquet decimal precision and scale change Decimal(10, 7) -> Decimal(5, 2)")
.exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 2)")
.exclude("parquet decimal precision and scale change Decimal(12, 4) -> Decimal(10, 6)")
.exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(10, 2)")
.exclude("parquet decimal precision and scale change Decimal(20, 17) -> Decimal(5, 2)")
.exclude("parquet decimal precision and scale change Decimal(22, 4) -> Decimal(20, 2)")
.exclude("parquet decimal precision and scale change Decimal(22, 5) -> Decimal(20, 7)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(6, 4)")
.exclude("parquet decimal precision and scale change Decimal(7, 4) -> Decimal(5, 2)")
.exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(12, 4)")
.exclude("parquet decimal precision and scale change Decimal(10, 2) -> Decimal(20, 12)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(10, 7)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(20, 17)")
.exclude("parquet decimal precision and scale change Decimal(5, 2) -> Decimal(7, 4)")
.exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(5, 2)")
.exclude("parquet decimal precision change Decimal(12, 2) -> Decimal(10, 2)")
.exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(10, 2)")
.exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(5, 2)")
.exclude("parquet decimal precision change Decimal(22, 2) -> Decimal(20, 2)")
.exclude("parquet decimal precision change Decimal(7, 2) -> Decimal(5, 2)")
.exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(12, 2)")
.exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(20, 2)")
.exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(10, 2)")
.exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(20, 2)")
.exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(7, 2)")
.exclude("parquet decimal type change Decimal(5, 2) -> Decimal(3, 2) overflows with parquet-mr")
.exclude("unsupported parquet conversion ByteType -> DecimalType(1,0)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(2,0)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(3,0)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(3,1)")
.exclude("unsupported parquet conversion ByteType -> DecimalType(4,1)")
.exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)")
.exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)")
.exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)")
.exclude("unsupported parquet conversion LongType -> DateType")
.exclude("unsupported parquet conversion LongType -> DecimalType(10,0)")
.exclude("unsupported parquet conversion LongType -> DecimalType(19,0)")
.exclude("unsupported parquet conversion LongType -> DecimalType(20,1)")
.exclude("unsupported parquet conversion LongType -> IntegerType")
.exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(5,1)")
.exclude("unsupported parquet conversion ShortType -> DecimalType(6,1)")
.exclude("parquet widening conversion ByteType -> DecimalType(11,1)")
.exclude("parquet widening conversion ByteType -> DecimalType(20,0)")
.exclude("parquet widening conversion IntegerType -> DecimalType(11,1)")
.exclude("parquet widening conversion IntegerType -> DecimalType(20,0)")
.exclude("parquet widening conversion IntegerType -> DecimalType(38,0)")
.exclude("parquet widening conversion IntegerType -> DoubleType")
.exclude("parquet widening conversion LongType -> DecimalType(20,0)")
.exclude("parquet widening conversion LongType -> DecimalType(21,1)")
.exclude("parquet widening conversion LongType -> DecimalType(38,0)")
.exclude("parquet widening conversion ShortType -> DecimalType(11,1)")
.exclude("parquet widening conversion ShortType -> DecimalType(20,0)")
.exclude("parquet widening conversion ShortType -> DecimalType(38,0)")
.exclude("parquet widening conversion ShortType -> DoubleType")
// TODO: 4.x enableSuite[GlutenParquetVariantShreddingSuite] // 1 failure
// Generated suites for org.apache.spark.sql.execution.datasources.text
// TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@
*/
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.gluten.config.GlutenConfig

import org.apache.spark.SparkConf
import org.apache.spark.sql.GlutenSQLTestsTrait

/**
 * Gluten variant of Spark's [[ParquetTypeWideningSuite]], run against the native backend.
 *
 * (Removed the stale empty-body declaration of the same class that preceded this one;
 * two declarations of `GlutenParquetTypeWideningSuite` in one file do not compile.)
 */
class GlutenParquetTypeWideningSuite extends ParquetTypeWideningSuite with GlutenSQLTestsTrait {

  // Disable the native writer so that writeParquetFiles() in the parent suite uses
  // Spark's Parquet (parquet-mr) writer. This suite exercises the READ path only,
  // and the parent tests assert on V2 encodings (DELTA_BINARY_PACKED /
  // DELTA_BYTE_ARRAY) that the native writer does not produce.
  override def sparkConf: SparkConf =
    super.sparkConf.set(GlutenConfig.NATIVE_WRITER_ENABLED.key, "false")
}
Loading