From dc0d1aba1bf2e08d79779efb98ada9a5a9306a8c Mon Sep 17 00:00:00 2001 From: glutenperfbot Date: Mon, 19 Jan 2026 13:28:46 +0000 Subject: [PATCH 1/3] [GLUTEN-6887][VL] Daily Update Velox Version (dft-2026_01_19) Upstream Velox's New Commits: 98dd37935 by Xiaoxuan Meng, feat: Add sortOrders() getter to KeyEncoder (#16057) 9380556ee by Henry Edwin Dikeman, feat(connector): Add TPCH connector serdes (#16033) 229b01564 by Xiaoxuan Meng, refactor: Trivial distinct aggregation cleanup (#16054) 725110245 by Xiaoxuan Meng, refactor: Deprecate the old cleanup aggregation node ctor APIs (#16053) 5a2aad8f7 by Bikramjeet Vig, feat: Add owner metadata field to Velox functions for error attribution (#15788) 9f9843250 by Eric Jia, refactor: Replace as<> with asChecked<> in IcebergDataSink (#16035) 3ad5164d9 by Pedro Eugenio Rocha Pedreira, refactor(python): Simplify task iterator API (#15943) 80e6c4ba4 by Masha Basmanova, fix: TypeCoercer::leastCommonSuperType for named structs (#16038) Signed-off-by: glutenperfbot --- ep/build-velox/src/get-velox.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh index 14070294bdb6..1f5eaa7ee4be 100755 --- a/ep/build-velox/src/get-velox.sh +++ b/ep/build-velox/src/get-velox.sh @@ -18,8 +18,8 @@ set -exu CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) VELOX_REPO=https://github.com/IBM/velox.git -VELOX_BRANCH=dft-2026_01_16 -VELOX_ENHANCED_BRANCH=ibm-2026_01_16 +VELOX_BRANCH=dft-2026_01_19 +VELOX_ENHANCED_BRANCH=ibm-2026_01_19 VELOX_HOME="" RUN_SETUP_SCRIPT=ON ENABLE_ENHANCED_FEATURES=OFF From 26dc9042a8f66cc30a24432c91d4404b20076474 Mon Sep 17 00:00:00 2001 From: Rong Ma Date: Mon, 19 Jan 2026 13:56:34 +0000 Subject: [PATCH 2/3] fix --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 50 ++++++++++++--------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 1b1ee1f49353..01fec481b907 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -464,7 +464,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: // Get the grouping expressions. VELOX_CHECK( - aggRel.groupings().size() <= 1, "At most one grouping is supported, but got {}.", aggRel.groupings().size()); + aggRel.groupings().size() <= 1, "At most one grouping is supported, but got {}.", aggRel.groupings().size()); if (aggRel.groupings().size() == 1) { for (const auto& groupingExpr : aggRel.groupings()[0].grouping_expressions()) { // Velox's groupings are limited to be Field. @@ -504,7 +504,6 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: aggregates.emplace_back(core::AggregationNode::Aggregate{aggExpr, rawInputTypes, mask, {}, {}}); } - bool ignoreNullKeys = false; std::vector preGroupingExprs; if (aggRel.has_advanced_extension() && SubstraitParser::configSetInOptimization(aggRel.advanced_extension(), "isStreaming=")) { @@ -526,7 +525,8 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: preGroupingExprs, aggOutNames, aggregates, - ignoreNullKeys, + /*ignoreNullKeys=*/false, + /*noGroupsSpanBatches=*/false, childNode); if (aggRel.has_common()) { @@ -641,17 +641,19 @@ std::shared_ptr makeHiveInsertTableHandl } if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) { ++numPartitionColumns; - columnHandles.emplace_back(std::make_shared( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back( + std::make_shared( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } else { - columnHandles.emplace_back(std::make_shared( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kRegular, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back( + std::make_shared( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kRegular, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } } VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size()); @@ -679,10 +681,11 @@ std::shared_ptr makeCudfHiveInsertTableHandle( columnHandles.reserve(tableColumnNames.size()); for (int i = 0; i < tableColumnNames.size(); ++i) { - columnHandles.push_back(std::make_shared( - tableColumnNames.at(i), - tableColumnTypes.at(i), - cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))})); + columnHandles.push_back( + std::make_shared( + tableColumnNames.at(i), + tableColumnTypes.at(i), + cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))})); } return std::make_shared( @@ -904,10 +907,9 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: (std::dynamic_pointer_cast(childNode) != nullptr || std::dynamic_pointer_cast(childNode) != nullptr) #ifdef GLUTEN_ENABLE_GPU - || std::dynamic_pointer_cast(childNode) != nullptr + || std::dynamic_pointer_cast(childNode) != nullptr #endif - && - childNode->outputType()->size() > requiredChildOutput.size(), + && childNode->outputType()->size() > requiredChildOutput.size(), "injectedProject is true, but the ProjectNode or ValueStreamNode (in case of projection fallback)" " is missing or does not have the corresponding projection field"); @@ -1653,9 +1655,13 @@ bool SubstraitToVeloxPlanConverter::checkTypeExtension(const ::substrait::Plan& } #ifdef GLUTEN_ENABLE_GPU -template core::PlanNodePtr SubstraitToVeloxPlanConverter::constructValueStreamNode(const ::substrait::ReadRel& sRead, int32_t streamIdx); +template core::PlanNodePtr SubstraitToVeloxPlanConverter::constructValueStreamNode( + const ::substrait::ReadRel& sRead, + int32_t streamIdx); #endif -template core::PlanNodePtr SubstraitToVeloxPlanConverter::constructValueStreamNode(const ::substrait::ReadRel& sRead, int32_t streamIdx); +template core::PlanNodePtr SubstraitToVeloxPlanConverter::constructValueStreamNode( + const ::substrait::ReadRel& sRead, + int32_t streamIdx); } // namespace gluten From 85469a0750cfd3ad1dbaa91dd29c6c0414b16716 Mon Sep 17 00:00:00 2001 From: Rong Ma Date: Mon, 19 Jan 2026 14:02:26 +0000 Subject: [PATCH 3/3] fix style --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 31 ++++++++++----------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 01fec481b907..b20b4a3d0901 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -641,19 +641,17 @@ std::shared_ptr makeHiveInsertTableHandl } if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) { ++numPartitionColumns; - columnHandles.emplace_back( - std::make_shared( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back(std::make_shared( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } else { - columnHandles.emplace_back( - std::make_shared( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kRegular, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back(std::make_shared( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kRegular, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } } VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size()); @@ -681,11 +679,10 @@ std::shared_ptr makeCudfHiveInsertTableHandle( columnHandles.reserve(tableColumnNames.size()); for (int i = 0; i < tableColumnNames.size(); ++i) { - columnHandles.push_back( - std::make_shared( - tableColumnNames.at(i), - tableColumnTypes.at(i), - cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))})); + columnHandles.push_back(std::make_shared( + tableColumnNames.at(i), + tableColumnTypes.at(i), + cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))})); } return std::make_shared(