From 0fc8ce4adff11de593a12eea498bc7be833910aa Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Sun, 4 Jan 2026 18:37:49 +0800 Subject: [PATCH] [VL] Add spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory config --- .../scala/org/apache/gluten/config/VeloxConfig.scala | 12 ++++++++++++ cpp/velox/compute/WholeStageResultIterator.cc | 5 ++++- cpp/velox/config/VeloxConfig.h | 2 ++ docs/velox-configuration.md | 1 + 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala index 6644f4edc255..03781d2fb5e9 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala @@ -380,6 +380,18 @@ object VeloxConfig extends ConfigRegistry { .doubleConf .createWithDefault(0.1) + val MAX_EXTENDED_PARTIAL_AGGREGATION_MEMORY = + buildConf("spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory") + .doc( + "Set the max extended memory of partial aggregation in bytes. When this option is set " + + "to a value greater than 0, it will override spark.gluten.sql.columnar.backend.velox." + + "maxExtendedPartialAggregationMemoryRatio. Note: this option only works when " + + "flushable partial aggregation is enabled. Ignored when " + + "spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false." 
+ ) + .bytesConf(ByteUnit.BYTE) + .createOptional + val MAX_EXTENDED_PARTIAL_AGGREGATION_MEMORY_RATIO = buildConf("spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio") .doc( diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 6b9761a154a0..e91e2ad69d62 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -555,7 +555,10 @@ std::unordered_map WholeStageResultIterator::getQueryC : static_cast(veloxCfg_->get(kMaxPartialAggregationMemoryRatio, 0.1) * offHeapMemory)); auto maxExtendedPartialAggregationMemory = std::max( 1 << 26, - static_cast(veloxCfg_->get(kMaxExtendedPartialAggregationMemoryRatio, 0.15) * offHeapMemory)); + veloxCfg_->get(kMaxExtendedPartialAggregationMemory).has_value() + ? veloxCfg_->get(kMaxExtendedPartialAggregationMemory).value() + : static_cast( + veloxCfg_->get(kMaxExtendedPartialAggregationMemoryRatio, 0.15) * offHeapMemory)); configs[velox::core::QueryConfig::kMaxPartialAggregationMemory] = std::to_string(maxPartialAggregationMemory); configs[velox::core::QueryConfig::kMaxExtendedPartialAggregationMemory] = std::to_string(maxExtendedPartialAggregationMemory); diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h index 6db6fb09940c..2cacee5369cb 100644 --- a/cpp/velox/config/VeloxConfig.h +++ b/cpp/velox/config/VeloxConfig.h @@ -55,6 +55,8 @@ const std::string kMaxPartialAggregationMemoryRatio = const std::string kMaxPartialAggregationMemory = "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemory"; const std::string kMaxExtendedPartialAggregationMemoryRatio = "spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio"; +const std::string kMaxExtendedPartialAggregationMemory = + "spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory"; const std::string kAbandonPartialAggregationMinPct = 
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct"; const std::string kAbandonPartialAggregationMinRows = diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md index 9738f283eedf..bd838f357c3d 100644 --- a/docs/velox-configuration.md +++ b/docs/velox-configuration.md @@ -37,6 +37,7 @@ nav_order: 16 | spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes | 64MB | Set the max coalesced bytes for velox file scan | | spark.gluten.sql.columnar.backend.velox.maxCoalescedDistance | 512KB | Set the max coalesced distance bytes for velox file scan | | spark.gluten.sql.columnar.backend.velox.maxCompiledRegexes | 100 | Controls maximum number of compiled regular expression patterns per function instance per thread of execution. | +| spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory | <undefined> | Set the max extended memory of partial aggregation in bytes. When this option is set to a value greater than 0, it will override spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio. Note: this option only works when flushable partial aggregation is enabled. Ignored when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false. | | spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio | 0.15 | Set the max extended memory of partial aggregation as maxExtendedPartialAggregationMemoryRatio of offheap size. Note: this option only works when flushable partial aggregation is enabled. Ignored when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false. | | spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemory | <undefined> | Set the max memory of partial aggregation in bytes. When this option is set to a value greater than 0, it will override spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio. Note: this option only works when flushable partial aggregation is enabled. 
Ignored when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false. | | spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio | 0.1 | Set the max memory of partial aggregation as maxPartialAggregationMemoryRatio of offheap size. Note: this option only works when flushable partial aggregation is enabled. Ignored when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false. |