Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,16 @@ object VeloxConfig extends ConfigRegistry {
.checkValue(_ > 0, "must be a positive number")
.createWithDefault(10000)

// Session-scoped target output file size for Velox writes. Forwarded to Velox's
// HiveConfig kMaxTargetFileSize via the key declared in cpp/velox/config/VeloxConfig.h
// (kMaxTargetFileSize) and wired up in ConfigExtractor.cc. Default 0 disables the limit.
val MAX_TARGET_FILE_SIZE_SESSION =
buildConf("spark.gluten.sql.columnar.backend.velox.maxTargetFileSize")
.doc(
"The target file size for each output file when writing data. " +
"0 means no limit on target file size, and the actual file size will be determined by " +
"other factors such as max partition number and shuffle batch size.")
.bytesConf(ByteUnit.BYTE)
// Negative sizes are meaningless; 0 is the sentinel for "unlimited".
.checkValue(_ >= 0, "must be a non-negative number")
.createWithDefault(0)

val COLUMNAR_VELOX_RESIZE_BATCHES_SHUFFLE_INPUT =
buildConf("spark.gluten.sql.columnar.backend.velox.resizeBatches.shuffleInput")
.doc(
Expand Down
1 change: 1 addition & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ const std::string kParquetUseColumnNames = "spark.gluten.sql.columnar.backend.ve

// write files
// Keys mirrored on the Spark side in VeloxConfig.scala; read in ConfigExtractor.cc
// when building the Hive connector session config.
const std::string kMaxPartitions = "spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
const std::string kMaxTargetFileSize = "spark.gluten.sql.columnar.backend.velox.maxTargetFileSize";

const std::string kGlogVerboseLevel = "spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
const uint32_t kGlogVerboseLevelDefault = 0;
Expand Down
2 changes: 2 additions & 0 deletions cpp/velox/utils/ConfigExtractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ std::shared_ptr<facebook::velox::config::ConfigBase> createHiveConnectorSessionC
configs[facebook::velox::connector::hive::HiveConfig::kReadTimestampUnitSession] = std::string("6");
configs[facebook::velox::connector::hive::HiveConfig::kMaxPartitionsPerWritersSession] =
conf->get<std::string>(kMaxPartitions, "10000");
configs[facebook::velox::connector::hive::HiveConfig::kMaxTargetFileSize] =
conf->get<std::string>(kMaxTargetFileSize, "0B"); // 0 means no limit on target file size
configs[facebook::velox::connector::hive::HiveConfig::kIgnoreMissingFilesSession] =
conf->get<bool>(kIgnoreMissingFiles, false) ? "true" : "false";
configs[facebook::velox::connector::hive::HiveConfig::kParquetUseColumnNamesSession] =
Expand Down
1 change: 1 addition & 0 deletions docs/velox-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ nav_order: 16
| spark.gluten.sql.columnar.backend.velox.maxSpillFileSize | 1GB | The maximum size of a single spill file created |
| spark.gluten.sql.columnar.backend.velox.maxSpillLevel | 4 | The max allowed spilling level with zero being the initial spilling level |
| spark.gluten.sql.columnar.backend.velox.maxSpillRunRows | 3M | The maximum row size of a single spill run |
| spark.gluten.sql.columnar.backend.velox.maxTargetFileSize | 0B | The target file size for each output file when writing data. 0 means no limit on target file size, and the actual file size will be determined by other factors such as max partition number and shuffle batch size. |
| spark.gluten.sql.columnar.backend.velox.memCacheSize | 1GB | The memory cache size |
| spark.gluten.sql.columnar.backend.velox.memInitCapacity | 8MB | The initial memory capacity to reserve for a newly created Velox query memory pool. |
| spark.gluten.sql.columnar.backend.velox.memoryPoolCapacityTransferAcrossTasks | true | Whether to allow memory capacity transfer between memory pools from different tasks. |
Expand Down
Loading