Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,16 @@ object VeloxConfig extends ConfigRegistry {
.checkValue(_ > 0, "must be a positive number")
.createWithDefault(10000)

// Session-scoped target output file size for Velox writes. Forwarded to Velox's
// HiveConfig kMaxTargetFileSize via the key declared in cpp/velox/config/VeloxConfig.h
// (kMaxTargetFileSize) and wired up in ConfigExtractor.cc. Default 0 disables the limit.
val MAX_TARGET_FILE_SIZE_SESSION =
buildConf("spark.gluten.sql.columnar.backend.velox.maxTargetFileSize")
.doc(
"The target file size for each output file when writing data. " +
"0 means no limit on target file size, and the actual file size will be determined by " +
"other factors such as max partition number and shuffle batch size.")
.bytesConf(ByteUnit.BYTE)
// Negative sizes are meaningless; 0 is the sentinel for "unlimited".
.checkValue(_ >= 0, "must be a non-negative number")
.createWithDefault(0)

val COLUMNAR_VELOX_RESIZE_BATCHES_SHUFFLE_INPUT =
buildConf("spark.gluten.sql.columnar.backend.velox.resizeBatches.shuffleInput")
.doc(
Expand Down
1 change: 1 addition & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ const std::string kParquetUseColumnNames = "spark.gluten.sql.columnar.backend.ve

// write files
// Keys mirrored on the Spark side in VeloxConfig.scala; read in ConfigExtractor.cc
// when building the Hive connector session config.
const std::string kMaxPartitions = "spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
const std::string kMaxTargetFileSize = "spark.gluten.sql.columnar.backend.velox.maxTargetFileSize";

const std::string kGlogVerboseLevel = "spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
const uint32_t kGlogVerboseLevelDefault = 0;
Expand Down
2 changes: 2 additions & 0 deletions cpp/velox/utils/ConfigExtractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ std::shared_ptr<facebook::velox::config::ConfigBase> createHiveConnectorSessionC
configs[facebook::velox::connector::hive::HiveConfig::kReadTimestampUnitSession] = std::string("6");
configs[facebook::velox::connector::hive::HiveConfig::kMaxPartitionsPerWritersSession] =
conf->get<std::string>(kMaxPartitions, "10000");
configs[facebook::velox::connector::hive::HiveConfig::kMaxTargetFileSize] =
conf->get<std::string>(kMaxTargetFileSize, "0B"); // 0 means no limit on target file size
configs[facebook::velox::connector::hive::HiveConfig::kIgnoreMissingFilesSession] =
conf->get<bool>(kIgnoreMissingFiles, false) ? "true" : "false";
configs[facebook::velox::connector::hive::HiveConfig::kParquetUseColumnNamesSession] =
Expand Down
1 change: 1 addition & 0 deletions docs/velox-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ nav_order: 16
| spark.gluten.sql.columnar.backend.velox.maxSpillFileSize | 1GB | The maximum size of a single spill file created |
| spark.gluten.sql.columnar.backend.velox.maxSpillLevel | 4 | The max allowed spilling level with zero being the initial spilling level |
| spark.gluten.sql.columnar.backend.velox.maxSpillRunRows | 3M | The maximum row size of a single spill run |
| spark.gluten.sql.columnar.backend.velox.maxTargetFileSize | 0B | The target file size for each output file when writing data. 0 means no limit on target file size, and the actual file size will be determined by other factors such as max partition number and shuffle batch size. |
| spark.gluten.sql.columnar.backend.velox.memCacheSize | 1GB | The memory cache size |
| spark.gluten.sql.columnar.backend.velox.memInitCapacity | 8MB | The initial memory capacity to reserve for a newly created Velox query memory pool. |
| spark.gluten.sql.columnar.backend.velox.memoryPoolCapacityTransferAcrossTasks | true | Whether to allow memory capacity transfer between memory pools from different tasks. |
Expand Down
Loading