Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,15 @@ object VeloxConfig extends ConfigRegistry {
.intConf
.createWithDefault(100000)

// Integer config entry for the number of CPU executor threads used to execute a Velox query.
// Defaults to 0, which (per the doc string below) disables the CPU executor so that all tasks
// run in the caller thread.
val CONTEXT_EXECUTOR_CPU_THREADS =
buildConf("spark.gluten.sql.columnar.backend.velox.cpuExecutorThreads")
.doc(
"The number of CPU threads to execute Velox query. " +
"When the value is set to 0, the CPU executor will be disabled and all the tasks will " +
"be executed in the caller thread.")
.intConf
.createWithDefault(0)

val COLUMNAR_VELOX_BLOOM_FILTER_EXPECTED_NUM_ITEMS =
buildConf("spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems")
.doc(
Expand Down
10 changes: 9 additions & 1 deletion cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,18 @@ WholeStageResultIterator::WholeStageResultIterator(
}

std::shared_ptr<velox::core::QueryCtx> WholeStageResultIterator::createNewVeloxQueryCtx() {
int cpuThreads = veloxCfg_->get<int32_t>(kVeloxCpuExecutorThreads, kVeloxCpuExecutorThreadsDefault);
folly::Executor* executor = nullptr;
if (cpuThreads > 0) {
auto ctxExecutor = std::make_unique<folly::CPUThreadPoolExecutor>(cpuThreads);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should retain ownership of ctxExecutor; otherwise it is destroyed at the end of the if block, leaving `executor` dangling.

executor = ctxExecutor.get();
}

std::unordered_map<std::string, std::shared_ptr<velox::config::ConfigBase>> connectorConfigs;
connectorConfigs[kHiveConnectorId] = createHiveConnectorSessionConfig(veloxCfg_);

std::shared_ptr<velox::core::QueryCtx> ctx = velox::core::QueryCtx::create(
nullptr,
executor,
facebook::velox::core::QueryConfig{getQueryContextConf()},
connectorConfigs,
gluten::VeloxBackend::get()->getAsyncDataCache(),
Expand Down
3 changes: 3 additions & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ const std::string kVeloxCacheEnabled = "spark.gluten.sql.columnar.backend.velox.

const std::string kExprMaxCompiledRegexes = "spark.gluten.sql.columnar.backend.velox.maxCompiledRegexes";

// Config key for the number of CPU executor threads used to execute a Velox query.
const std::string kVeloxCpuExecutorThreads = "spark.gluten.sql.columnar.backend.velox.cpuExecutorThreads";
// Default thread count used when the key above is not set.
const int32_t kVeloxCpuExecutorThreadsDefault = 0; // 0 means disable

// memory cache
const std::string kVeloxMemCacheSize = "spark.gluten.sql.columnar.backend.velox.memCacheSize";
const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G
Expand Down
1 change: 1 addition & 0 deletions docs/velox-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ nav_order: 16
| spark.gluten.sql.columnar.backend.velox.cacheEnabled | false | Enable Velox cache, default off. It's recommended to enable soft-affinity as well when enabling Velox cache. |
| spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct | 0 | Set prefetch cache min pct for velox file scan |
| spark.gluten.sql.columnar.backend.velox.checkUsageLeak | true | Enable check memory usage leak. |
| spark.gluten.sql.columnar.backend.velox.cpuExecutorThreads | 0 | The number of CPU threads to execute Velox query. When the value is set to 0, the CPU executor will be disabled and all the tasks will be executed in the caller thread. |
| spark.gluten.sql.columnar.backend.velox.cudf.batchSize | 2147483647 | Cudf input batch size after shuffle reader |
| spark.gluten.sql.columnar.backend.velox.cudf.enableTableScan | false | Enable cudf table scan |
| spark.gluten.sql.columnar.backend.velox.cudf.enableValidation | true | Heuristics you can apply to validate a cuDF/GPU plan and only offload when the entire stage can be fully and profitably executed on GPU |
Expand Down
Loading