apache · zhouyuan · Feb 24, 2026 · jinchengchenghh · Mar 2, 2026
diff --git a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -428,6 +428,15 @@ object VeloxConfig extends ConfigRegistry {
       .intConf
       .createWithDefault(100000)
 
+  val CONTEXT_EXECUTOR_CPU_THREADS =
+    buildConf("spark.gluten.sql.columnar.backend.velox.cpuExecutorThreads")
+      .doc(
+        "The number of CPU threads to execute Velox query. " +
+          "When the value is set to 0, the CPU executor will be disabled and all the tasks will " +
+          "be executed in the caller thread.")
+      .intConf
+      .createWithDefault(0) // Keep in sync with kVeloxCpuExecutorThreadsDefault in C++.
+
   val COLUMNAR_VELOX_BLOOM_FILTER_EXPECTED_NUM_ITEMS =
     buildConf("spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems")
       .doc(

diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -213,10 +213,18 @@ WholeStageResultIterator::WholeStageResultIterator(
 }
 
 std::shared_ptr<velox::core::QueryCtx> WholeStageResultIterator::createNewVeloxQueryCtx() {
+  const int32_t cpuThreads = veloxCfg_->get<int32_t>(kVeloxCpuExecutorThreads, kVeloxCpuExecutorThreadsDefault);
+  folly::Executor* executor = nullptr; // Null keeps execution on the caller thread.
+  if (cpuThreads > 0) {
+    static folly::CPUThreadPoolExecutor ctxExecutor(cpuThreads); // Process-wide; sized by first positive setting.
+    executor = &ctxExecutor; // Must outlive the QueryCtx, which is handed only this raw pointer.
+  }
+
   std::unordered_map<std::string, std::shared_ptr<velox::config::ConfigBase>> connectorConfigs;
   connectorConfigs[kHiveConnectorId] = createHiveConnectorSessionConfig(veloxCfg_);
+
   std::shared_ptr<velox::core::QueryCtx> ctx = velox::core::QueryCtx::create(
-      nullptr,
+      executor,
       facebook::velox::core::QueryConfig{getQueryContextConf()},
       connectorConfigs,
       gluten::VeloxBackend::get()->getAsyncDataCache(),

diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
@@ -107,6 +107,9 @@ const std::string kVeloxCacheEnabled = "spark.gluten.sql.columnar.backend.velox.
 
 const std::string kExprMaxCompiledRegexes = "spark.gluten.sql.columnar.backend.velox.maxCompiledRegexes";
 
+const std::string kVeloxCpuExecutorThreads = "spark.gluten.sql.columnar.backend.velox.cpuExecutorThreads";
+const int32_t kVeloxCpuExecutorThreadsDefault = 0; // 0 disables the CPU executor; tasks run in the caller thread
+
 // memory cache
 const std::string kVeloxMemCacheSize = "spark.gluten.sql.columnar.backend.velox.memCacheSize";
 const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G

diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
@@ -22,6 +22,7 @@ nav_order: 16
 | spark.gluten.sql.columnar.backend.velox.cacheEnabled                             | false             | Enable Velox cache, default off. It's recommended to enablesoft-affinity as well when enable velox cache.                                                                                                                                                                                                                                                                                                                                             |
 | spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct                      | 0                 | Set prefetch cache min pct for velox file scan                                                                                                                                                                                                                                                                                                                                                                                                        |
 | spark.gluten.sql.columnar.backend.velox.checkUsageLeak                           | true              | Enable check memory usage leak.                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| spark.gluten.sql.columnar.backend.velox.cpuExecutorThreads                       | 0                 | The number of CPU threads to execute Velox query. When the value is set to 0, the CPU executor will be disabled and all the tasks will be executed in the caller thread.                                                                                                                                                                                                                                                                              |
 | spark.gluten.sql.columnar.backend.velox.cudf.batchSize                           | 2147483647        | Cudf input batch size after shuffle reader                                                                                                                                                                                                                                                                                                                                                                                                            |
 | spark.gluten.sql.columnar.backend.velox.cudf.enableTableScan                     | false             | Enable cudf table scan                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | spark.gluten.sql.columnar.backend.velox.cudf.enableValidation                    | true              | Heuristics you can apply to validate a cuDF/GPU plan and only offload when the entire stage can be fully and profitably executed on GPU                                                                                                                                                                                                                                                                                                               |