diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h index 2f5992b9c0df..110c741a4b4b 100644 --- a/cpp/core/config/GlutenConfig.h +++ b/cpp/core/config/GlutenConfig.h @@ -95,9 +95,9 @@ const std::string kSparkJsonIgnoreNullFields = "spark.sql.jsonGenerator.ignoreNu // cudf const std::string kCudfEnabled = "spark.gluten.sql.columnar.cudf"; -const bool kCudfEnabledDefault = "true"; +constexpr bool kCudfEnabledDefault = true; const std::string kDebugCudf = "spark.gluten.sql.debug.cudf"; -const bool kDebugCudfDefault = "false"; +const std::string kDebugCudfDefault = "false"; std::unordered_map parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength); diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc index 54cb08bf572a..fedb5aa1712c 100644 --- a/cpp/velox/compute/VeloxBackend.cc +++ b/cpp/velox/compute/VeloxBackend.cc @@ -28,6 +28,7 @@ #include "utils/qat/QatCodec.h" #endif #ifdef GLUTEN_ENABLE_GPU +#include "velox/experimental/cudf/CudfConfig.h" #include "velox/experimental/cudf/connectors/hive/CudfHiveConnector.h" #include "velox/experimental/cudf/exec/ToCudf.h" #endif @@ -166,11 +167,16 @@ void VeloxBackend::init( #ifdef GLUTEN_ENABLE_GPU if (backendConf_->get(kCudfEnabled, kCudfEnabledDefault)) { - FLAGS_velox_cudf_debug = backendConf_->get(kDebugCudf, kDebugCudfDefault); - FLAGS_velox_cudf_memory_resource = backendConf_->get(kCudfMemoryResource, kCudfMemoryResourceDefault); - auto& options = velox::cudf_velox::CudfOptions::getInstance(); - options.memoryPercent = backendConf_->get(kCudfMemoryPercent, kCudfMemoryPercentDefault); - velox::cudf_velox::registerCudf(options); + std::unordered_map options = { + {velox::cudf_velox::CudfConfig::kCudfEnabled, "true"}, + {velox::cudf_velox::CudfConfig::kCudfDebugEnabled, backendConf_->get(kDebugCudf, kDebugCudfDefault)}, + {velox::cudf_velox::CudfConfig::kCudfMemoryResource, + backendConf_->get(kCudfMemoryResource, kCudfMemoryResourceDefault)}, + {velox::cudf_velox::CudfConfig::kCudfMemoryPercent, + backendConf_->get(kCudfMemoryPercent, kCudfMemoryPercentDefault)}}; + auto& cudfConfig = velox::cudf_velox::CudfConfig::getInstance(); + cudfConfig.initialize(std::move(options)); + velox::cudf_velox::registerCudf(); } #endif diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index fb3e1550a8f1..7846898cb702 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -24,6 +24,7 @@ #ifdef GLUTEN_ENABLE_GPU #include #include +#include "velox/experimental/cudf/CudfConfig.h" #include "velox/experimental/cudf/connectors/hive/CudfHiveConnectorSplit.h" #include "velox/experimental/cudf/exec/ToCudf.h" #endif @@ -661,7 +662,7 @@ std::unordered_map WholeStageResultIterator::getQueryC std::to_string(veloxCfg_->get(kSparkJsonIgnoreNullFields, true)); #ifdef GLUTEN_ENABLE_GPU - configs[cudf_velox::kCudfEnabled] = std::to_string(veloxCfg_->get(kCudfEnabled, false)); + configs[velox::cudf_velox::CudfConfig::kCudfEnabled] = std::to_string(veloxCfg_->get(kCudfEnabled, false)); #endif const auto setIfExists = [&](const std::string& glutenKey, const std::string& veloxKey) { diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h index 690fbd59f15e..4406887978b4 100644 --- a/cpp/velox/config/VeloxConfig.h +++ b/cpp/velox/config/VeloxConfig.h @@ -179,7 +179,7 @@ const std::string kCudfMemoryResourceDefault = // Initial percent of GPU memory to allocate for memory resource for one thread const std::string kCudfMemoryPercent = "spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent"; -const int32_t kCudfMemoryPercentDefault = 50; +const std::string kCudfMemoryPercentDefault = "50"; /// Preferred size of batches in bytes to be returned by operators. const std::string kVeloxPreferredBatchBytes = "spark.gluten.sql.columnar.backend.velox.preferredBatchBytes"; diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc index 9e4f37687ad6..00528801435c 100644 --- a/cpp/velox/jni/VeloxJniWrapper.cc +++ b/cpp/velox/jni/VeloxJniWrapper.cc @@ -243,30 +243,19 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_columnarbatch_VeloxColumnarBatchJ auto repeatedBatch = ObjectStore::retrieve(repeatedBatchHandle); auto nonRepeatedBatch = ObjectStore::retrieve(nonRepeatedBatchHandle); - GLUTEN_CHECK(rowNums == nonRepeatedBatch->numRows(), - "Row numbers after repeated do not match the expected size"); + GLUTEN_CHECK(rowNums == nonRepeatedBatch->numRows(), "Row numbers after repeated do not match the expected size"); // wrap repeatedBatch's rowVector in dictionary vector. auto vb = std::dynamic_pointer_cast(repeatedBatch); auto rowVector = vb->getRowVector(); std::vector outputs(rowVector->childrenSize()); for (int i = 0; i < outputs.size(); i++) { - outputs[i] = BaseVector::wrapInDictionary( - nullptr /*nulls*/, - repeatedIndices, - rowNums, - rowVector->childAt(i)); + outputs[i] = BaseVector::wrapInDictionary(nullptr /*nulls*/, repeatedIndices, rowNums, rowVector->childAt(i)); } - auto newRowVector = std::make_shared( - veloxPool.get(), - rowVector->type(), - BufferPtr(nullptr), - rowNums, - std::move(outputs)); + auto newRowVector = + std::make_shared(veloxPool.get(), rowVector->type(), BufferPtr(nullptr), rowNums, std::move(outputs)); repeatedBatch = std::make_shared(std::move(newRowVector)); - auto newBatch = VeloxColumnarBatch::compose( - veloxPool.get(), - {std::move(repeatedBatch), std::move(nonRepeatedBatch)}); + auto newBatch = VeloxColumnarBatch::compose(veloxPool.get(), {std::move(repeatedBatch), std::move(nonRepeatedBatch)}); return ctx->saveObject(newBatch); JNI_METHOD_END(kInvalidObjectHandle) } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index ab76f2c56ca1..ba7a70756848 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -168,15 +168,15 @@ bool SplitInfo::canUseCudfConnector() { bool isEmpty = partitionColumns.empty(); if (!isEmpty) { - // Check if all maps are empty - bool allMapsEmpty = true; - for (const auto& m : partitionColumns) { - if (!m.empty()) { - allMapsEmpty = false; - break; - } + // Check if all maps are empty + bool allMapsEmpty = true; + for (const auto& m : partitionColumns) { + if (!m.empty()) { + allMapsEmpty = false; + break; } - isEmpty = allMapsEmpty; + } + isEmpty = allMapsEmpty; } return isEmpty && format == dwio::common::FileFormat::PARQUET; } @@ -596,19 +596,17 @@ std::shared_ptr makeHiveInsertTableHandl } if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) { ++numPartitionColumns; - columnHandles.emplace_back( - std::make_shared( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back(std::make_shared( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } else { - columnHandles.emplace_back( - std::make_shared( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kRegular, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back(std::make_shared( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kRegular, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } } VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size()); @@ -635,11 +633,10 @@ std::shared_ptr makeCudfHiveInsertTableHandle( std::vector> columnHandles; for (int i = 0; i < tableColumnNames.size(); ++i) { - columnHandles.push_back( - std::make_shared( - tableColumnNames.at(i), - tableColumnTypes.at(i), - cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))})); + columnHandles.push_back(std::make_shared( + tableColumnNames.at(i), + tableColumnTypes.at(i), + cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))})); } return std::make_shared( @@ -741,16 +738,16 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: const auto& compressionKind = writerOptions->compressionKind.value_or(common::CompressionKind::CompressionKind_SNAPPY); std::shared_ptr tableHandle = std::make_shared( - kHiveConnectorId, - makeHiveInsertTableHandle( - tableColumnNames, /*inputType->names() clolumn name is different*/ - inputType->children(), - partitionedKey, - bucketProperty, - makeLocationHandle(writePath, fileName, fileFormat, compressionKind, bucketProperty != nullptr), - writerOptions, - fileFormat, - compressionKind)); + kHiveConnectorId, + makeHiveInsertTableHandle( + tableColumnNames, /*inputType->names() clolumn name is different*/ + inputType->children(), + partitionedKey, + bucketProperty, + makeLocationHandle(writePath, fileName, fileFormat, compressionKind, bucketProperty != nullptr), + writerOptions, + fileFormat, + compressionKind)); return std::make_shared( nextPlanNodeId(), inputType, @@ -1350,12 +1347,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: } common::SubfieldFilters subfieldFilters; tableHandle = std::make_shared( - connectorId, - "hive_table", - filterPushdownEnabled, - std::move(subfieldFilters), - remainingFilter, - dataColumns); + connectorId, "hive_table", filterPushdownEnabled, std::move(subfieldFilters), remainingFilter, dataColumns); // Get assignments and out names. std::vector outNames; diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 019c38fd7697..169744af9b86 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -18,11 +18,11 @@ set -exu CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2025_10_06 +VELOX_BRANCH=2025_10_08 VELOX_HOME="" RUN_SETUP_SCRIPT=ON VELOX_ENHANCED_REPO=https://github.com/IBM/velox.git -VELOX_ENHANCED_BRANCH=ibm-2025_10_06 +VELOX_ENHANCED_BRANCH=ibm-2025_10_08 ENABLE_ENHANCED_FEATURES=OFF # Developer use only for testing Velox PR.