diff --git a/.github/workflows/velox_nightly.yml b/.github/workflows/velox_nightly.yml index 2a8bf47994a7..035494a6fd7d 100644 --- a/.github/workflows/velox_nightly.yml +++ b/.github/workflows/velox_nightly.yml @@ -171,7 +171,8 @@ jobs: run: | cd $GITHUB_WORKSPACE/ && \ ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip - ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip + # iceberg/hudi support for spark-4.1 is not available, skip it first + ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip - name: Upload bundle package uses: actions/upload-artifact@v4 with: @@ -287,8 +288,8 @@ jobs: ./build/mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip ./build/mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip - # iceberg support for spark-4.1 with java-17 is not avaiable, skip it first - ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip + # iceberg/hudi support for spark-4.1 is not available, skip it first + ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip - name: Upload bundle package uses: actions/upload-artifact@v4 with: @@ -323,8 +324,8 @@ jobs: run: | cd $GITHUB_WORKSPACE/ && \ ./build/mvn clean install -Pspark-4.0 
-Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip - # iceberg support for spark-4.1 with java-21 is not avaiable, skip it first - ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip + # iceberg/hudi support for spark-4.1 is not available, skip it first + ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip - name: Upload bundle package uses: actions/upload-artifact@v4 with: @@ -486,8 +487,8 @@ jobs: ./build/mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip ./build/mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip - # iceberg support for spark-4.1 with java-17 is not avaiable, skip it first - ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip + # iceberg/hudi support for spark-4.1 with java-17 is not available, skip it first + ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip - name: Upload bundle package uses: actions/upload-artifact@v4 with: diff --git a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/enhanced/VeloxIcebergSuite.scala b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/enhanced/VeloxIcebergSuite.scala index c3d3c8edc658..53ac42ef2801 100644 --- 
a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/enhanced/VeloxIcebergSuite.scala +++ b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/enhanced/VeloxIcebergSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.gluten.execution.enhanced +import org.apache.gluten.config.VeloxConfig import org.apache.gluten.execution._ import org.apache.gluten.tags.EnhancedFeaturesTest @@ -383,4 +384,139 @@ class VeloxIcebergSuite extends IcebergSuite { } } } + test("iceberg max aggregate with nulls enhanced true") { + withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "true") { + withTable("store_sales_test_nulls_enhanced_true") { + spark.sql(""" + |create table store_sales_test_nulls_enhanced_true ( + | ss_item_sk int, + | ss_sales_price decimal(7,2) + |) using iceberg + |""".stripMargin) + + spark.sql(""" + |insert into store_sales_test_nulls_enhanced_true values + |(1, 200.00), + |(2, 200.00), + |(3, null), + |(4, 199.98), + |(5, 199.96) + |""".stripMargin) + + val result = spark + .sql("select max(ss_sales_price) from store_sales_test_nulls_enhanced_true") + .collect() + + assert(result.length == 1, "Should return 1 row") + assert(result(0).get(0) != null, "MAX should not return NULL") + assert( + result(0).getDecimal(0).doubleValue() == 200.00, + s"MAX should return 200.00, but got ${result(0).get(0)}" + ) + } + } + } + + test("iceberg max aggregate without nulls enhanced true") { + withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "true") { + withTable("store_sales_test_no_nulls_enhanced_true") { + spark.sql(""" + |create table store_sales_test_no_nulls_enhanced_true ( + | ss_item_sk int, + | ss_sales_price decimal(7,2) + |) using iceberg + |""".stripMargin) + + spark.sql(""" + |insert into store_sales_test_no_nulls_enhanced_true values + |(1, 200.00), + |(2, 200.00), + |(3, 200.00), + |(4, 199.98), + |(5, 199.96), + |(6, 199.96), + |(7, 199.92), + |(8, 199.92), + |(9, 199.92), + |(10, 199.90), + |(11, null) + |""".stripMargin) + + 
val result = spark + .sql("select max(ss_sales_price) from store_sales_test_no_nulls_enhanced_true where ss_sales_price is not null") + .collect() + + assert(result.length == 1, "Should return 1 row") + assert(result(0).get(0) != null, "MAX should not return NULL") + assert( + result(0).getDecimal(0).doubleValue() == 200.00, + s"MAX should return 200.00, but got ${result(0).get(0)}" + ) + } + } + } + + test("iceberg max aggregate with nulls enhanced false") { + withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "false") { + withTable("store_sales_test_nulls_enhanced_false") { + spark.sql(""" + |create table store_sales_test_nulls_enhanced_false ( + | ss_item_sk int, + | ss_sales_price decimal(7,2) + |) using iceberg + |""".stripMargin) + + spark.sql(""" + |insert into store_sales_test_nulls_enhanced_false values + |(1, 200.00), + |(2, 199.98), + |(3, null), + |(4, 199.96) + |""".stripMargin) + + val result = spark + .sql("select max(ss_sales_price) from store_sales_test_nulls_enhanced_false") + .collect() + + assert(result.length == 1, "Should return 1 row") + assert(result(0).get(0) != null, "MAX should not return NULL") + assert( + result(0).getDecimal(0).doubleValue() == 200.00, + s"MAX should return 200.00, but got ${result(0).get(0)}" + ) + } + } + } + + test("iceberg max aggregate without nulls enhanced false") { + withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "false") { + withTable("store_sales_test_no_nulls_enhanced_false") { + spark.sql(""" + |create table store_sales_test_no_nulls_enhanced_false ( + | ss_item_sk int, + | ss_sales_price decimal(7,2) + |) using iceberg + |""".stripMargin) + + spark.sql(""" + |insert into store_sales_test_no_nulls_enhanced_false values + |(1, 200.00), + |(2, 199.98), + |(3, 199.96), + |(4, null) + |""".stripMargin) + + val result = spark + .sql("select max(ss_sales_price) from store_sales_test_no_nulls_enhanced_false where ss_sales_price is not null") + .collect() + + assert(result.length == 1, "Should return 
1 row") + assert(result(0).get(0) != null, "MAX should not return NULL") + assert( + result(0).getDecimal(0).doubleValue() == 200.00, + s"MAX should return 200.00, but got ${result(0).get(0)}" + ) + } + } + } } diff --git a/dev/HardwareConcurrency.h b/dev/HardwareConcurrency.h new file mode 100644 index 000000000000..d2af05b69f3c --- /dev/null +++ b/dev/HardwareConcurrency.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace folly { + +unsigned int hardware_concurrency() noexcept; + +unsigned int available_concurrency() noexcept { + return hardware_concurrency(); +} + +} // namespace folly diff --git a/dev/ci-velox-buildshared-centos-8.sh b/dev/ci-velox-buildshared-centos-8.sh index 48f7b4111585..6c050a816548 100755 --- a/dev/ci-velox-buildshared-centos-8.sh +++ b/dev/ci-velox-buildshared-centos-8.sh @@ -18,5 +18,7 @@ set -e source /opt/rh/gcc-toolset-11/enable +# Fixme: this is a hack to use new header, should upgrade folly instead. 
+cp dev/HardwareConcurrency.h /usr/local/include/folly/system/HardwareConcurrency.h ./dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=ON \ --build_examples=ON --build_benchmarks=ON diff --git a/dev/vcpkg/ports/folly/adding-api.patch b/dev/vcpkg/ports/folly/adding-api.patch new file mode 100644 index 000000000000..67973b93a089 --- /dev/null +++ b/dev/vcpkg/ports/folly/adding-api.patch @@ -0,0 +1,34 @@ +diff --git a/folly/system/HardwareConcurrency.cpp b/folly/system/HardwareConcurrency.cpp +index 71213c80a..cd1852034 100644 +--- a/folly/system/HardwareConcurrency.cpp ++++ b/folly/system/HardwareConcurrency.cpp +@@ -36,4 +36,18 @@ unsigned int hardware_concurrency() noexcept { + return std::thread::hardware_concurrency(); + } + ++unsigned int available_concurrency() noexcept { ++#if defined(__linux__) && !defined(__ANDROID__) ++ cpu_set_t cpuset; ++ if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) { ++ auto count = CPU_COUNT(&cpuset); ++ if (count != 0) { ++ return count; ++ } ++ } ++#endif ++ ++ return std::thread::hardware_concurrency(); ++} ++ + } // namespace folly +diff --git a/folly/system/HardwareConcurrency.h b/folly/system/HardwareConcurrency.h +index 7c497634c..71dfb5db1 100644 +--- a/folly/system/HardwareConcurrency.h ++++ b/folly/system/HardwareConcurrency.h +@@ -20,4 +20,6 @@ namespace folly { + + unsigned int hardware_concurrency() noexcept; + ++unsigned int available_concurrency() noexcept; ++ + } // namespace folly diff --git a/dev/vcpkg/ports/folly/portfile.cmake b/dev/vcpkg/ports/folly/portfile.cmake index cd352416345a..bc1c4e969342 100644 --- a/dev/vcpkg/ports/folly/portfile.cmake +++ b/dev/vcpkg/ports/folly/portfile.cmake @@ -13,6 +13,7 @@ vcpkg_from_github( disable-uninitialized-resize-on-new-stl.patch fix-unistd-include.patch fix-absolute-dir.patch + adding-api.patch ) file(REMOVE "${SOURCE_PATH}/CMake/FindFastFloat.cmake") file(REMOVE "${SOURCE_PATH}/CMake/FindFmt.cmake") diff --git 
a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh index c3e3cf0f427a..e39b146f704f 100755 --- a/ep/build-velox/src/get-velox.sh +++ b/ep/build-velox/src/get-velox.sh @@ -18,8 +18,8 @@ set -exu CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) VELOX_REPO=https://github.com/IBM/velox.git -VELOX_BRANCH=dft-2026_03_10-iceberg -VELOX_ENHANCED_BRANCH=ibm-2026_03_10 +VELOX_BRANCH=dft-2026_03_11-iceberg +VELOX_ENHANCED_BRANCH=temp-fix VELOX_HOME="" RUN_SETUP_SCRIPT=ON ENABLE_ENHANCED_FEATURES=OFF diff --git a/ep/build-velox/src/setup-centos8.sh b/ep/build-velox/src/setup-centos8.sh index e056ce71dd1f..ff69dc97ddf7 100755 --- a/ep/build-velox/src/setup-centos8.sh +++ b/ep/build-velox/src/setup-centos8.sh @@ -43,8 +43,9 @@ export CC=/opt/rh/gcc-toolset-11/root/bin/gcc export CXX=/opt/rh/gcc-toolset-11/root/bin/g++ DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)/deps-download} -FB_OS_VERSION="v2024.07.01.00" -FMT_VERSION="10.1.1" +FB_OS_VERSION="v2026.01.05.00" +FMT_VERSION="11.2.0" +FAST_FLOAT_VERSION="v8.0.2" BOOST_VERSION="boost-1.84.0" GEOS_VERSION="3.10.7" @@ -58,7 +59,7 @@ function install_build_prerequisites { dnf_install epel-release dnf-plugins-core # For ccache, ninja dnf config-manager --set-enabled powertools dnf update -y - dnf_install ninja-build curl ccache gcc-toolset-11 git wget which + dnf_install ninja-build curl ccache gcc-toolset-11 git wget which expat-devel gettext-devel dnf_install yasm dnf_install autoconf automake python39 python39-devel python39-pip libtool pip3.9 install cmake==3.28.3 @@ -78,6 +79,24 @@ function install_conda { dnf_install conda } +function install_git { + # Remove an older version if present. 
+ dnf remove -y git + wget_and_untar https://github.com/git/git/archive/v2.52.0.tar.gz git + ( + cd ${DEPENDENCY_DIR}/git + make prefix=/usr/local all -j$(nproc) + make prefix=/usr/local install + ) +} + +function install_xxhash { + wget_and_untar https://github.com/Cyan4973/xxHash/archive/refs/tags/v0.8.1.tar.gz xxhash + ( + cd ${DEPENDENCY_DIR}/xxhash + make && make install + ) +} function install_gflags { # Remove an older version if present. @@ -136,6 +155,11 @@ function install_fizz { cmake_install_dir fizz/fizz -DBUILD_TESTS=OFF } +function install_fast_float { + wget_and_untar https://github.com/fastfloat/fast_float/archive/refs/tags/"${FAST_FLOAT_VERSION}".tar.gz fast_float + cmake_install_dir fast_float -DBUILD_TESTS=OFF +} + function install_folly { wget_and_untar https://github.com/facebook/folly/archive/refs/tags/${FB_OS_VERSION}.tar.gz folly cmake_install_dir folly -DFOLLY_HAVE_INT128_T=ON -DFOLLY_NO_EXCEPTION_TRACER=ON @@ -160,7 +184,7 @@ function install_duckdb { if $BUILD_DUCKDB ; then echo 'Building DuckDB' wget_and_untar https://github.com/duckdb/duckdb/archive/refs/tags/v0.8.1.tar.gz duckdb - cmake_install_dir duckdb -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF -DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE=Release + cmake_install_dir duckdb -DGIT_COMMIT_HASH="6536a77" -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF -DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE=Release fi } @@ -179,6 +203,7 @@ function install_geos { function install_velox_deps { run_and_time install_velox_deps_from_dnf + run_and_time install_git run_and_time install_conda run_and_time install_gflags run_and_time install_glog @@ -187,6 +212,8 @@ function install_velox_deps { run_and_time install_boost run_and_time install_protobuf run_and_time install_fmt + run_and_time install_fast_float + run_and_time install_xxhash run_and_time install_folly run_and_time install_fizz run_and_time install_wangle diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala index b4fa188f44e6..ed98bdc1a56c 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala @@ -118,7 +118,6 @@ trait HashJoinLikeExecTransformer extends BaseJoinExec with TransformSupport { fromFields.length == toFields.length && fromFields.zip(toFields).forall { case (l, r) => - l.name.equalsIgnoreCase(r.name) && sameType(l.dataType, r.dataType) } diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 0dadfa1d0bd8..2d693bcd9e01 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -780,7 +780,7 @@ class VeloxTestSettings extends BackendTestSettings { // Generated suites for org.apache.spark.sql enableSuite[GlutenCacheManagerSuite] enableSuite[GlutenDataFrameShowSuite] - // TODO: 4.x enableSuite[GlutenDataFrameSubquerySuite] // 1 failure + enableSuite[GlutenDataFrameSubquerySuite] enableSuite[GlutenDataFrameTableValuedFunctionsSuite] enableSuite[GlutenDataFrameTransposeSuite] enableSuite[GlutenDeprecatedDatasetAggregatorSuite]