Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions .github/workflows/velox_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/ && \
./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg/hudi support for spark-4.1 is not available, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -287,8 +288,8 @@ jobs:
./build/mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
./build/mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg support for spark-4.1 with java-17 is not avaiable, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg/hudi support for spark-4.1 is not available, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -323,8 +324,8 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/ && \
./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg support for spark-4.1 with java-21 is not avaiable, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg/hudi support for spark-4.1 is not available, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -486,8 +487,8 @@ jobs:
./build/mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
./build/mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg support for spark-4.1 with java-17 is not avaiable, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
# iceberg/hudi support for spark-4.1 with java-17 is not available, skip it first
./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pceleborn -Puniffle -Pdelta -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.gluten.execution.enhanced

import org.apache.gluten.config.VeloxConfig
import org.apache.gluten.execution._
import org.apache.gluten.tags.EnhancedFeaturesTest

Expand Down Expand Up @@ -383,4 +384,139 @@ class VeloxIcebergSuite extends IcebergSuite {
}
}
}
test("iceberg max aggregate with nulls enhanced true") {
  // With enhanced features ON, MAX over a nullable decimal column must skip
  // the NULL row and return the largest non-null value.
  withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "true") {
    withTable("store_sales_test_nulls_enhanced_true") {
      // Table schema mirrors a slice of TPC-DS store_sales.
      spark.sql(
        """
          |create table store_sales_test_nulls_enhanced_true (
          | ss_item_sk int,
          | ss_sales_price decimal(7,2)
          |) using iceberg
          |""".stripMargin)

      // One NULL price mixed in with ordinary values; 200.00 is the max.
      spark.sql(
        """
          |insert into store_sales_test_nulls_enhanced_true values
          |(1, 200.00),
          |(2, 200.00),
          |(3, null),
          |(4, 199.98),
          |(5, 199.96)
          |""".stripMargin)

      val rows =
        spark.sql("select max(ss_sales_price) from store_sales_test_nulls_enhanced_true").collect()

      assert(rows.length == 1, "Should return 1 row")
      val row = rows.head
      assert(row.get(0) != null, "MAX should not return NULL")
      assert(
        row.getDecimal(0).doubleValue() == 200.00,
        s"MAX should return 200.00, but got ${row.get(0)}")
    }
  }
}

test("iceberg max aggregate without nulls enhanced true") {
  // With enhanced features ON, MAX combined with an IS NOT NULL filter must
  // still produce the largest value; the single NULL row is filtered out by
  // the WHERE clause rather than by the aggregate itself.
  withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "true") {
    withTable("store_sales_test_no_nulls_enhanced_true") {
      spark.sql(
        """
          |create table store_sales_test_no_nulls_enhanced_true (
          | ss_item_sk int,
          | ss_sales_price decimal(7,2)
          |) using iceberg
          |""".stripMargin)

      // Duplicated values exercise tie handling; 200.00 is the expected max.
      spark.sql(
        """
          |insert into store_sales_test_no_nulls_enhanced_true values
          |(1, 200.00),
          |(2, 200.00),
          |(3, 200.00),
          |(4, 199.98),
          |(5, 199.96),
          |(6, 199.96),
          |(7, 199.92),
          |(8, 199.92),
          |(9, 199.92),
          |(10, 199.90),
          |(11, null)
          |""".stripMargin)

      val rows = spark
        .sql("select max(ss_sales_price) from store_sales_test_no_nulls_enhanced_true where ss_sales_price is not null")
        .collect()

      assert(rows.length == 1, "Should return 1 row")
      val row = rows.head
      assert(row.get(0) != null, "MAX should not return NULL")
      assert(
        row.getDecimal(0).doubleValue() == 200.00,
        s"MAX should return 200.00, but got ${row.get(0)}")
    }
  }
}

test("iceberg max aggregate with nulls enhanced false") {
  // Baseline run with enhanced features OFF: MAX over a column containing a
  // NULL must ignore the NULL and return the largest non-null value.
  withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "false") {
    withTable("store_sales_test_nulls_enhanced_false") {
      spark.sql(
        """
          |create table store_sales_test_nulls_enhanced_false (
          | ss_item_sk int,
          | ss_sales_price decimal(7,2)
          |) using iceberg
          |""".stripMargin)

      spark.sql(
        """
          |insert into store_sales_test_nulls_enhanced_false values
          |(1, 200.00),
          |(2, 199.98),
          |(3, null),
          |(4, 199.96)
          |""".stripMargin)

      val rows =
        spark.sql("select max(ss_sales_price) from store_sales_test_nulls_enhanced_false").collect()

      assert(rows.length == 1, "Should return 1 row")
      val row = rows.head
      assert(row.get(0) != null, "MAX should not return NULL")
      assert(
        row.getDecimal(0).doubleValue() == 200.00,
        s"MAX should return 200.00, but got ${row.get(0)}")
    }
  }
}

test("iceberg max aggregate without nulls enhanced false") {
  // Baseline run with enhanced features OFF: MAX with an IS NOT NULL filter
  // must return the largest value after the NULL row is filtered out.
  withSQLConf(VeloxConfig.ENABLE_ENHANCED_FEATURES.key -> "false") {
    withTable("store_sales_test_no_nulls_enhanced_false") {
      spark.sql(
        """
          |create table store_sales_test_no_nulls_enhanced_false (
          | ss_item_sk int,
          | ss_sales_price decimal(7,2)
          |) using iceberg
          |""".stripMargin)

      spark.sql(
        """
          |insert into store_sales_test_no_nulls_enhanced_false values
          |(1, 200.00),
          |(2, 199.98),
          |(3, 199.96),
          |(4, null)
          |""".stripMargin)

      val rows = spark
        .sql("select max(ss_sales_price) from store_sales_test_no_nulls_enhanced_false where ss_sales_price is not null")
        .collect()

      assert(rows.length == 1, "Should return 1 row")
      val row = rows.head
      assert(row.get(0) != null, "MAX should not return NULL")
      assert(
        row.getDecimal(0).doubleValue() == 200.00,
        s"MAX should return 200.00, but got ${row.get(0)}")
    }
  }
}
}
27 changes: 27 additions & 0 deletions dev/HardwareConcurrency.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

namespace folly {

// Defined elsewhere (inside the real folly library).
unsigned int hardware_concurrency() noexcept;

// Compatibility shim: older folly lacks available_concurrency(), so expose it
// as a thin wrapper over hardware_concurrency(). The definition lives in a
// header, so it MUST be `inline` — otherwise every translation unit that
// includes this file emits its own copy and linking fails with
// duplicate-symbol errors (ODR violation).
inline unsigned int available_concurrency() noexcept {
  return hardware_concurrency();
}

} // namespace folly
2 changes: 2 additions & 0 deletions dev/ci-velox-buildshared-centos-8.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,7 @@
set -e

source /opt/rh/gcc-toolset-11/enable
# FIXME: this is a hack to pick up the new header; we should upgrade folly instead.
cp dev/HardwareConcurrency.h /usr/local/include/folly/system/HardwareConcurrency.h
./dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --build_tests=ON \
--build_examples=ON --build_benchmarks=ON
34 changes: 34 additions & 0 deletions dev/vcpkg/ports/folly/adding-api.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
diff --git a/folly/system/HardwareConcurrency.cpp b/folly/system/HardwareConcurrency.cpp
index 71213c80a..cd1852034 100644
--- a/folly/system/HardwareConcurrency.cpp
+++ b/folly/system/HardwareConcurrency.cpp
@@ -36,4 +36,18 @@ unsigned int hardware_concurrency() noexcept {
return std::thread::hardware_concurrency();
}

+unsigned int available_concurrency() noexcept {
+#if defined(__linux__) && !defined(__ANDROID__)
+ cpu_set_t cpuset;
+ if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) {
+ auto count = CPU_COUNT(&cpuset);
+ if (count != 0) {
+ return count;
+ }
+ }
+#endif
+
+ return std::thread::hardware_concurrency();
+}
+
} // namespace folly
diff --git a/folly/system/HardwareConcurrency.h b/folly/system/HardwareConcurrency.h
index 7c497634c..71dfb5db1 100644
--- a/folly/system/HardwareConcurrency.h
+++ b/folly/system/HardwareConcurrency.h
@@ -20,4 +20,6 @@ namespace folly {

unsigned int hardware_concurrency() noexcept;

+unsigned int available_concurrency() noexcept;
+
} // namespace folly
1 change: 1 addition & 0 deletions dev/vcpkg/ports/folly/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ vcpkg_from_github(
disable-uninitialized-resize-on-new-stl.patch
fix-unistd-include.patch
fix-absolute-dir.patch
adding-api.patch
)
file(REMOVE "${SOURCE_PATH}/CMake/FindFastFloat.cmake")
file(REMOVE "${SOURCE_PATH}/CMake/FindFmt.cmake")
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get-velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ set -exu

CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/IBM/velox.git
VELOX_BRANCH=dft-2026_03_10-iceberg
VELOX_ENHANCED_BRANCH=ibm-2026_03_10
VELOX_BRANCH=dft-2026_03_11-iceberg
VELOX_ENHANCED_BRANCH=temp-fix
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
ENABLE_ENHANCED_FEATURES=OFF
Expand Down
35 changes: 31 additions & 4 deletions ep/build-velox/src/setup-centos8.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ export CC=/opt/rh/gcc-toolset-11/root/bin/gcc
export CXX=/opt/rh/gcc-toolset-11/root/bin/g++
DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)/deps-download}

FB_OS_VERSION="v2024.07.01.00"
FMT_VERSION="10.1.1"
FB_OS_VERSION="v2026.01.05.00"
FMT_VERSION="11.2.0"
FAST_FLOAT_VERSION="v8.0.2"
BOOST_VERSION="boost-1.84.0"
GEOS_VERSION="3.10.7"

Expand All @@ -58,7 +59,7 @@ function install_build_prerequisites {
dnf_install epel-release dnf-plugins-core # For ccache, ninja
dnf config-manager --set-enabled powertools
dnf update -y
dnf_install ninja-build curl ccache gcc-toolset-11 git wget which
dnf_install ninja-build curl ccache gcc-toolset-11 git wget which expat-devel gettext-devel
dnf_install yasm
dnf_install autoconf automake python39 python39-devel python39-pip libtool
pip3.9 install cmake==3.28.3
Expand All @@ -78,6 +79,24 @@ function install_conda {
dnf_install conda
}

function install_git {
  # Build and install git 2.52.0 from source into /usr/local.
  # Remove an older version if present.
  dnf remove -y git
  wget_and_untar https://github.com/git/git/archive/v2.52.0.tar.gz git
  (
    # Guard the cd (quoted, and abort the subshell on failure) so that a bad
    # DEPENDENCY_DIR can never cause `make install` to run from an arbitrary
    # working directory.
    cd "${DEPENDENCY_DIR}/git" || exit 1
    make prefix=/usr/local all -j$(nproc)
    make prefix=/usr/local install
  )
}

function install_xxhash {
  # Build and install xxHash 0.8.1 from source (default prefix /usr/local).
  wget_and_untar https://github.com/Cyan4973/xxHash/archive/refs/tags/v0.8.1.tar.gz xxhash
  (
    # Quote the path and abort the subshell if cd fails, so `make install`
    # never runs from the wrong directory. Build in parallel, consistent with
    # the other install_* functions in this script.
    cd "${DEPENDENCY_DIR}/xxhash" || exit 1
    make -j$(nproc) && make install
  )
}

function install_gflags {
# Remove an older version if present.
Expand Down Expand Up @@ -136,6 +155,11 @@ function install_fizz {
cmake_install_dir fizz/fizz -DBUILD_TESTS=OFF
}

function install_fast_float {
  # Fetch and install the fast_float header library at ${FAST_FLOAT_VERSION}.
  # Presumably required by the newer folly version pinned above (its bundled
  # FindFastFloat.cmake is deleted elsewhere in this change) — confirm.
  wget_and_untar https://github.com/fastfloat/fast_float/archive/refs/tags/"${FAST_FLOAT_VERSION}".tar.gz fast_float
  cmake_install_dir fast_float -DBUILD_TESTS=OFF
}

function install_folly {
wget_and_untar https://github.com/facebook/folly/archive/refs/tags/${FB_OS_VERSION}.tar.gz folly
cmake_install_dir folly -DFOLLY_HAVE_INT128_T=ON -DFOLLY_NO_EXCEPTION_TRACER=ON
Expand All @@ -160,7 +184,7 @@ function install_duckdb {
if $BUILD_DUCKDB ; then
echo 'Building DuckDB'
wget_and_untar https://github.com/duckdb/duckdb/archive/refs/tags/v0.8.1.tar.gz duckdb
cmake_install_dir duckdb -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF -DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE=Release
cmake_install_dir duckdb -DGIT_COMMIT_HASH="6536a77" -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF -DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE=Release
fi
}

Expand All @@ -179,6 +203,7 @@ function install_geos {

function install_velox_deps {
run_and_time install_velox_deps_from_dnf
run_and_time install_git
run_and_time install_conda
run_and_time install_gflags
run_and_time install_glog
Expand All @@ -187,6 +212,8 @@ function install_velox_deps {
run_and_time install_boost
run_and_time install_protobuf
run_and_time install_fmt
run_and_time install_fast_float
run_and_time install_xxhash
run_and_time install_folly
run_and_time install_fizz
run_and_time install_wangle
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ trait HashJoinLikeExecTransformer extends BaseJoinExec with TransformSupport {
fromFields.length == toFields.length &&
fromFields.zip(toFields).forall {
case (l, r) =>
l.name.equalsIgnoreCase(r.name) &&
sameType(l.dataType, r.dataType)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ class VeloxTestSettings extends BackendTestSettings {
// Generated suites for org.apache.spark.sql
enableSuite[GlutenCacheManagerSuite]
enableSuite[GlutenDataFrameShowSuite]
// TODO: 4.x enableSuite[GlutenDataFrameSubquerySuite] // 1 failure
enableSuite[GlutenDataFrameSubquerySuite]
enableSuite[GlutenDataFrameTableValuedFunctionsSuite]
enableSuite[GlutenDataFrameTransposeSuite]
enableSuite[GlutenDeprecatedDatasetAggregatorSuite]
Expand Down
Loading