From bd20b03dfc70e10ac7b7459ed4f4476e4405179a Mon Sep 17 00:00:00 2001 From: glutenperfbot Date: Tue, 17 Feb 2026 13:12:14 +0000 Subject: [PATCH 1/3] [GLUTEN-6887][VL] Daily Update Velox Version (dft-2026_02_17) Upstream Velox's New Commits: 54f466296 by Pedro Eugenio Rocha Pedreira, feat(cursor): Support parallel execution in TaskDebuggerCursor (#16384) 2a7eb0840 by Shanyue Wan, feat: Add resolveVectorFunctionWithMetadataWithCoercions (#16099) 6a4d205df by Suryadev Sahadevan Rajesh, docs: Add build badge for velox (#16383) 6dbd60423 by Abhinav Mukherjee, Add the map_update UDF (#15968) 9ea254547 by Bradley Dice, build(cuda): Add CUDA_VERSION build arg to adapters dockerfile (#16234) da458a4ea by Abhinav Mukherjee, Add the L2_NORM UDF (#15970) e7493488b by Muhammad Haseeb, fix(cudf): Fix Velox cuDF table scan tests (#16335) 2c26f11b7 by PHILO-HE, misc: Optimize Spark dayname function (#16194) c07126302 by Rui Mo, fix: Load reused lazy vector in filter project (#16108) f5fec6c03 by Ping Liu, feat: Add IcebergConfig (#16350) 8830b83f2 by Kent Yao, fix: Use XORShiftRandom in Spark rand function for compatibility (#16308) 44f99c3d3 by Shruti, feat(cudf): Enable precomputation support for join filters (#16212) c99aa5555 by Shruti Shivakumar, feat(cudf): Support full outer join in Velox-cuDF (#16229) 806c60fe6 by Jialiang Tan, feat: Add fast path to PrefixEncoding when no duplicates (#16321) 99b6b832f by Kent Yao, fix(test): Fix race condition in SkewedPartitionRebalancerTest.serializedRebalanceExecution (#16244) e839804c0 by Simon Eves, fix(cudf): Fix velox_cudf_s3_read_test (#16331) f41a1bc39 by Karthikeyan Natarajan, refactor(cudf): Refactor Cudf Driver Adapter and CudfLocalPartition (#16264) 0cbee9e96 by David Goode, feat(operator): Documentation for MixedUnion operator (#16345) 786c53a26 by Muhammad Haseeb, fix(cudf): Update cuDF dependency tree to fix debug builds (#16316) d02f09fcd by Deepak Majeti, feat(cudf): Use BufferedInput enqueue/load APIs (#16259) b3c6e3b02 by Kk Pulla, fix(expr): Use stable folly::hasher for expression hashing (#16284) a4b3458b5 by Ke Wang, feat: Add ssdFlushThresholdBytes options to SSD cache (#16313) 818f7f12c by Jiahao Liang, Populate WS IO stats for Velox SST Writer (#16326) 6fc36a796 by Pedro Eugenio Rocha Pedreira, docs(blog): velox::StringView API Changes and Best Practices (#16333) f1f6e8e34 by Xiaoxuan Meng, feat: Add IndexReader interface for index-based lookups (#16330) b88ce66a7 by Jialiang Tan, feat: Add overriding file create config for different operators (#16318) 887721970 by Bradley Dice, fix(cudf): Add missing Folly::folly dependency to velox_cudf_config_test (#16319) 80ad4c170 by jiangtian, fix: Call prepareReuse on argVectors in AggregateWindow to clear string buffers (#15680) 6f5a4853d by Xiaoxuan Meng, feat: Add batched index bounds encoding support to KeyEncoder (#16329) 62c4a0615 by Xiaoxuan Meng, fix: Backout "Flush row group by buffered bytes in parquet writer" (#16317) 0122842cd by Jialiang Tan, refactor: Move file io stats inside spill stats (#16255) d9caff3de by Abhinav Mukherjee, Add custom fuzzer tests for array_subset UDF (#16027) a88d36cd6 by Jialiang Tan, fix: Fix flaky SkewedPartitionRebalancerTest.serializedRebalanceExecution (#16300) e21b995f0 by Masha Basmanova, feat: Add EnforceDistinct operator (#16297) f0a3b54a4 by Kent Yao, feat(sparksql): Add transform with index parameter support (#16211) f5d95719a by Pedro Eugenio Rocha Pedreira, feat(python): Hook support in LocalDebuggerRunner (#16291) 40f3787ab by Ping Liu, refactor(parquet): Arrow writer to align with Velox coding standards (#16295) 0e99bded8 by Masha Basmanova, test: Enhance parsing of IF expressions in DuckParser (#16294) 15da8e764 by Devavret Makkar, perf(cudf): Fix several usages of default stream (#16258) 6984fc2cd by Christian Zentgraf, fix(build): Define serialize function in CudfHiveConnectorSplit (#16287) 5ce6c4827 by Rui Mo, misc: Make `requests_` and `coalescedLoads_` protected (#16249) b7ac8b584 by Kent Yao, feat(function): Add randstr Spark function (#16014) 022acc10f by Masha Basmanova, fix: Rename Type::isUnKnown() to isUnknown() (#16292) b3b3ee2d0 by Natasha Sehgal, feat: Add ExprSet and ExprMap type aliases using folly::F14 for expression deduplication (#16272) 6e01ab2c1 by wecharyu, feat: Flush row group by buffered bytes in parquet writer (#15751) 8c1a8aa1f by Ping Liu, feat(parquet): Add NaN statistics to Parquet writer (#14725) b9e6b55de by Kent Yao, feat: Add Spark monthname function (#16011) c2d2181be by Mariam Almesfer, test: Validate ANSI support for Spark CAST(decimal as string) (#16124) df3455499 by Chengcheng Jin, feat(sparksql): Add Spark to_pretty_string function (#16245) d42096b97 by David Goode, feat(operator): Velox MixedUnion support (#16184) 843bea84c by Mohammad Linjawi, test: Validate Spark string-to-date cast (#16092) ad7805bf2 by Pedro Eugenio Rocha Pedreira, feat(cursor): Support custom callbacks on breakpoints (#16267) Signed-off-by: glutenperfbot --- ep/build-velox/src/get-velox.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh index 1f54431a1458..c395089c7252 100755 --- a/ep/build-velox/src/get-velox.sh +++ b/ep/build-velox/src/get-velox.sh @@ -18,8 +18,8 @@ set -exu CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) VELOX_REPO=https://github.com/IBM/velox.git -VELOX_BRANCH=dft-2026_02_06 -VELOX_ENHANCED_BRANCH=ibm-2026_02_06 +VELOX_BRANCH=dft-2026_02_17 +VELOX_ENHANCED_BRANCH=ibm-2026_02_17 VELOX_HOME="" RUN_SETUP_SCRIPT=ON ENABLE_ENHANCED_FEATURES=OFF From 9e8a1ed36be8d2af2b51fdcfea6fef9c28d25227 Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Tue, 17 Feb 2026 17:47:40 +0000 Subject: [PATCH 2/3] Fix compilation --- cpp/velox/memory/GlutenBufferedInputBuilder.h | 10 +++++----- cpp/velox/memory/GlutenDirectBufferedInput.h | 6 +++--- cpp/velox/utils/VeloxWriterUtils.cc | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/velox/memory/GlutenBufferedInputBuilder.h b/cpp/velox/memory/GlutenBufferedInputBuilder.h index 86116ff1e88e..25b05f98707e 100644 --- a/cpp/velox/memory/GlutenBufferedInputBuilder.h +++ b/cpp/velox/memory/GlutenBufferedInputBuilder.h @@ -30,8 +30,8 @@ class GlutenBufferedInputBuilder : public facebook::velox::connector::hive::Buff const facebook::velox::FileHandle& fileHandle, const facebook::velox::dwio::common::ReaderOptions& readerOpts, const facebook::velox::connector::ConnectorQueryCtx* connectorQueryCtx, - std::shared_ptr ioStats, - std::shared_ptr fsStats, + std::shared_ptr ioStatistics, + std::shared_ptr ioStats, folly::Executor* executor, const folly::F14FastMap& fileReadOps = {}) override { if (connectorQueryCtx->cache()) { @@ -42,8 +42,8 @@ class GlutenBufferedInputBuilder : public facebook::velox::connector::hive::Buff connectorQueryCtx->cache(), facebook::velox::connector::Connector::getTracker(connectorQueryCtx->scanId(), readerOpts.loadQuantum()), fileHandle.groupId, - ioStats, - std::move(fsStats), + std::move(ioStatistics), + std::move(ioStats), executor, readerOpts, fileReadOps); @@ -54,8 +54,8 @@ class GlutenBufferedInputBuilder : public facebook::velox::connector::hive::Buff fileHandle.uuid, facebook::velox::connector::Connector::getTracker(connectorQueryCtx->scanId(), readerOpts.loadQuantum()), fileHandle.groupId, + std::move(ioStatistics), std::move(ioStats), - std::move(fsStats), executor, readerOpts, fileReadOps); diff --git a/cpp/velox/memory/GlutenDirectBufferedInput.h b/cpp/velox/memory/GlutenDirectBufferedInput.h index edaff5c603d8..3aef323da7bc 100644 --- a/cpp/velox/memory/GlutenDirectBufferedInput.h +++ b/cpp/velox/memory/GlutenDirectBufferedInput.h @@ -29,8 +29,8 @@ class GlutenDirectBufferedInput : public facebook::velox::dwio::common::DirectBu facebook::velox::StringIdLease fileNum, std::shared_ptr tracker, facebook::velox::StringIdLease groupId, - std::shared_ptr ioStats, - std::shared_ptr fsStats, + std::shared_ptr ioStatistics, + std::shared_ptr ioStats, folly::Executor* executor, const facebook::velox::io::ReaderOptions& readerOptions, folly::F14FastMap fileReadOps = {}) @@ -40,8 +40,8 @@ class GlutenDirectBufferedInput : public facebook::velox::dwio::common::DirectBu std::move(fileNum), std::move(tracker), std::move(groupId), + std::move(ioStatistics), std::move(ioStats), - std::move(fsStats), executor, readerOptions, std::move(fileReadOps)) {} diff --git a/cpp/velox/utils/VeloxWriterUtils.cc b/cpp/velox/utils/VeloxWriterUtils.cc index 50e4ca601e16..026418a223c4 100644 --- a/cpp/velox/utils/VeloxWriterUtils.cc +++ b/cpp/velox/utils/VeloxWriterUtils.cc @@ -61,7 +61,7 @@ std::unique_ptr makeParquetWriteOption(const std::unordered_mapsecond; if (parquetGzipWindowSizeStr == kGzipWindowSize4k) { auto codecOptions = std::make_shared(); - codecOptions->window_bits = kGzipWindowBits4k; + codecOptions->windowBits = kGzipWindowBits4k; writeOption->codecOptions = std::move(codecOptions); } } @@ -77,7 +77,7 @@ std::unique_ptr makeParquetWriteOption(const std::unordered_map(); auto it = sparkConfs.find(kParquetZSTDCompressionLevel); auto compressionLevel = it != sparkConfs.end() ? std::stoi(it->second) : kZSTDDefaultCompressionLevel; - codecOptions->compression_level = compressionLevel; + codecOptions->compressionLevel = compressionLevel; writeOption->codecOptions = std::move(codecOptions); } else if (boost::iequals(compressionCodecStr, "uncompressed")) { compressionCodec = CompressionKind::CompressionKind_NONE; From 001d1c4776dfd87968e7b45753ec00897cc72edd Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Wed, 18 Feb 2026 16:55:42 +0000 Subject: [PATCH 3/3] Test --- .github/workflows/velox_backend_x86.yml | 2 +- .../gluten/execution/VeloxIcebergSuite.scala | 3 ++ .../execution/VeloxTPCHIcebergSuite.scala | 3 ++ .../results/group-by-ordinal.sql.out | 12 ++--- .../sql-tests/results/group-by.sql.out | 2 +- .../sql-tests/results/random.sql.out | 8 ++-- .../results/group-by-ordinal.sql.out | 12 ++--- .../sql-tests/results/group-by.sql.out | 2 +- .../sql-tests/results/random.sql.out | 8 ++-- .../results/group-by-ordinal.sql.out | 12 ++--- .../sql-tests/results/group-by.sql.out | 2 +- .../sql-tests/results/random.sql.out | 8 ++-- .../results/group-by-ordinal.sql.out | 12 ++--- .../sql-tests/results/group-by.sql.out | 2 +- .../sql-tests/results/random.sql.out | 8 ++-- .../results/table-valued-functions.sql.out | 2 +- .../sql/GlutenGeneratorFunctionSuite.scala | 2 +- .../udaf/udaf-group-by-ordinal.sql.out | 6 +-- .../utils/velox/VeloxTestSettings.scala | 2 - .../sql/GlutenGeneratorFunctionSuite.scala | 2 +- .../GlutenTakeOrderedAndProjectSuite.scala | 46 +------------------ .../udaf/udaf-group-by-ordinal.sql.out | 6 +-- .../utils/velox/VeloxTestSettings.scala | 2 - .../sql/GlutenGeneratorFunctionSuite.scala | 2 +- .../GlutenTakeOrderedAndProjectSuite.scala | 46 +------------------ 25 files changed, 65 insertions(+), 147 deletions(-) diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml index 6edd8162ea13..3ac8edc42445 100644 --- a/.github/workflows/velox_backend_x86.yml +++ b/.github/workflows/velox_backend_x86.yml @@ -821,7 +821,7 @@ jobs: java -version export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l $SPARK_HOME - $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Piceberg-test -Pdelta -Phudi -Ppaimon -Pspark-ut \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest \ -DargLine="-Dspark.test.home=$SPARK_HOME" - name: Upload test report diff --git a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala index edb30dac61fa..de9eaba59791 100644 --- a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala +++ b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala @@ -16,4 +16,7 @@ */ package org.apache.gluten.execution +import org.apache.gluten.tags.SkipTest + +@SkipTest class VeloxIcebergSuite extends IcebergSuite diff --git a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala index 5456e0ba89df..42820c04dcaf 100644 --- a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala +++ b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala @@ -16,6 +16,8 @@ */ package org.apache.gluten.execution +import org.apache.gluten.tags.SkipTest + import org.apache.spark.SparkConf import org.apache.spark.sql.functions.col @@ -23,6 +25,7 @@ import org.apache.iceberg.spark.SparkWriteOptions import java.io.File +@SkipTest class VeloxTPCHIcebergSuite extends VeloxTPCHSuite { protected val tpchBasePath: String = getClass.getResource("/").getPath + "../../../src/test/resources" diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index cc20dd33e0b2..92e4a861fa1a 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -141,12 +141,12 @@ from -- !query schema struct -- !query output -1 0.5488135024422883 1 -1 0.7151893651681639 2 -2 0.5448831775801376 2 -2 0.6027633705776989 1 -3 0.4236547969336536 1 -3 0.6458941151817286 2 +1 0.5234194256885571 2 +1 0.7604953758285915 1 +2 0.0953472826424725 1 +2 0.3163249920547614 2 +3 0.2710259815484829 2 +3 0.7141011170991605 1 -- !query diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out index b54621f8ec0c..a12e830c1117 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out @@ -662,5 +662,5 @@ GROUP BY a IS NULL -- !query schema struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint> -- !query output -0.5488135024422883 7 +0.7604953758285915 7 1.0 2 diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out index a9d334e7f2b1..b269d40c3566 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out @@ -7,7 +7,7 @@ SELECT rand(0) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -15,7 +15,7 @@ SELECT rand(cast(3 / 7 AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -23,7 +23,7 @@ SELECT rand(NULL) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -31,7 +31,7 @@ SELECT rand(cast(NULL AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index cc20dd33e0b2..92e4a861fa1a 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -141,12 +141,12 @@ from -- !query schema struct -- !query output -1 0.5488135024422883 1 -1 0.7151893651681639 2 -2 0.5448831775801376 2 -2 0.6027633705776989 1 -3 0.4236547969336536 1 -3 0.6458941151817286 2 +1 0.5234194256885571 2 +1 0.7604953758285915 1 +2 0.0953472826424725 1 +2 0.3163249920547614 2 +3 0.2710259815484829 2 +3 0.7141011170991605 1 -- !query diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out index 408b9f9425bf..48b35bf1e0d8 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out @@ -691,7 +691,7 @@ GROUP BY a IS NULL -- !query schema struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint> -- !query output -0.5488135024422883 7 +0.7604953758285915 7 1.0 2 diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out index a9d334e7f2b1..b269d40c3566 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out +++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out @@ -7,7 +7,7 @@ SELECT rand(0) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -15,7 +15,7 @@ SELECT rand(cast(3 / 7 AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -23,7 +23,7 @@ SELECT rand(NULL) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -31,7 +31,7 @@ SELECT rand(cast(NULL AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index 5b8637012e57..bf85afd626af 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -208,12 +208,12 @@ from -- !query schema struct -- !query output -1 0.5488135024422883 1 -1 0.7151893651681639 2 -2 0.5448831775801376 2 -2 0.6027633705776989 1 -3 0.4236547969336536 1 -3 0.6458941151817286 2 +1 0.5234194256885571 2 +1 0.7604953758285915 1 +2 0.0953472826424725 1 +2 0.3163249920547614 2 +3 0.2710259815484829 2 +3 0.7141011170991605 1 -- !query select * from data group by a, b, 1 diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out index a4a3f76fa6a7..f56420926050 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out @@ -907,7 +907,7 @@ GROUP BY a IS NULL -- !query schema struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint> -- !query output -0.5488135024422883 7 +0.7604953758285915 7 1.0 2 diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out index f8460c1d437e..dea2c69ba035 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out +++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out @@ -4,7 +4,7 @@ SELECT rand(0) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -12,7 +12,7 @@ SELECT rand(cast(3 / 7 AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -20,7 +20,7 @@ SELECT rand(NULL) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -28,7 +28,7 @@ SELECT rand(cast(NULL AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query diff --git a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out index b968b4e09fac..0f29c27268c8 100644 --- a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out +++ b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out @@ -208,12 +208,12 @@ from -- !query schema struct -- !query output -1 0.5488135024422883 1 -1 0.7151893651681639 2 -2 0.5448831775801376 2 -2 0.6027633705776989 1 -3 0.4236547969336536 1 -3 0.6458941151817286 2 +1 0.5234194256885571 2 +1 0.7604953758285915 1 +2 0.0953472826424725 1 +2 0.3163249920547614 2 +3 0.2710259815484829 2 +3 0.7141011170991605 1 -- !query diff --git a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out index db0b74cd6ac7..4e3a176ba9c6 100644 --- a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out @@ -907,7 +907,7 @@ GROUP BY a IS NULL -- !query schema struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint> -- !query output -0.5488135024422883 7 +0.7604953758285915 7 1.0 2 diff --git a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out index 17e6f871b9c5..8a182a0646ca 100644 --- a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out +++ b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out @@ -4,7 +4,7 @@ SELECT rand(0) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -12,7 +12,7 @@ SELECT rand(cast(3 / 7 AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -20,7 +20,7 @@ SELECT rand(NULL) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query @@ -28,7 +28,7 @@ SELECT rand(cast(NULL AS int)) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query diff --git a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out index 0d5675fa6fde..1995e9e87b3a 100644 --- a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out +++ b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out @@ -247,7 +247,7 @@ select * from explode(array(rand(0))) -- !query schema struct -- !query output -0.5488135024422883 +0.7604953758285915 -- !query diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala index b3d51e802985..2050237e0d9e 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala @@ -22,7 +22,7 @@ class GlutenGeneratorFunctionSuite extends GeneratorFunctionSuite with GlutenSQL testGluten("SPARK-45171: Handle evaluated nondeterministic expression") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { val df = sql("select explode(array(rand(0)))") - checkAnswer(df, Row(0.5488135024422883)) + checkAnswer(df, Row(0.7604953758285915)) } } } diff --git a/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out index 45a19ba2c3f1..5eb1f6689ba9 100644 --- a/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out +++ b/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out @@ -139,11 +139,11 @@ from -- !query schema struct -- !query output -1 0.5234194256885571 1 +1 0.5234194256885571 2 1 0.7604953758285915 1 2 0.0953472826424725 1 -2 0.3163249920547614 1 -3 0.2710259815484829 1 +2 0.3163249920547614 2 +3 0.2710259815484829 2 3 0.7141011170991605 1 diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index f5c9d22db6ac..6e94d4cc0ea4 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -742,8 +742,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenSQLWindowFunctionSuite] .exclude("test with low buffer spill threshold") enableSuite[GlutenTakeOrderedAndProjectSuite] - // The results of rand() differ between vanilla spark and velox. - .exclude("SPARK-47104: Non-deterministic expressions in projection") enableSuite[GlutenSessionExtensionSuite] enableSuite[TestFileSourceScanExecTransformer] enableSuite[GlutenBucketedReadWithoutHiveSupportSuite] diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala index b3d51e802985..2050237e0d9e 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala @@ -22,7 +22,7 @@ class GlutenGeneratorFunctionSuite extends GeneratorFunctionSuite with GlutenSQL testGluten("SPARK-45171: Handle evaluated nondeterministic expression") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { val df = sql("select explode(array(rand(0)))") - checkAnswer(df, Row(0.5488135024422883)) + checkAnswer(df, Row(0.7604953758285915)) } } } diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala index 2731e0547111..bc231e52adc3 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala @@ -16,50 +16,8 @@ */ package org.apache.spark.sql.execution -import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row} -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Rand} -import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.GlutenSQLTestsBaseTrait class GlutenTakeOrderedAndProjectSuite extends TakeOrderedAndProjectSuite - with GlutenSQLTestsBaseTrait { - - private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) - - testGluten("SPARK-47104: Non-deterministic expressions in projection") { - val expected = (input: SparkPlan) => { - GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input))) - } - val schema = StructType.fromDDL("a int, b int, c double") - val rdd = sparkContext.parallelize( - Seq( - Row(1, 2, 0.6027633705776989d), - Row(2, 3, 0.7151893651681639d), - Row(3, 4, 0.5488135024422883d)), - 1) - val df = spark.createDataFrame(rdd, schema) - val projection = df.queryExecution.sparkPlan.output.take(2) :+ - Alias(Rand(Literal(0, IntegerType)), "_uuid")() - - // test executeCollect - checkThatPlansAgree( - df, - input => - TakeOrderedAndProjectExec(limit, sortOrder, projection, SortExec(sortOrder, false, input)), - input => expected(input), - sortAnswers = false) - - // test doExecute - checkThatPlansAgree( - df, - input => - noOpFilter( - TakeOrderedAndProjectExec( - limit, - sortOrder, - projection, - SortExec(sortOrder, false, input))), - input => expected(input), - sortAnswers = false) - } -} + with GlutenSQLTestsBaseTrait {} diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out index 45a19ba2c3f1..5eb1f6689ba9 100644 --- a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out +++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out @@ -139,11 +139,11 @@ from -- !query schema struct -- !query output -1 0.5234194256885571 1 +1 0.5234194256885571 2 1 0.7604953758285915 1 2 0.0953472826424725 1 -2 0.3163249920547614 1 -3 0.2710259815484829 1 +2 0.3163249920547614 2 +3 0.2710259815484829 2 3 0.7141011170991605 1 diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index e8f8dfa76253..2b9e4555d716 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -708,8 +708,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenSQLWindowFunctionSuite] .exclude("test with low buffer spill threshold") enableSuite[GlutenTakeOrderedAndProjectSuite] - // The results of rand() differ between vanilla spark and velox. - .exclude("SPARK-47104: Non-deterministic expressions in projection") enableSuite[GlutenSessionExtensionSuite] enableSuite[TestFileSourceScanExecTransformer] enableSuite[GlutenBucketedReadWithoutHiveSupportSuite] diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala index b3d51e802985..2050237e0d9e 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala @@ -22,7 +22,7 @@ class GlutenGeneratorFunctionSuite extends GeneratorFunctionSuite with GlutenSQL testGluten("SPARK-45171: Handle evaluated nondeterministic expression") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { val df = sql("select explode(array(rand(0)))") - checkAnswer(df, Row(0.5488135024422883)) + checkAnswer(df, Row(0.7604953758285915)) } } } diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala index 2731e0547111..bc231e52adc3 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala @@ -16,50 +16,8 @@ */ package org.apache.spark.sql.execution -import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row} -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Rand} -import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.GlutenSQLTestsBaseTrait class GlutenTakeOrderedAndProjectSuite extends TakeOrderedAndProjectSuite - with GlutenSQLTestsBaseTrait { - - private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) - - testGluten("SPARK-47104: Non-deterministic expressions in projection") { - val expected = (input: SparkPlan) => { - GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input))) - } - val schema = StructType.fromDDL("a int, b int, c double") - val rdd = sparkContext.parallelize( - Seq( - Row(1, 2, 0.6027633705776989d), - Row(2, 3, 0.7151893651681639d), - Row(3, 4, 0.5488135024422883d)), - 1) - val df = spark.createDataFrame(rdd, schema) - val projection = df.queryExecution.sparkPlan.output.take(2) :+ - Alias(Rand(Literal(0, IntegerType)), "_uuid")() - - // test executeCollect - checkThatPlansAgree( - df, - input => - TakeOrderedAndProjectExec(limit, sortOrder, projection, SortExec(sortOrder, false, input)), - input => expected(input), - sortAnswers = false) - - // test doExecute - checkThatPlansAgree( - df, - input => - noOpFilter( - TakeOrderedAndProjectExec( - limit, - sortOrder, - projection, - SortExec(sortOrder, false, input))), - input => expected(input), - sortAnswers = false) - } -} + with GlutenSQLTestsBaseTrait {}