From adb5cf44ebced5db5accfb2235eb55efbd50ce30 Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Wed, 4 Feb 2026 15:05:25 +0000 Subject: [PATCH 1/3] Test branch --- ep/build-velox/src/get-velox.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh index 8d658a83ddde..4c1fa46e46a9 100755 --- a/ep/build-velox/src/get-velox.sh +++ b/ep/build-velox/src/get-velox.sh @@ -17,8 +17,8 @@ set -exu CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) -VELOX_REPO=https://github.com/IBM/velox.git -VELOX_BRANCH=dft-2026_01_30 +VELOX_REPO=https://github.com/rui-mo/velox.git +VELOX_BRANCH=test_fb VELOX_ENHANCED_BRANCH=ibm-2026_01_30 VELOX_HOME="" RUN_SETUP_SCRIPT=ON From e8fe9dd659566e5ffd39328f5d2674dca2c39b3d Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 1 May 2025 17:12:57 +0100 Subject: [PATCH 2/3] [VL] test arrow + new thrift Signed-off-by: Yuan Zhou --- dev/ci-velox-buildstatic-centos-7.sh | 2 +- ep/build-velox/src/modify_arrow.patch | 68 ++++++++++++++++----------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/dev/ci-velox-buildstatic-centos-7.sh b/dev/ci-velox-buildstatic-centos-7.sh index 8d992005000e..5b6dea934706 100755 --- a/dev/ci-velox-buildstatic-centos-7.sh +++ b/dev/ci-velox-buildstatic-centos-7.sh @@ -20,5 +20,5 @@ set -e source /opt/rh/devtoolset-11/enable source /opt/rh/rh-git227/enable export NUM_THREADS=4 -./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \ +./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=ON --build_tests=OFF --build_benchmarks=OFF \ --build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON diff --git a/ep/build-velox/src/modify_arrow.patch b/ep/build-velox/src/modify_arrow.patch index 7d4d8e557b58..0aa2dd683e0d 100644 --- a/ep/build-velox/src/modify_arrow.patch +++ b/ep/build-velox/src/modify_arrow.patch @@ -11,7 +11,7 @@ index d56f6a36d..9b4088df9 100644 + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc) endif() endif() - + @@ -823,9 +822,6 @@ if(ARROW_WITH_OPENTELEMETRY) opentelemetry-cpp::ostream_span_exporter opentelemetry-cpp::otlp_http_exporter) @@ -21,11 +21,11 @@ index d56f6a36d..9b4088df9 100644 - endif() list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) endif() - + @@ -860,6 +856,14 @@ if(ARROW_USE_XSIMD) list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_XSIMD}) endif() - + +# This should be done after if(ARROW_ORC) and if(ARROW_WITH_OPENTELEMETRY) +# because they depend on Protobuf. +if(ARROW_WITH_PROTOBUF) @@ -69,11 +69,36 @@ index a24f272fe..e25f78c85 100644 #include #include #include +diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt +index e9df0c8d7..b92002c75 100644 +--- a/cpp/thirdparty/versions.txt ++++ b/cpp/thirdparty/versions.txt +@@ -107,8 +107,8 @@ ARROW_SUBSTRAIT_BUILD_VERSION=v0.27.0 + ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=4ed375f69d972a57fdc5ec406c17003a111831d8640d3f1733eccd4b3ff45628 + ARROW_S2N_TLS_BUILD_VERSION=v1.3.35 + ARROW_S2N_TLS_BUILD_SHA256_CHECKSUM=9d32b26e6bfcc058d98248bf8fc231537e347395dd89cf62bb432b55c5da990d +-ARROW_THRIFT_BUILD_VERSION=0.16.0 +-ARROW_THRIFT_BUILD_SHA256_CHECKSUM=f460b5c1ca30d8918ff95ea3eb6291b3951cf518553566088f3f2be8981f6209 ++ARROW_THRIFT_BUILD_VERSION=0.21.0 ++ARROW_THRIFT_BUILD_SHA256_CHECKSUM=9a24f3eba9a4ca493602226c16d8c228037db3b9291c6fc4019bfe3bd39fc67c + ARROW_UCX_BUILD_VERSION=1.12.1 + ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71 + ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc -index d2d976677..d7dd01ecd 100644 +index d2d976677..8d7dafd84 100644 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ b/java/dataset/src/main/cpp/jni_wrapper.cc -@@ -126,20 +126,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener { +@@ -27,7 +27,9 @@ + #include "arrow/dataset/file_base.h" + #include "arrow/filesystem/localfs.h" + #include "arrow/filesystem/path_util.h" ++#ifdef ARROW_S3 + #include "arrow/filesystem/s3fs.h" ++#endif + #include "arrow/engine/substrait/util.h" + #include "arrow/engine/substrait/serde.h" + #include "arrow/engine/substrait/relation.h" +@@ -126,20 +128,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener { : vm_(vm), java_reservation_listener_(java_reservation_listener) {} arrow::Status OnReservation(int64_t size) override { @@ -96,6 +121,16 @@ index d2d976677..d7dd01ecd 100644 env->CallObjectMethod(java_reservation_listener_, unreserve_memory_method, size); RETURN_NOT_OK(arrow::dataset::jni::CheckException(env)); return arrow::Status::OK(); +@@ -622,7 +618,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe + JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized( + JNIEnv* env, jobject) { + JNI_METHOD_START ++#ifdef ARROW_S3 + JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized()); ++#endif + JNI_METHOD_END() + } + diff --git a/java/pom.xml b/java/pom.xml index a8328576b..57f282c6c 100644 --- a/java/pom.xml @@ -110,26 +145,3 @@ index a8328576b..57f282c6c 100644 -DARROW_SUBSTRAIT=${ARROW_DATASET} -DARROW_USE_CCACHE=ON -DCMAKE_BUILD_TYPE=Release -diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc -index d2d976677..eb4b6d1d2 100644 ---- a/java/dataset/src/main/cpp/jni_wrapper.cc -+++ b/java/dataset/src/main/cpp/jni_wrapper.cc -@@ -27,7 +27,9 @@ - #include "arrow/dataset/file_base.h" - #include "arrow/filesystem/localfs.h" - #include "arrow/filesystem/path_util.h" -+#ifdef ARROW_S3 - #include "arrow/filesystem/s3fs.h" -+#endif - #include "arrow/engine/substrait/util.h" - #include "arrow/engine/substrait/serde.h" - #include "arrow/engine/substrait/relation.h" -@@ -622,7 +624,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe - JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized( - JNIEnv* env, jobject) { - JNI_METHOD_START -+#ifdef ARROW_S3 - JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized()); -+#endif - JNI_METHOD_END() - } From 66e376025a35e1f31aa976292cad921eb7b22f1b Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Mon, 9 Feb 2026 10:12:38 +0000 Subject: [PATCH 3/3] Integrate --- cpp/CMake/FindThrift.cmake | 167 ------------------------------------- cpp/velox/CMakeLists.txt | 6 ++ dev/build-arrow.sh | 8 +- dev/vcpkg/vcpkg.json | 8 +- 4 files changed, 13 insertions(+), 176 deletions(-) delete mode 100644 cpp/CMake/FindThrift.cmake diff --git a/cpp/CMake/FindThrift.cmake b/cpp/CMake/FindThrift.cmake deleted file mode 100644 index 273500a6ae36..000000000000 --- a/cpp/CMake/FindThrift.cmake +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2012 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# * Find Thrift (a cross platform RPC lib/tool) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# Thrift_ROOT - When set, this path is inspected instead of standard library -# locations as the root of the Thrift installation. The environment variable -# THRIFT_HOME overrides this variable. -# -# This module defines Thrift_FOUND, whether Thrift is found or not -# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not -# -# thrift::thrift, a library target to use Thrift thrift::compiler, a executable -# target to use Thrift compiler - -function(EXTRACT_THRIFT_VERSION) - if(THRIFT_INCLUDE_DIR) - file(READ "${THRIFT_INCLUDE_DIR}/thrift/config.h" THRIFT_CONFIG_H_CONTENT) - string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" - THRIFT_VERSION_DEFINITION "${THRIFT_CONFIG_H_CONTENT}") - string(REGEX MATCH "[0-9.]+" Thrift_VERSION "${THRIFT_VERSION_DEFINITION}") - set(Thrift_VERSION - "${Thrift_VERSION}" - PARENT_SCOPE) - else() - set(Thrift_VERSION - "" - PARENT_SCOPE) - endif() -endfunction(EXTRACT_THRIFT_VERSION) - -if(MSVC_TOOLCHAIN AND NOT DEFINED THRIFT_MSVC_LIB_SUFFIX) - if(NOT ARROW_THRIFT_USE_SHARED) - if(ARROW_USE_STATIC_CRT) - if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(THRIFT_MSVC_LIB_SUFFIX "mtd") - else() - set(THRIFT_MSVC_LIB_SUFFIX "mt") - endif() - else() - if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(THRIFT_MSVC_LIB_SUFFIX "mdd") - else() - set(THRIFT_MSVC_LIB_SUFFIX "md") - endif() - endif() - endif() -endif() -set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}") - -if(ARROW_THRIFT_USE_SHARED) - set(THRIFT_LIB_NAMES thrift) - if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list( - APPEND - THRIFT_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) - endif() - list( - APPEND - THRIFT_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) -else() - set(THRIFT_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) -endif() - -if(Thrift_ROOT) - find_library( - THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") - find_path( - THRIFT_INCLUDE_DIR thrift/Thrift.h - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "include") - find_program( - THRIFT_COMPILER thrift - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "bin") - extract_thrift_version() -else() - # THRIFT-4760: The pkgconfig files are currently only installed when using - # autotools. Starting with 0.13, they are also installed for the CMake-based - # installations of Thrift. - find_package(PkgConfig QUIET) - pkg_check_modules(THRIFT_PC thrift) - if(THRIFT_PC_FOUND) - set(THRIFT_INCLUDE_DIR "${THRIFT_PC_INCLUDEDIR}") - - list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}") - - find_library( - THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATHS ${THRIFT_PC_LIBRARY_DIRS} - NO_DEFAULT_PATH) - find_program( - THRIFT_COMPILER thrift - HINTS ${THRIFT_PC_PREFIX} - NO_DEFAULT_PATH - PATH_SUFFIXES "bin") - set(Thrift_VERSION ${THRIFT_PC_VERSION}) - else() - find_library( - THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") - find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include") - find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin") - extract_thrift_version() - endif() -endif() - -if(THRIFT_COMPILER) - set(Thrift_COMPILER_FOUND TRUE) -else() - set(Thrift_COMPILER_FOUND FALSE) -endif() - -find_package_handle_standard_args( - Thrift - REQUIRED_VARS THRIFT_LIB THRIFT_INCLUDE_DIR - VERSION_VAR Thrift_VERSION - HANDLE_COMPONENTS) - -if(Thrift_FOUND) - if(ARROW_THRIFT_USE_SHARED) - add_library(thrift::thrift SHARED IMPORTED) - else() - add_library(thrift::thrift STATIC IMPORTED) - endif() - set_target_properties( - thrift::thrift - PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}" INTERFACE_INCLUDE_DIRECTORIES - "${THRIFT_INCLUDE_DIR}") - if(WIN32 AND NOT MSVC_TOOLCHAIN) - # We don't need this for Visual C++ because Thrift uses "#pragma - # comment(lib, "Ws2_32.lib")" in thrift/windows/config.h for Visual C++. - set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES - "ws2_32") - endif() - - if(Thrift_COMPILER_FOUND) - add_executable(thrift::compiler IMPORTED) - set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION - "${THRIFT_COMPILER}") - endif() -endif() diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt index 56fab701ee07..8f5e8bce15fd 100644 --- a/cpp/velox/CMakeLists.txt +++ b/cpp/velox/CMakeLists.txt @@ -450,6 +450,12 @@ if(ENABLE_GPU) facebook::velox::velox_cudf_expression cudf::cudf velox_curl) endif() +import_library(thriftprotocol ${VELOX_HOME}/deps-download/fbthrift/_build/lib/libthriftprotocol.a) +import_library(thriftcpp2 ${VELOX_HOME}/deps-download/fbthrift/_build/lib/libthriftcpp2.a) +import_library(thriftmetadata ${VELOX_HOME}/deps-download/fbthrift/_build/lib/libthriftmetadata.a) +import_library(facebook::velox::dwio::parquet::thrift::raw ${VELOX_BUILD_PATH}/velox/dwio/parquet/thrift/libvelox_dwio_parquet_thrift_raw.a) +target_link_libraries(velox PRIVATE thriftprotocol thriftcpp2 thriftmetadata facebook::velox::dwio::parquet::thrift::raw) + add_custom_command( TARGET velox POST_BUILD diff --git a/dev/build-arrow.sh b/dev/build-arrow.sh index 41bebc7c4c27..540eb867dad4 100755 --- a/dev/build-arrow.sh +++ b/dev/build-arrow.sh @@ -53,8 +53,7 @@ function build_arrow_cpp() { -DARROW_FILESYSTEM=ON \ -DARROW_PROTOBUF_USE_SHARED=OFF \ -DARROW_DEPENDENCY_USE_SHARED=OFF \ - -DARROW_DEPENDENCY_SOURCE=BUNDLED \ - -DARROW_WITH_THRIFT=ON \ + -DARROW_WITH_THRIFT=OFF \ -DARROW_WITH_LZ4=ON \ -DARROW_WITH_SNAPPY=ON \ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ @@ -68,11 +67,6 @@ function build_arrow_cpp() { -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_STATIC=ON - - # Install thrift. - cd _build/thrift_ep-prefix/src/thrift_ep-build - ${SUDO} cmake --install ./ --prefix "${INSTALL_PREFIX}"/ - popd } function build_arrow_java() { diff --git a/dev/vcpkg/vcpkg.json b/dev/vcpkg/vcpkg.json index aba59968f25a..68543fee63f6 100644 --- a/dev/vcpkg/vcpkg.json +++ b/dev/vcpkg/vcpkg.json @@ -48,7 +48,10 @@ "protobuf", "benchmark", "icu", - "thrift", + "fizz", + "wangle", + "mvfst", + { "name": "fbthrift", "features": ["cpp2"]}, "libstemmer", "geos", "grpc" @@ -114,6 +117,7 @@ { "name": "glog", "version": "0.6.0"}, { "name": "gflags", "version": "2.2.2"}, { "name": "azure-identity-cpp", "version": "1.11.0"}, - { "name": "grpc", "version": "1.51.1"} + { "name": "grpc", "version": "1.51.1"}, + { "name": "fbthrift", "version-date": "2026.01.05.00"} ] }