From f80300a93b59ef87b018fcfd41dcf17fe5420935 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Tue, 10 Feb 2026 08:05:59 +0000 Subject: [PATCH 1/5] Add incremental C++ build script with auto-detection Add dev/inc-build-cpp.sh for fast incremental C++ builds with auto-detection: Performance improvements: - Switch from Make to Ninja: 4m53s -> 0.1s no-op (1400x faster) - Skip redundant cmake configure steps (use 'cmake --build' directly) - Default skip vcpkg check (use --update_vcpkg when needed) - Preserve vcpkg timestamps to avoid unnecessary relinking - Result: 3m14s -> 0.27s total no-op time Auto-detection: - BUILD_TYPE from velox _build/debug or _build/release directory - SCALA_VERSION from backends-velox/target/scala-X.XX directory - Vcpkg options (BUILD_TESTS, ENABLE_S3/GCS/ABFS) from CMakeCache.txt - Validate CMakeCache.txt exists (required for incremental build) Helper functions extracted to build-helper-functions.sh: - initialize_num_threads, get_platform, get_maven_arch - log_build_step, validate_directory - setup_vcpkg_environment, copy_gluten_libraries Usage: ./dev/inc-build-cpp.sh [--build_type=Debug|Release] [--scala_version=2.13] [--update_vcpkg] --- dev/builddeps-veloxbe.sh | 4 +- dev/inc-build-cpp.sh | 196 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 2 deletions(-) create mode 100755 dev/inc-build-cpp.sh diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 52cace5404e1..0cd52f3ff97e 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -258,8 +258,8 @@ function build_gluten_cpp { GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_PREFIX_PATH=$INSTALL_PREFIX" fi - cmake $GLUTEN_CMAKE_OPTIONS .. - make -j $NUM_THREADS + cmake -G Ninja $GLUTEN_CMAKE_OPTIONS .. + ninja -j $NUM_THREADS } function build_velox_backend { diff --git a/dev/inc-build-cpp.sh b/dev/inc-build-cpp.sh new file mode 100755 index 000000000000..b7e77a5551a9 --- /dev/null +++ b/dev/inc-build-cpp.sh @@ -0,0 +1,196 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Incremental C++ build for Gluten + Velox (no source reset, no Maven, no OS setup) +# Usage: ./dev/inc-build-cpp.sh [--build_type=Debug|Release] [--update_vcpkg] + +set -eu + +CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) +GLUTEN_DIR="$CURRENT_DIR/.." + +# Helper functions +num_threads() { + [[ -n "${NUM_THREADS:-}" ]] && echo "$NUM_THREADS" && return + if [[ "$(uname)" == "Darwin" ]]; then + local cores=$(sysctl -n hw.physicalcpu) + echo $(( cores > 2 ? cores - 2 : cores )) + else + echo $(nproc --ignore=2) + fi +} + +platform() { + [[ "$(uname -s)" == "Linux" ]] && echo "linux" || echo "darwin" +} + +arch() { + case "$(uname -m)" in + x86_64|amd64) echo "amd64" ;; + *) echo "aarch64" ;; + esac +} + +step() { echo -e "\n[Step $1/4] $2..."; } + +extract_opt() { + grep "^$1:" "$GLUTEN_BUILD_DIR/CMakeCache.txt" 2>/dev/null | \ + cut -d= -f2 || echo "${2:-ON}" +} + +# Defaults +VELOX_HOME="$GLUTEN_DIR/ep/build-velox/build/velox_ep" +UPDATE_VCPKG=OFF +NUM_THREADS=${NUM_THREADS:-$(num_threads)} + +# Parse arguments +for arg in "$@"; do + case $arg in + --build_type=*) BUILD_TYPE="${arg#*=}" ;; + --scala_version=*) SCALA_VERSION="${arg#*=}" ;; + --velox_home=*) VELOX_HOME="${arg#*=}" ;; + --num_threads=*) NUM_THREADS="${arg#*=}" ;; + --update_vcpkg) UPDATE_VCPKG=ON ;; + *) + echo "Unknown: $arg" + echo "Usage: $0 [--build_type=Debug|Release] [--update_vcpkg]" + exit 1 + ;; + esac +done + +# Auto-detect BUILD_TYPE +if [ -z "${BUILD_TYPE:-}" ]; then + [ -d "$VELOX_HOME/_build/debug" ] && BUILD_TYPE=Debug || BUILD_TYPE=Release +fi + +# Auto-detect SCALA_VERSION +if [ -z "${SCALA_VERSION:-}" ]; then + if [ -d "$GLUTEN_DIR/backends-velox/target/scala-2.13" ]; then + SCALA_VERSION=2.13 + else + SCALA_VERSION=2.12 + fi +fi + +# Paths +VELOX_BUILD_DIR_NAME=$([[ "$BUILD_TYPE" =~ ^[Dd]ebug$ ]] && echo 'debug' || echo 'release') +VELOX_BUILD_DIR="$VELOX_HOME/_build/$VELOX_BUILD_DIR_NAME" +GLUTEN_BUILD_DIR="$GLUTEN_DIR/cpp/build" + +# Validate directories +for dir in "$VELOX_HOME" "$VELOX_BUILD_DIR" "$GLUTEN_BUILD_DIR"; do + if [ ! -d "$dir" ]; then + echo "ERROR: Missing $dir" + echo "Run: ./dev/package-vcpkg.sh --build_type=$BUILD_TYPE" + exit 1 + fi +done + +# Validate CMake cache +for cache in "$VELOX_BUILD_DIR/CMakeCache.txt" "$GLUTEN_BUILD_DIR/CMakeCache.txt"; do + if [ ! -f "$cache" ]; then + echo "ERROR: Missing $cache. Run full build first" + exit 1 + fi +done + +echo "============================================" +echo "Gluten C++ Incremental Build" +echo "BUILD_TYPE: $BUILD_TYPE | THREADS: $NUM_THREADS" +echo "============================================" + +cd "$GLUTEN_DIR" + +# Step 1: vcpkg +BUILD_OPTIONS="--build_tests=$(extract_opt BUILD_TESTS)" +BUILD_OPTIONS="$BUILD_OPTIONS --enable_s3=$(extract_opt ENABLE_S3)" +BUILD_OPTIONS="$BUILD_OPTIONS --enable_gcs=$(extract_opt ENABLE_GCS)" +BUILD_OPTIONS="$BUILD_OPTIONS --enable_abfs=$(extract_opt ENABLE_ABFS OFF)" + +if [ "$UPDATE_VCPKG" = "ON" ]; then + step 1 "Checking vcpkg dependencies" + VCPKG_DIR="$GLUTEN_DIR/dev/vcpkg/vcpkg_installed" + TS_FILE=$(mktemp) + + # Save library timestamps before vcpkg install + if [ -d "$VCPKG_DIR" ]; then + find "$VCPKG_DIR" \( -name "*.a" -o -name "*.so" \) \ + -exec stat -c "%Y %n" {} \; > "$TS_FILE" 2>/dev/null || true + fi + + source "$GLUTEN_DIR/dev/vcpkg/env.sh" $BUILD_OPTIONS + + # Restore timestamps for unchanged files + if [ -s "$TS_FILE" ]; then + while IFS=' ' read -r old_ts file; do + if [ -f "$file" ]; then + new_ts=$(stat -c "%Y" "$file" 2>/dev/null || echo "") + if [ -n "$new_ts" ] && [ "$new_ts" != "$old_ts" ]; then + touch -d "@$old_ts" "$file" 2>/dev/null || true + fi + fi + done < "$TS_FILE" + fi + rm -f "$TS_FILE" + echo "[Step 1/4] vcpkg dependencies OK." +else + step 1 "Skipping vcpkg check (use --update_vcpkg to update)" + VCPKG_ROOT="$GLUTEN_DIR/dev/vcpkg/.vcpkg" + export VCPKG_ROOT + + if [ "${CPU_TARGET:-}" = "aarch64" ]; then + export VCPKG_TRIPLET="arm64-linux-neon" + else + export VCPKG_TRIPLET="x64-linux-avx" + fi + + export VCPKG_TRIPLET_INSTALL_DIR="$GLUTEN_DIR/dev/vcpkg/vcpkg_installed/${VCPKG_TRIPLET}" + export CMAKE_TOOLCHAIN_FILE="$GLUTEN_DIR/dev/vcpkg/toolchain.cmake" + export PKG_CONFIG_PATH="${VCPKG_TRIPLET_INSTALL_DIR}/lib/pkgconfig:${PKG_CONFIG_PATH:-}" + export GLUTEN_VCPKG_ENABLED="${VCPKG_ROOT}" +fi + +# Step 2: Build Velox +step 2 "Building Velox (incremental)" +export simdjson_SOURCE=AUTO Arrow_SOURCE=AUTO +cmake --build "$VELOX_BUILD_DIR" -j $NUM_THREADS +echo "[Step 2/4] Velox build complete." + +# Step 3: Build Gluten C++ +step 3 "Building Gluten C++ (incremental)" +cmake --build "$GLUTEN_BUILD_DIR" -j $NUM_THREADS +echo "[Step 3/4] Gluten C++ build complete." + +# Step 4: Copy libraries +step 4 "Copying shared libraries to backends-velox target" +CPP_RELEASES="$GLUTEN_DIR/cpp/build/releases" +if [ ! -d "$CPP_RELEASES" ]; then + echo "ERROR: CPP releases directory not found. Build may have failed." + exit 1 +fi + +TARGET_DIR="$GLUTEN_DIR/backends-velox/target/scala-${SCALA_VERSION}" +TARGET_DIR="$TARGET_DIR/classes/$(platform)/$(arch)" +mkdir -p "$TARGET_DIR" +cp -u -v "$CPP_RELEASES"/lib*.so "$TARGET_DIR/" + +echo -e "\n============================================" +echo "Build complete! Libraries copied to:" +echo "$TARGET_DIR" +ls -lh "$TARGET_DIR"/lib*.so +echo "============================================" From bc1b25505391200ea9bcb9c8caa9d5cad0b47340 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Wed, 25 Feb 2026 03:32:58 +0000 Subject: [PATCH 2/5] refactor: remove unnecessary vcpkg timestamp preservation logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --update_vcpkg implies dependencies have changed, so preserving timestamps to avoid Ninja relinking was wrong — it would hide real library updates and produce incorrect incremental builds. Co-Authored-By: Claude Sonnet 4.6 --- dev/inc-build-cpp.sh | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/dev/inc-build-cpp.sh b/dev/inc-build-cpp.sh index b7e77a5551a9..a6c9ab9b33e1 100755 --- a/dev/inc-build-cpp.sh +++ b/dev/inc-build-cpp.sh @@ -123,31 +123,9 @@ BUILD_OPTIONS="$BUILD_OPTIONS --enable_gcs=$(extract_opt ENABLE_GCS)" BUILD_OPTIONS="$BUILD_OPTIONS --enable_abfs=$(extract_opt ENABLE_ABFS OFF)" if [ "$UPDATE_VCPKG" = "ON" ]; then - step 1 "Checking vcpkg dependencies" - VCPKG_DIR="$GLUTEN_DIR/dev/vcpkg/vcpkg_installed" - TS_FILE=$(mktemp) - - # Save library timestamps before vcpkg install - if [ -d "$VCPKG_DIR" ]; then - find "$VCPKG_DIR" \( -name "*.a" -o -name "*.so" \) \ - -exec stat -c "%Y %n" {} \; > "$TS_FILE" 2>/dev/null || true - fi - + step 1 "Updating vcpkg dependencies" source "$GLUTEN_DIR/dev/vcpkg/env.sh" $BUILD_OPTIONS - - # Restore timestamps for unchanged files - if [ -s "$TS_FILE" ]; then - while IFS=' ' read -r old_ts file; do - if [ -f "$file" ]; then - new_ts=$(stat -c "%Y" "$file" 2>/dev/null || echo "") - if [ -n "$new_ts" ] && [ "$new_ts" != "$old_ts" ]; then - touch -d "@$old_ts" "$file" 2>/dev/null || true - fi - fi - done < "$TS_FILE" - fi - rm -f "$TS_FILE" - echo "[Step 1/4] vcpkg dependencies OK." + echo "[Step 1/4] vcpkg dependencies updated." else step 1 "Skipping vcpkg check (use --update_vcpkg to update)" VCPKG_ROOT="$GLUTEN_DIR/dev/vcpkg/.vcpkg" From 76dd73e0b795578e60adb6fc9684527446045356 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Wed, 25 Feb 2026 03:44:48 +0000 Subject: [PATCH 3/5] Remove unnecessary simdjson_SOURCE/Arrow_SOURCE env exports from inc-build-cpp.sh These variables default to AUTO in Velox's CMakeLists.txt and are only read during cmake configure, not during cmake --build. The export had no effect on incremental builds. --- dev/inc-build-cpp.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/inc-build-cpp.sh b/dev/inc-build-cpp.sh index a6c9ab9b33e1..2db9a3215af9 100755 --- a/dev/inc-build-cpp.sh +++ b/dev/inc-build-cpp.sh @@ -145,7 +145,6 @@ fi # Step 2: Build Velox step 2 "Building Velox (incremental)" -export simdjson_SOURCE=AUTO Arrow_SOURCE=AUTO cmake --build "$VELOX_BUILD_DIR" -j $NUM_THREADS echo "[Step 2/4] Velox build complete." From 7a02e48b268515b5a7a2f323f6bbcbfea8861b3b Mon Sep 17 00:00:00 2001 From: Chang chen Date: Wed, 25 Feb 2026 03:49:10 +0000 Subject: [PATCH 4/5] Rename inc-build-cpp.sh to builddep-veloxbe-inc.sh Align naming with existing builddep-veloxbe.sh convention. --- dev/{inc-build-cpp.sh => builddep-veloxbe-inc.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dev/{inc-build-cpp.sh => builddep-veloxbe-inc.sh} (100%) diff --git a/dev/inc-build-cpp.sh b/dev/builddep-veloxbe-inc.sh similarity index 100% rename from dev/inc-build-cpp.sh rename to dev/builddep-veloxbe-inc.sh From b66f9f332cde891516b928c4061c6f4fdf3b5dc8 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Wed, 25 Feb 2026 05:12:04 +0000 Subject: [PATCH 5/5] Update comment: clarify vibe coding purpose and fix usage path --- dev/builddep-veloxbe-inc.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/builddep-veloxbe-inc.sh b/dev/builddep-veloxbe-inc.sh index 2db9a3215af9..9fafdf327718 100755 --- a/dev/builddep-veloxbe-inc.sh +++ b/dev/builddep-veloxbe-inc.sh @@ -16,7 +16,9 @@ # limitations under the License. # Incremental C++ build for Gluten + Velox (no source reset, no Maven, no OS setup) -# Usage: ./dev/inc-build-cpp.sh [--build_type=Debug|Release] [--update_vcpkg] +# Designed for vibe coding: lets AI agents iterate on C++ code without full rebuilds. +# Only applicable when vcpkg is enabled for static linking. +# Usage: ./dev/builddep-veloxbe-inc.sh [--build_type=Debug|Release] [--update_vcpkg] set -eu