# perf: idempotent_dir for dataset generation #28284
# Workflow file for this run

name: Linters and Tests
# Concurrency control:
# - PRs: new commits on a feature branch will cancel in-progress (outdated) runs.
# - Push to develop: runs queue sequentially, never cancelled.
# - `workflow_dispatch`: groups by branch and queues if run on develop.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/develop' }}
on:
push:
branches: [develop]
pull_request: { }
workflow_dispatch: { }
permissions:
actions: read
contents: read
checks: write # audit-check creates checks
issues: write # audit-check creates issues
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
NIGHTLY_TOOLCHAIN: nightly-2026-02-05
jobs:
lint-toml:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v6
- uses: spiraldb/actions/.github/actions/lint-toml@0.18.5
validate-workflow-yaml:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v6
- name: Validate YAML file
run: |
# Lint the workflows and yamllint's configuration file.
yamllint \
--strict \
-c .yamllint.yaml \
.github/
python-lint:
name: "Python (lint)"
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=python-lint', github.run_id)
|| 'ubuntu-latest' }}
timeout-minutes: 10
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
# Use uvx for ruff to avoid building the Rust extension (saves ~4.5 min)
- name: Python Lint - Format
run: uvx ruff format --check .
- name: Python Lint - Ruff
run: uvx ruff check .
# PyRight needs the project for type information, so use uv run
- name: Python Lint - PyRight
env:
MATURIN_PEP517_ARGS: "--profile ci"
run: |
uv sync --all-packages
uv run basedpyright vortex-python
python-test:
name: "Python (test)"
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-large/image=ubuntu24-full-x64-pre-v2/tag=python-test', github.run_id)
|| 'ubuntu-latest' }}
timeout-minutes: 30
env:
RUST_LOG: "info,maturin=off,uv=debug"
MATURIN_PEP517_ARGS: "--profile ci"
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Pytest - Vortex
run: |
uv run --all-packages pytest --benchmark-disable -n auto test/
working-directory: vortex-python/
- name: Setup benchmark environment
run: sudo bash scripts/setup-benchmark.sh
- name: Pytest Benchmarks - Vortex
run: |
bash ../scripts/bench-taskset.sh uv run --all-packages pytest --benchmark-only benchmark/
working-directory: vortex-python/
- name: Doctest - PyVortex
run: |
uv run --all-packages make doctest
working-directory: docs/
- name: Ensure docs build - PyVortex
run: |
uv run --all-packages make html
working-directory: docs/
rust-docs:
name: "Rust (docs)"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-small/image=ubuntu24-full-x64-pre-v2/tag=rust-docs', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Docs
run: |
RUSTDOCFLAGS="-D warnings" cargo doc --profile ci --no-deps
# nextest doesn't support doc tests, so we run it here
cargo test --profile ci --doc --workspace --all-features --exclude vortex-cxx --exclude vortex-jni --exclude vortex-ffi --exclude xtask --no-fail-fast
build-rust:
name: "Rust build (${{matrix.config.name}})"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner={1}/image=ubuntu24-full-x64-pre-v2/tag={2}', github.run_id, matrix.config.runner, matrix.config.name)
|| 'ubuntu-latest' }}
env:
# disable lints for build, they will be caught in Rust lint job.
RUSTFLAGS: "-A warnings"
strategy:
fail-fast: false
matrix:
config:
- name: "all-features"
runner: amd64-large
args: "--all-features --all-targets"
- name: "default features"
runner: amd64-large
args: "--all-targets"
- name: "with tokio dispatcher"
runner: amd64-small
# Only build the crates that have the tokio features, not re-building other crates with no-default-features
args: "--no-default-features --features tokio --all-targets -p vortex -p vortex-io -p vortex-file -p vortex-layout"
- name: "wasm32 with default features"
runner: amd64-medium
target: wasm32-unknown-unknown
env:
rustflags: "RUSTFLAGS='-A warnings --cfg getrandom_backend=\"unsupported\"'"
args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda --exclude vortex-sqllogictest"
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Install wasm32 target
if: ${{ matrix.config.target == 'wasm32-unknown-unknown' }}
run: rustup target add wasm32-unknown-unknown
- uses: ./.github/actions/check-rebuild
with:
command: "${{matrix.config.env.rustflags}} cargo hack build --profile ci --locked ${{matrix.config.args}} --ignore-private"
- name: "Make sure no files changed after build"
run: |
git status --porcelain
test -z "$(git status --porcelain)"
check-min-deps:
name: "Check build with minimal dependencies"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=rust-min-deps', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- run: cargo minimal-versions check --direct --workspace --ignore-private
rust-lint:
name: "Rust (lint)"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-large/image=ubuntu24-full-x64-pre-v2/tag=rust-lint', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Install nightly for fmt
run: rustup toolchain install $NIGHTLY_TOOLCHAIN --component rustfmt
- name: Rust Lint - Format
run: cargo +$NIGHTLY_TOOLCHAIN fmt --all --check
- name: Rustc check
run: RUSTFLAGS="-D warnings" cargo check --profile ci --locked --all-features --all-targets
- name: Rustc check (release)
run: RUSTFLAGS="-D warnings" cargo check --locked --all-features --all-targets --release
- name: Rust Lint - Clippy All Features
run: cargo clippy --profile ci --locked --all-features --all-targets -- -D warnings
- name: Rust Lint - Clippy Default Features
run: cargo clippy --profile ci --locked --all-targets -- -D warnings
cpp-lint:
name: "C/C++ (lint)"
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v6
- name: C/C++ Lint - clang-format
run: |
git ls-files vortex-cuda vortex-cxx vortex-duckdb vortex-ffi \
| grep -E '\.(cpp|hpp|cu|cuh|h)$' \
| grep -v 'kernels/src/bit_unpack_.*\.cu$' \
| grep -v 'kernels/src/bit_unpack_.*_lanes\.cuh$' \
| xargs clang-format --dry-run --Werror --style=file
rust-lint-no-default:
name: "Rust (lint, no default)"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=rust-lint-no-default', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Rust Lint - Clippy No Default Features
shell: bash
run: |
cargo hack --no-dev-deps --ignore-private clippy --profile ci --no-default-features -- -D warnings
public-api:
name: "Public API lock files"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-xsmall/image=ubuntu24-full-x64-pre-v2/tag=public-api', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Install nightly for public-api
run: rustup toolchain install $NIGHTLY_TOOLCHAIN
- name: Regenerate public API lock files
run: cargo +$NIGHTLY_TOOLCHAIN run --profile ci -p xtask -- public-api
- name: Verify lock files are up to date
run: |
if ! git diff --quiet '**/public-api.lock'; then
git diff -U0 '**/public-api.lock'
echo ""
echo ""
echo "Changed files:"
git diff --name-only '**/public-api.lock'
echo ""
echo ""
echo "Public API lock files are out of date."
echo "Run './scripts/public-api.sh' locally to"
echo "regenerate them, then commit the changes."
exit 1
fi
rust-test-other:
name: "Rust tests (${{ matrix.os }})"
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
include:
- os: windows-x64
runner: runs-on=${{ github.run_id }}/pool=windows-x64-pre
fallback_runner: windows-latest
- os: linux-arm64
runner: runs-on=${{ github.run_id }}/runner=arm64-medium/image=ubuntu24-full-arm64-pre-v2/tag=rust-test-linux-arm64
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& matrix.runner
|| matrix.fallback_runner }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- name: Setup (Windows)
if: matrix.os == 'windows-x64'
run: |
echo "C:\rust\cargo\bin" >> $env:GITHUB_PATH
- uses: ./.github/actions/setup-prebuild
- name: Rust Tests (Windows)
if: matrix.os == 'windows-x64'
run: |
cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast `
--exclude vortex-bench --exclude vortex-python --exclude vortex-duckdb `
--exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-nvcomp `
--exclude vortex-cub --exclude vortex-test-e2e-cuda --exclude duckdb-bench `
--exclude lance-bench --exclude datafusion-bench --exclude random-access-bench `
--exclude compress-bench --exclude xtask --exclude vortex-datafusion `
--exclude gpu-scan-cli --exclude vortex-sqllogictest
- name: Rust Tests (Other)
if: matrix.os != 'windows-x64'
run: |
cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast --exclude vortex-bench --exclude xtask --exclude vortex-sqllogictest
- uses: ./.github/actions/check-rebuild
if: matrix.os != 'windows-x64'
with:
command: "cargo test --profile ci --locked --workspace --all-features --no-run --exclude vortex-bench --exclude xtask --exclude vortex-sqllogictest"
- name: Alert incident.io
if: failure() && github.event_name == 'push' && github.ref == 'refs/heads/develop'
uses: ./.github/actions/alert-incident-io
with:
api-key: ${{ secrets.INCIDENT_IO_ALERT_TOKEN }}
alert-title: "Rust tests (${{ matrix.os }}) failed on develop"
deduplication-key: ci-rust-test-${{ matrix.os }}-failure
build-java:
name: "Java"
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/pool=amd64-medium-pre-v2/tag=java', github.run_id)
|| 'ubuntu-latest' }}
timeout-minutes: 30
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- run: ./gradlew test --parallel
working-directory: ./java
license-check-and-audit-check:
name: License Check and Audit Check
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
matrix:
checks:
- advisories
- bans licenses sources
# Prevent sudden announcement of a new advisory from failing ci:
continue-on-error: ${{ matrix.checks == 'advisories' }}
steps:
- uses: actions/checkout@v6
- uses: EmbarkStudios/cargo-deny-action@v2
with:
command: check ${{ matrix.checks }}
cxx-test:
name: "C++ build"
timeout-minutes: 30
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=cxx-build', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Build and run C++ unit tests
run: |
mkdir -p vortex-cxx/build
cmake -S vortex-cxx -B vortex-cxx/build -DVORTEX_ENABLE_TESTING=ON -DVORTEX_ENABLE_ASAN=ON
cmake --build vortex-cxx/build --parallel $(nproc)
ctest --test-dir vortex-cxx/build -j $(nproc) -V
- name: Build and run the example in release mode
run: |
cmake -S vortex-cxx/examples -B vortex-cxx/examples/build -DCMAKE_BUILD_TYPE=Release
cmake --build vortex-cxx/examples/build --parallel $(nproc)
vortex-cxx/examples/build/hello-vortex vortex-cxx/examples/goldenfiles/example.vortex
- uses: ./.github/actions/check-rebuild
with:
command: "cargo build --profile ci --locked -p vortex-cxx --lib"
sqllogic-test:
name: "SQL logic tests"
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=sql-logic-test', github.run_id)
|| 'ubuntu-latest' }}
timeout-minutes: 30
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Run sqllogictest tests
run: |
./vortex-sqllogictest/slt/tpch/generate_data.sh
cargo test --profile ci -p vortex-sqllogictest --test sqllogictests
- uses: ./.github/actions/check-rebuild
with:
command: "cargo test --profile ci -p vortex-sqllogictest --test sqllogictests --no-run"
wasm-integration:
name: "WASM integration smoke test"
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
targets: "wasm32-wasip1"
- name: Setup Wasmer
shell: bash
run: |
curl https://get.wasmer.io -sSfL | sh
echo "$HOME/.wasmer/bin" >> $GITHUB_PATH
- run: cargo build --profile ci --target wasm32-wasip1
working-directory: ./wasm-test
- run: wasmer run ./target/wasm32-wasip1/ci/wasm-test.wasm
working-directory: ./wasm-test
generated-files:
name: "Check generated source files are up to date"
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=generated-files', github.run_id)
|| 'ubuntu-latest' }}
timeout-minutes: 30
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: Install nightly for cbindgen macro expansion
run: rustup toolchain install $NIGHTLY_TOOLCHAIN
- name: "regenerate all .fbs/.proto Rust code"
run: |
cargo run --profile ci -p xtask -- generate-fbs
cargo run --profile ci -p xtask -- generate-proto
- name: "regenerate FFI header file"
run: |
cargo +$NIGHTLY_TOOLCHAIN build --profile ci -p vortex-ffi
- name: "Make sure no files changed after regenerating"
run: |
git status --porcelain
test -z "$(git status --porcelain)"
- name: "Checkout develop flatbuffers"
working-directory: vortex-flatbuffers/
run: |
cp -R flatbuffers flatbuffers.HEAD
git fetch origin develop --depth 1
git checkout origin/develop -- flatbuffers
- name: "Verify flatbuffer back-compat"
working-directory: vortex-flatbuffers/
run: |
find flatbuffers/ -type f -name "*.fbs" | sed 's/^flatbuffers\///' | xargs -I{} -n1 flatc -I flatbuffers.HEAD --conform-includes flatbuffers --conform flatbuffers/{} flatbuffers.HEAD/{}
ffi-c-test:
name: "C API test build"
timeout-minutes: 10
runs-on: >-
${{ github.repository == 'vortex-data/vortex'
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=cxx-build', github.run_id)
|| 'ubuntu-latest' }}
steps:
- uses: runs-on/action@v2
if: github.repository == 'vortex-data/vortex'
with:
sccache: s3
- uses: actions/checkout@v6
- uses: ./.github/actions/setup-prebuild
- name: "regenerate FFI header file"
run: |
cargo +$NIGHTLY_TOOLCHAIN build --profile ci -p vortex-ffi
- name: Build and run C++ unit tests
run: |
cd vortex-ffi
mkdir build
cmake -Bbuild -DRUST_BUILD_PROFILE=ci
cmake --build build -j $(nproc)
ctest --test-dir build -j $(nproc)