From 86d39c4dac0fc020eb5573e7ca1bdcb32658e839 Mon Sep 17 00:00:00 2001 From: bearstonem Date: Wed, 17 Dec 2025 13:39:14 -0800 Subject: [PATCH 1/4] Fix installation issues in setup.sh - Add CUDA_HOME auto-detection with fallback to common paths - Fix linker errors by setting LIBRARY_PATH and LDFLAGS for libcuda.so - Convert git submodule URLs from SSH to HTTPS to avoid auth issues - Fix o-voxel dependencies to prevent reinstalling cumesh/flex_gemm - Add informative logging messages during installation - Update README with troubleshooting notes Fixes: - 'ld: cannot find -lcuda' linker errors during nvdiffrec/cumesh build - 'Permission denied (publickey)' errors when cloning CuMesh submodules - Dependency conflicts when installing o-voxel Co-Authored-By: Warp --- README.md | 6 +++++- setup.sh | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f55e315..18c2ace 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,11 @@ Data processing is streamlined for instant conversions that are fully **renderin **Before running the following command there are somethings to note:** - By adding `--new-env`, a new conda environment named `trellis2` will be created. If you want to use an existing conda environment, please remove this flag. - By default the `trellis2` environment will use pytorch 2.6.0 with CUDA 12.4. If you want to use a different version of CUDA, you can remove the `--new-env` flag and manually install the required dependencies. Refer to [PyTorch](https://pytorch.org/get-started/previous-versions/) for the installation command. - - If you have multiple CUDA Toolkit versions installed, `CUDA_HOME` should be set to the correct version before running the command. For example, if you have CUDA Toolkit 12.4 and 13.0 installed, you can run `export CUDA_HOME=/usr/local/cuda-12.4` before running the command. 
+ - If you have multiple CUDA Toolkit versions installed, `CUDA_HOME` should be set to the correct version before running the command. For example, if you have CUDA Toolkit 12.4 and 13.0 installed, you can run `export CUDA_HOME=/usr/local/cuda-12.4` first. The script will attempt to auto-detect CUDA if `CUDA_HOME` is not set. + - **IMPORTANT:** The script automatically configures CUDA library paths to fix common linker issues with `libcuda.so`. If you encounter linker errors, ensure that: + - `libcuda.so` exists in `/usr/lib/x86_64-linux-gnu/` or your system's library path + - Your CUDA toolkit is properly installed with development libraries + - The script automatically converts git submodule URLs from SSH to HTTPS to avoid authentication issues during installation. - By default, the code uses the `flash-attn` backend for attention. For GPUs do not support `flash-attn` (e.g., NVIDIA V100), you can install `xformers` manually and set the `ATTN_BACKEND` environment variable to `xformers` before running the code. See the [Minimal Example](#minimal-example) for more details. - The installation may take a while due to the large number of dependencies. Please be patient. If you encounter any issues, you can try to install the dependencies one by one, specifying one flag at a time. - If you encounter any issues during the installation, feel free to open an issue or contact us. 
diff --git a/setup.sh b/setup.sh index e09e2b1..d9f4011 100644 --- a/setup.sh +++ b/setup.sh @@ -66,6 +66,26 @@ else exit 1 fi +# Set CUDA_HOME if not already set (for CUDA platform) +if [ "$PLATFORM" = "cuda" ] ; then + if [ -z "$CUDA_HOME" ]; then + # Try to find CUDA installation + if [ -d "/usr/local/cuda-12.4" ]; then + export CUDA_HOME=/usr/local/cuda-12.4 + echo "[INFO] CUDA_HOME not set, using: $CUDA_HOME" + elif [ -d "/usr/local/cuda" ]; then + export CUDA_HOME=/usr/local/cuda + echo "[INFO] CUDA_HOME not set, using: $CUDA_HOME" + else + echo "[WARNING] CUDA_HOME not set and could not auto-detect CUDA installation." + echo "[WARNING] If compilation fails, please set CUDA_HOME manually before running this script." + echo "[WARNING] Example: export CUDA_HOME=/usr/local/cuda-12.4" + fi + else + echo "[INFO] Using CUDA_HOME: $CUDA_HOME" + fi +fi + if [ "$NEW_ENV" = true ] ; then conda create -n trellis2 python=3.10 conda activate trellis2 @@ -112,7 +132,11 @@ fi if [ "$NVDIFFREC" = true ] ; then if [ "$PLATFORM" = "cuda" ] ; then + echo "[NVDIFFREC] Setting CUDA library paths..." + export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} + export LDFLAGS="-L/usr/lib/x86_64-linux-gnu -L${CUDA_HOME}/lib64/stubs" mkdir -p /tmp/extensions + rm -rf /tmp/extensions/nvdiffrec git clone -b renderutils https://github.com/JeffreyXiang/nvdiffrec.git /tmp/extensions/nvdiffrec pip install /tmp/extensions/nvdiffrec --no-build-isolation else @@ -121,8 +145,20 @@ if [ "$NVDIFFREC" = true ] ; then fi if [ "$CUMESH" = true ] ; then + echo "[CUMESH] Setting CUDA library paths..." + export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} + export LDFLAGS="-L/usr/lib/x86_64-linux-gnu -L${CUDA_HOME}/lib64/stubs" mkdir -p /tmp/extensions - git clone https://github.com/JeffreyXiang/CuMesh.git /tmp/extensions/CuMesh --recursive + rm -rf /tmp/extensions/CuMesh + echo "[CUMESH] Cloning repository..." 
+ git clone https://github.com/JeffreyXiang/CuMesh.git /tmp/extensions/CuMesh + cd /tmp/extensions/CuMesh + echo "[CUMESH] Fixing submodule URLs to use HTTPS..." + git config --file=.gitmodules submodule.third_party/cubvh.url https://github.com/JeffreyXiang/cubvh.git + git submodule sync + echo "[CUMESH] Initializing submodules..." + git submodule update --init --recursive + cd $WORKDIR pip install /tmp/extensions/CuMesh --no-build-isolation fi @@ -133,7 +169,16 @@ if [ "$FLEXGEMM" = true ] ; then fi if [ "$OVOXEL" = true ] ; then + echo "[O-VOXEL] Setting CUDA library paths..." + export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH} + export LDFLAGS="-L/usr/lib/x86_64-linux-gnu -L${CUDA_HOME}/lib64/stubs" mkdir -p /tmp/extensions + rm -rf /tmp/extensions/o-voxel cp -r o-voxel /tmp/extensions/o-voxel + echo "[O-VOXEL] Fixing dependencies to avoid git reinstall..." + cd /tmp/extensions/o-voxel + sed -i 's|cumesh @ git+https://github.com/JeffreyXiang/CuMesh.git|"cumesh"|' pyproject.toml + sed -i 's|flex_gemm @ git+https://github.com/JeffreyXiang/FlexGEMM.git|"flex_gemm"|' pyproject.toml + cd $WORKDIR pip install /tmp/extensions/o-voxel --no-build-isolation fi From c48a32e2fc7b56c024b02864494f3032cf4a1f4d Mon Sep 17 00:00:00 2001 From: bearstonem Date: Wed, 17 Dec 2025 16:21:58 -0800 Subject: [PATCH 2/4] Add --no-build-isolation flag to flash-attn installation This ensures flash-attn builds correctly with the existing environment. --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index d9f4011..adee1f9 100644 --- a/setup.sh +++ b/setup.sh @@ -106,7 +106,7 @@ fi if [ "$FLASHATTN" = true ] ; then if [ "$PLATFORM" = "cuda" ] ; then - pip install flash-attn==2.7.3 + pip install flash-attn==2.7.3 --no-build-isolation elif [ "$PLATFORM" = "hip" ] ; then echo "[FLASHATTN] Prebuilt binaries not found. Building from source..." 
mkdir -p /tmp/extensions From f41582be7839da26e059a115bcf866751b751494 Mon Sep 17 00:00:00 2001 From: bearstonem Date: Mon, 13 Apr 2026 15:09:59 -0700 Subject: [PATCH 3/4] Fix o-voxel pyproject.toml sed: don't double-quote replacement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dependency entries in o-voxel/pyproject.toml live inside a quoted TOML array, like: dependencies = [ "cumesh @ git+https://github.com/JeffreyXiang/CuMesh.git", ... ] The previous sed replaced `cumesh @ git+...` with `"cumesh"` (literal quotes in the replacement), producing `""cumesh""` — invalid TOML. pip then fails with `tomllib.TOMLDecodeError: Unclosed array`. Strip the quotes from the replacement so the outer pair is preserved. --- setup.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.sh b/setup.sh index adee1f9..9b32935 100644 --- a/setup.sh +++ b/setup.sh @@ -177,8 +177,12 @@ if [ "$OVOXEL" = true ] ; then cp -r o-voxel /tmp/extensions/o-voxel echo "[O-VOXEL] Fixing dependencies to avoid git reinstall..." cd /tmp/extensions/o-voxel - sed -i 's|cumesh @ git+https://github.com/JeffreyXiang/CuMesh.git|"cumesh"|' pyproject.toml - sed -i 's|flex_gemm @ git+https://github.com/JeffreyXiang/FlexGEMM.git|"flex_gemm"|' pyproject.toml + # The git refs in pyproject.toml live inside a quoted TOML dependencies + # array (e.g. "cumesh @ git+https://..."). Replace only the inner ref so + # the surrounding quotes are preserved — otherwise we produce ""cumesh"" + # and pip rejects pyproject.toml with "Unclosed array". 
+ sed -i 's|cumesh @ git+https://github.com/JeffreyXiang/CuMesh.git|cumesh|' pyproject.toml + sed -i 's|flex_gemm @ git+https://github.com/JeffreyXiang/FlexGEMM.git|flex_gemm|' pyproject.toml cd $WORKDIR pip install /tmp/extensions/o-voxel --no-build-isolation fi From cd24b8fbc72b8466133aa8b2f21c356b33d296cc Mon Sep 17 00:00:00 2001 From: bearstonem Date: Mon, 13 Apr 2026 16:03:39 -0700 Subject: [PATCH 4/4] flash-attn: install prebuilt 2.8.3 wheel instead of source-building 2.7.3 flash-attn 2.7.3 has no prebuilt wheel for torch 2.7 (release ships 2.1-2.6 + 2.8 only), and source-building against torch 2.7.1+cu128 fails under modern setuptools/wheel during egg-info collection. 2.8.3 publishes an exact `cu12torch2.7cxx11abiFALSE` wheel for cp311 (and other cp3x). The bumped version is API-compatible for the dense and varlen attention paths TRELLIS.2 uses. --- setup.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 9b32935..deb2b80 100644 --- a/setup.sh +++ b/setup.sh @@ -106,7 +106,14 @@ fi if [ "$FLASHATTN" = true ] ; then if [ "$PLATFORM" = "cuda" ] ; then - pip install flash-attn==2.7.3 --no-build-isolation + # flash-attn 2.7.3 has no prebuilt wheel for torch 2.7 (only 2.1-2.6 + 2.8), + # and source builds against torch 2.7+cu128 fail in modern setuptools. + # 2.8.3 ships an exact torch2.7+cu12 wheel and is a drop-in replacement + # for the API surfaces TRELLIS.2 uses (varlen / dense attention). + FA_VER="2.8.3" + FA_PY=$(python -c 'import sys;print(f"cp{sys.version_info.major}{sys.version_info.minor}")') + FA_WHL="https://github.com/Dao-AILab/flash-attention/releases/download/v${FA_VER}/flash_attn-${FA_VER}+cu12torch2.7cxx11abiFALSE-${FA_PY}-${FA_PY}-linux_x86_64.whl" + pip install --no-cache-dir "$FA_WHL" elif [ "$PLATFORM" = "hip" ] ; then echo "[FLASHATTN] Prebuilt binaries not found. Building from source..." mkdir -p /tmp/extensions