Reviewed revision of the TRELLIS.2 Windows / texturing patch.
Everything above the first "diff --git" header is commentary, not part of any hunk.
  - Line breaks and indentation were reconstructed from a whitespace-collapsed copy of
    the patch; re-sync the context lines against the target tree before applying.
  - "index" blob ids are carried over from the original patch and no longer match the
    files whose added content changed in this revision.
  - Hunk line counts were recomputed for every hunk whose added lines changed.

diff --git a/build_cumesh.bat b/build_cumesh.bat
new file mode 100644
index 0000000..f20ea6d
--- /dev/null
+++ b/build_cumesh.bat
@@ -0,0 +1,14 @@
+@echo off
+:: Clone (if needed) and install CuMesh into the .venv that sits next to this script.
+call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
+:: A caller-provided TORCH_CUDA_ARCH_LIST wins; 12.0 is only the default.
+if not defined TORCH_CUDA_ARCH_LIST set TORCH_CUDA_ARCH_LIST=12.0
+set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8
+set EXT_DIR=%TEMP%\trellis2_extensions
+mkdir "%EXT_DIR%" 2>nul
+if not exist "%EXT_DIR%\CuMesh" (
+    git clone https://github.com/JeffreyXiang/CuMesh.git "%EXT_DIR%\CuMesh" --recursive
+)
+:: %~dp0 expands to this script's directory, so no machine-specific path is baked in.
+"%~dp0.venv\Scripts\pip.exe" install "%EXT_DIR%\CuMesh" --no-build-isolation --force-reinstall
diff --git a/build_flexgemm.bat b/build_flexgemm.bat
new file mode 100644
index 0000000..7973905
--- /dev/null
+++ b/build_flexgemm.bat
@@ -0,0 +1,8 @@
+@echo off
+:: Reinstall FlexGEMM from the checkout setup.bat places in %TEMP%\trellis2_extensions; the build log is written next to this script.
+call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
+if not defined TORCH_CUDA_ARCH_LIST set TORCH_CUDA_ARCH_LIST=12.0
+set EXT_DIR=%TEMP%\trellis2_extensions
+set BUILD_LOG=%~dp0flexgemm_build.log
+"%~dp0.venv\Scripts\pip.exe" install "%EXT_DIR%\FlexGEMM" --no-build-isolation --force-reinstall --no-deps > "%BUILD_LOG%" 2>&1
+echo Exit code: %ERRORLEVEL% >> "%BUILD_LOG%"
diff --git a/build_nvdiffrast.bat b/build_nvdiffrast.bat
new file mode 100644
index 0000000..0f1129a
--- /dev/null
+++ b/build_nvdiffrast.bat
@@ -0,0 +1,6 @@
+@echo off
+:: Build and install nvdiffrast from GitHub into the .venv that sits next to this script.
+call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
+if not defined TORCH_CUDA_ARCH_LIST set TORCH_CUDA_ARCH_LIST=12.0
+"%~dp0.venv\Scripts\pip.exe" install --no-build-isolation --force-reinstall git+https://github.com/NVlabs/nvdiffrast.git
+pause
diff --git a/download_texture_models.py b/download_texture_models.py
new file mode 100644
index 0000000..8bda815
--- /dev/null
+++ b/download_texture_models.py
@@ -0,0 +1,166 @@
+"""
+Download all models required for Trellis2TexturingPipeline.
+
+Models downloaded:
+  microsoft/TRELLIS.2-4B
+    - texturing_pipeline.json
+    - ckpts/shape_enc_next_dc_f16c32_fp16              (~400 MB)
+    - ckpts/tex_dec_next_dc_f16c32_fp16                (~400 MB)
+    - ckpts/slat_flow_imgshape2tex_dit_1_3B_512_bf16   (~2.6 GB)
+    - ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16  (~2.6 GB)
+  briaai/RMBG-2.0  (background removal)                (~1.0 GB)
+  facebook/dinov3-vitl16-pretrain-lvd1689m             (~1.1 GB)
+
+Total: ~8.1 GB
+
+Environment variables:
+  HF_TOKEN    default value for --token
+  DINOV3_URL  download URL for the DINOv3 checkpoint (overrides the built-in one)
+"""
+
+import argparse
+import os
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+try:
+    from huggingface_hub import hf_hub_download, snapshot_download
+except ImportError:
+    raise SystemExit("huggingface_hub not installed. Run: pip install huggingface_hub")
+
+TRELLIS_REPO = "microsoft/TRELLIS.2-4B"
+RMBG_REPO = "briaai/RMBG-2.0"
+DINO_REPO = "facebook/dinov3-vitl16-pretrain-lvd1689m"
+DINO_DIRECT_FILENAME = "dinov3_vitl16_pretrain_lvd1689m-8aa4cbdd.pth"
+# SECURITY(review): the fallback below is a pre-signed link (Policy / Signature /
+# Key-Pair-Id query parameters) whose policy carries a DateLessThan expiry, so it
+# will stop working and should not live in the repository. Prefer DINOV3_URL and
+# drop the literal once callers have migrated.
+DINO_DIRECT_URL = os.environ.get("DINOV3_URL") or (
+    "https://dinov3.llamameta.net/dinov3_vitl16/dinov3_vitl16_pretrain_lvd1689m-8aa4cbdd.pth"
+    "?Policy=eyJTdGF0ZW1lbnQiOlt7InVuaXF1ZV9oYXNoIjoiYTdwYzV6YzI3bDczbmJxcjNhZXAyYnhkIiwiUmVzb3VyY2UiOiJodHRwczpcL1wvZGlub3YzLmxsYW1hbWV0YS5uZXRcLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3NzQ0MTgxMDl9fX1dfQ__"
+    "&Signature=qIEnfQZHBmwdp38IDvUjV3wkZMiNX3v9-K-YXheXCFRo97GBhwNHG-PbJkb1Fw9PECXYcklow%7EiJ%7EV3Qh8TfytTe2t%7ElcNs63kCoxmRuBB8%7ElUXD5R%7E0nKX1HqqZQzxV%7ECXUcDh6wKWX5dcEeMG39bVjOAhADvItOLCPQU-l%7ECP9NM2Nb9sXi2Eyp21gU5PNAL11ziKY2U4dRuKvjDn9EhfGS1cvftO8AFGsIOEapmWbEZ9lpWgEGFWiWbt6ajy%7EnK6NoiycLatTTNF-hFCmSiD6xavV7MfgtXHmFi8SE7Vvrv%7EwAjGuA0ZFOQnUNSCbVzyQ8Rj-S8PA1OGp28svxA__"
+    "&Key-Pair-Id=K15QRJLYKIFSLZ&Download-Request-ID=3857627804367069"
+)
+
+TRELLIS_TEXTURE_FILES = [
+    "texturing_pipeline.json",
+    "ckpts/shape_enc_next_dc_f16c32_fp16.json",
+    "ckpts/shape_enc_next_dc_f16c32_fp16.safetensors",
+    "ckpts/tex_dec_next_dc_f16c32_fp16.json",
+    "ckpts/tex_dec_next_dc_f16c32_fp16.safetensors",
+    "ckpts/slat_flow_imgshape2tex_dit_1_3B_512_bf16.json",
+    "ckpts/slat_flow_imgshape2tex_dit_1_3B_512_bf16.safetensors",
+    "ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.json",
+    "ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.safetensors",
+]
+
+
+def download_trellis(local_dir: Path, token: str = None):
+    """Fetch every file in TRELLIS_TEXTURE_FILES from TRELLIS_REPO into *local_dir*.
+
+    Files already present under *local_dir* are skipped.
+    """
+    print(f"\n[1/3] Downloading TRELLIS.2-4B texture models -> {local_dir}")
+    local_dir.mkdir(parents=True, exist_ok=True)
+    for filename in TRELLIS_TEXTURE_FILES:
+        dest = local_dir / filename
+        if dest.exists():
+            print(f"  skip {filename}")
+            continue
+        print(f"  fetch {filename} ...")
+        hf_hub_download(
+            repo_id=TRELLIS_REPO,
+            filename=filename,
+            local_dir=str(local_dir),
+            token=token,
+        )
+    print("  done.")
+
+
+def download_rmbg(local_dir: Path, token: str = None):
+    """Snapshot RMBG_REPO into *local_dir* unless weight files are already there.
+
+    Any ``*.safetensors`` or ``*.bin`` file directly inside *local_dir* counts as
+    an existing download.
+    """
+    print(f"\n[2/3] Downloading RMBG-2.0 (background removal) -> {local_dir}")
+    if any(local_dir.glob("*.safetensors")) or any(local_dir.glob("*.bin")):
+        print("  skip (already exists)")
+        return
+    snapshot_download(
+        repo_id=RMBG_REPO,
+        local_dir=str(local_dir),
+        token=token,
+        ignore_patterns=["*.md", "*.txt", "*.png", "*.jpg"],
+    )
+    print("  done.")
+
+
+def download_dino(local_dir: Path, token: str = None):
+    """Download the DINOv3 checkpoint from DINO_DIRECT_URL into *local_dir*.
+
+    *token* is unused; it is kept so all download helpers share one signature.
+    The transfer goes to a ``.part`` file that is renamed on success, so an
+    interrupted run cannot leave a truncated file that the skip check below
+    would accept as complete.
+    """
+    print(f"\n[3/3] Downloading DINOv3-L ({DINO_DIRECT_FILENAME}) -> {local_dir}")
+    local_dir.mkdir(parents=True, exist_ok=True)
+    dest = local_dir / DINO_DIRECT_FILENAME
+    if dest.exists():
+        print(f"  skip {DINO_DIRECT_FILENAME} (already exists)")
+        return
+    partial = dest.with_name(dest.name + ".part")
+    print(f"  fetch {DINO_DIRECT_FILENAME} ...")
+    try:
+        urllib.request.urlretrieve(DINO_DIRECT_URL, str(partial))
+    except urllib.error.URLError as err:
+        partial.unlink(missing_ok=True)
+        raise SystemExit(
+            f"  failed to download {DINO_DIRECT_FILENAME}: {err}\n"
+            "  The built-in link is pre-signed and may have expired; "
+            "set DINOV3_URL to a fresh download URL and re-run."
+        ) from err
+    except BaseException:
+        partial.unlink(missing_ok=True)
+        raise
+    os.replace(partial, dest)
+    print("  done.")
+
+
+def main():
+    """Parse the command line and download the three model sets."""
+    parser = argparse.ArgumentParser(description="Download TRELLIS.2 texture pipeline models")
+    parser.add_argument(
+        "--model-dir",
+        type=Path,
+        default=Path(__file__).parent / "models",
+        help="Root directory to save models (default: ./models)",
+    )
+    parser.add_argument(
+        "--token",
+        default=os.environ.get("HF_TOKEN"),
+        help="HuggingFace access token (or set HF_TOKEN env var)",
+    )
+    args = parser.parse_args()
+
+    trellis_dir = args.model_dir / "TRELLIS.2-4B"
+    rmbg_dir = args.model_dir / "RMBG-2.0"
+    dino_dir = args.model_dir / "dinov3-vitl16-pretrain-lvd1689m"
+
+    print(f"Model root : {args.model_dir.resolve()}")
+    print("Est. size  : ~8.1 GB")
+
+    download_trellis(trellis_dir, args.token)
+    download_rmbg(rmbg_dir, args.token)
+    download_dino(dino_dir, args.token)
+
+    print("\nAll models downloaded.")
+    print("\nLoad the pipeline with:")
+    print(f"  Trellis2TexturingPipeline.from_pretrained(r'{trellis_dir}', config_file='texturing_pipeline.json')")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/setup.bat b/setup.bat
new file mode 100644
index 0000000..953c6cf
--- /dev/null
+++ b/setup.bat
@@ -0,0 +1,163 @@
+@echo off
+setlocal EnableDelayedExpansion
+
+:: ============================================================
+:: setup.bat -- Windows equivalent of setup.sh for TRELLIS.2
+:: ============================================================
+
+set NEW_ENV=0
+set BASIC=0
+set FLASHATTN=0
+set CUMESH=0
+set OVOXEL=0
+set FLEXGEMM=0
+set NVDIFFRAST=0
+set NVDIFFREC=0
+set HELP=0
+
+if "%~1"=="" set HELP=1
+
+:parse_args
+if "%~1"=="" goto done_args
+if /i "%~1"=="-h" set HELP=1 & shift & goto parse_args
+if /i "%~1"=="--help" set HELP=1 & shift & goto parse_args
+if /i "%~1"=="--new-env" set NEW_ENV=1 & shift & goto parse_args
+if /i "%~1"=="--basic" set BASIC=1 & shift & goto parse_args
+if /i "%~1"=="--flash-attn" set FLASHATTN=1 & shift & goto parse_args
+if /i "%~1"=="--cumesh" set CUMESH=1 & shift & goto parse_args
+if /i "%~1"=="--o-voxel" set OVOXEL=1 & shift & goto parse_args
+if /i "%~1"=="--flexgemm" set FLEXGEMM=1 & shift & goto parse_args
+if /i "%~1"=="--nvdiffrast" set NVDIFFRAST=1 & shift & goto parse_args
+if /i "%~1"=="--nvdiffrec" set NVDIFFREC=1 & shift & goto parse_args
+if /i "%~1"=="--texture" set BASIC=1 & set FLASHATTN=1 & set OVOXEL=1 & set CUMESH=1 & set FLEXGEMM=1 & set NVDIFFRAST=1 & shift & goto parse_args
+if /i "%~1"=="--all" set NEW_ENV=1 & set BASIC=1 & set FLASHATTN=1 & set CUMESH=1 & set OVOXEL=1 & set FLEXGEMM=1 & set NVDIFFRAST=1 & set NVDIFFREC=1 & shift & goto parse_args
+echo Error: Invalid argument: %~1
+set HELP=1
+shift & goto parse_args
+
+:done_args
+
+if %HELP%==1 (
+    echo Usage: setup.bat [OPTIONS]
+    echo Options:
+    echo     -h, --help       Display this help message
+    echo     --new-env        Create a new venv at .venv ^(requires Python 3.12 to match the pinned flash-attn wheel^)
+    echo     --basic          Install basic dependencies
+    echo     --flash-attn     Install flash-attention ^(CUDA only^)
+    echo     --cumesh         Install cumesh
+    echo     --o-voxel        Install o-voxel
+    echo     --flexgemm       Install flexgemm
+    echo     --nvdiffrast     Install nvdiffrast ^(CUDA only^)
+    echo     --nvdiffrec      Install nvdiffrec ^(CUDA only^)
+    echo     --texture        Install all dependencies for texturing ^(basic, flash-attn, o-voxel, cumesh, flexgemm, nvdiffrast^)
+    echo     --all            Run all of the above
+    goto :eof
+)
+
+:: ---- Detect GPU platform ----------------------------------------
+set GPU_PLATFORM=
+nvidia-smi >nul 2>&1
+if %errorlevel%==0 (
+    set GPU_PLATFORM=cuda
+) else (
+    rocminfo >nul 2>&1
+    if !errorlevel!==0 (
+        set GPU_PLATFORM=hip
+    )
+)
+
+if "%GPU_PLATFORM%"=="" (
+    echo Error: No supported GPU found ^(nvidia-smi and rocminfo both failed^)
+    exit /b 1
+)
+echo Detected platform: %GPU_PLATFORM%
+
+set WORKDIR=%CD%
+set EXT_DIR=%TEMP%\trellis2_extensions
+
+:: ---- Activate MSVC environment (needed for CUDA extension builds) ----
+:: NOTE: vcvarsall.bat sets PLATFORM=x64, so every check and message below uses GPU_PLATFORM
+set VCVARS="C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat"
+if exist %VCVARS% (
+    call %VCVARS% x64
+) else (
+    echo Warning: vcvarsall.bat not found at %VCVARS%
+    echo          CUDA extension builds ^(cumesh, flexgemm, nvdiffrast, nvdiffrec, o-voxel^) will likely fail.
+    echo          Open a Developer Command Prompt for VS 2022 and re-run setup.bat if needed.
+)
+
+:: ---- --new-env --------------------------------------------------
+if %NEW_ENV%==1 (
+    python -m venv "%WORKDIR%\.venv"
+    call "%WORKDIR%\.venv\Scripts\activate.bat"
+    if "%GPU_PLATFORM%"=="cuda" (
+        pip install torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/cu128 --no-build-isolation
+    ) else if "%GPU_PLATFORM%"=="hip" (
+        pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/rocm6.2.4 --no-build-isolation
+    )
+)
+
+:: ---- --basic ----------------------------------------------------
+if %BASIC%==1 (
+    pip install packaging wheel setuptools imageio imageio-ffmpeg tqdm easydict opencv-python-headless ninja trimesh transformers gradio==6.0.1 tensorboard pandas lpips zstandard --no-build-isolation
+    pip install git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8 --no-build-isolation
+    rem NOTE: pillow-simd has no Windows wheel; install plain pillow instead
+    pip install pillow --no-build-isolation
+    pip install kornia timm --no-build-isolation
+)
+
+:: ---- --flash-attn -----------------------------------------------
+if %FLASHATTN%==1 (
+    if "%GPU_PLATFORM%"=="cuda" (
+        pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.10/flash_attn-2.8.2+cu128torch2.7-cp312-cp312-win_amd64.whl --no-build-isolation
+    ) else (
+        echo [FLASHATTN] Unsupported platform on Windows: %GPU_PLATFORM%
+    )
+)
+
+:: ---- --nvdiffrast (clone is skipped when the checkout already exists) ----
+if %NVDIFFRAST%==1 (
+    if "%GPU_PLATFORM%"=="cuda" (
+        mkdir "%EXT_DIR%" 2>nul
+        if not exist "%EXT_DIR%\nvdiffrast" git clone -b v0.4.0 https://github.com/NVlabs/nvdiffrast.git "%EXT_DIR%\nvdiffrast"
+        pip install "%EXT_DIR%\nvdiffrast" --no-build-isolation
+    ) else (
+        echo [NVDIFFRAST] Unsupported platform: %GPU_PLATFORM%
+    )
+)
+
+:: ---- --nvdiffrec (clone is skipped when the checkout already exists) ----
+if %NVDIFFREC%==1 (
+    if "%GPU_PLATFORM%"=="cuda" (
+        mkdir "%EXT_DIR%" 2>nul
+        if not exist "%EXT_DIR%\nvdiffrec" git clone -b renderutils https://github.com/JeffreyXiang/nvdiffrec.git "%EXT_DIR%\nvdiffrec"
+        pip install "%EXT_DIR%\nvdiffrec" --no-build-isolation
+    ) else (
+        echo [NVDIFFREC] Unsupported platform: %GPU_PLATFORM%
+    )
+)
+
+:: ---- --cumesh (clone is skipped when the checkout already exists) ----
+if %CUMESH%==1 (
+    mkdir "%EXT_DIR%" 2>nul
+    if not exist "%EXT_DIR%\CuMesh" git clone https://github.com/JeffreyXiang/CuMesh.git "%EXT_DIR%\CuMesh" --recursive
+    pip install "%EXT_DIR%\CuMesh" --no-build-isolation
+)
+
+:: ---- --flexgemm (clone is skipped when the checkout already exists) ----
+if %FLEXGEMM%==1 (
+    mkdir "%EXT_DIR%" 2>nul
+    if not exist "%EXT_DIR%\FlexGEMM" git clone https://github.com/JeffreyXiang/FlexGEMM.git "%EXT_DIR%\FlexGEMM" --recursive
+    pip install "%EXT_DIR%\FlexGEMM" --no-build-isolation
+)
+
+:: ---- --o-voxel --------------------------------------------------
+if %OVOXEL%==1 (
+    mkdir "%EXT_DIR%" 2>nul
+    xcopy /E /I /Y "%WORKDIR%\o-voxel" "%EXT_DIR%\o-voxel"
+    pip install "%EXT_DIR%\o-voxel" --no-build-isolation
+)
+
+echo.
+echo Setup complete.
+endlocal
diff --git a/setup.sh b/setup.sh
index e09e2b1..7da2f7d 100644
--- a/setup.sh
+++ b/setup.sh
@@ -1,5 +1,5 @@
 # Read Arguments
-TEMP=`getopt -o h --long help,new-env,basic,flash-attn,cumesh,o-voxel,flexgemm,nvdiffrast,nvdiffrec -n 'setup.sh' -- "$@"`
+TEMP=`getopt -o h --long help,new-env,basic,flash-attn,cumesh,o-voxel,flexgemm,nvdiffrast,nvdiffrec,texture,all -n 'setup.sh' -- "$@"`
 
 eval set -- "$TEMP"
 
@@ -30,6 +30,8 @@ while true ; do
         --flexgemm) FLEXGEMM=true ; shift ;;
         --nvdiffrast) NVDIFFRAST=true ; shift ;;
         --nvdiffrec) NVDIFFREC=true ; shift ;;
+        --texture) BASIC=true ; FLASHATTN=true ; OVOXEL=true ; CUMESH=true ; FLEXGEMM=true ; NVDIFFRAST=true ; shift ;;
+        --all) NEW_ENV=true ; BASIC=true ; FLASHATTN=true ; CUMESH=true ; OVOXEL=true ; FLEXGEMM=true ; NVDIFFRAST=true ; NVDIFFREC=true ; shift ;;
         --) shift ; break ;;
         *) ERROR=true ; break ;;
     esac
@@ -52,7 +54,10 @@ if [ "$HELP" = true ] ; then
     echo "    --flexgemm              Install flexgemm"
     echo "    --nvdiffrast            Install nvdiffrast"
     echo "    --nvdiffrec             Install nvdiffrec"
-    return
+    echo "    --texture               Install all dependencies for texturing (basic, flash-attn, o-voxel, cumesh, flexgemm, nvdiffrast)"
+    echo "    --all                   Run all of the above"
+    # The old code used a top-level 'return', i.e. the script may be sourced; a bare 'exit' would close the caller's shell.
+    return 0 2>/dev/null || exit 0
 fi
 
 # Get system information
@@ -67,10 +72,15 @@ else
 fi
 
 if [ "$NEW_ENV" = true ] ; then
-    conda create -n trellis2 python=3.10
-    conda activate trellis2
+    if command -v conda > /dev/null; then
+        conda create -n trellis2 python=3.10
+        conda activate trellis2
+    else
+        python3 -m venv "$WORKDIR/.venv"
+        source "$WORKDIR/.venv/bin/activate"
+    fi
     if [ "$PLATFORM" = "cuda" ] ; then
-        pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu124
+        pip install torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/cu128
     elif [ "$PLATFORM" = "hip" ] ; then
         pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/rocm6.2.4
     fi
diff --git a/texture_mesh.py b/texture_mesh.py
new file mode 100644
index 0000000..a854b30
--- /dev/null
+++ b/texture_mesh.py
@@ -0,0 +1,129 @@
+"""
+Run TRELLIS.2 texturing on an arbitrary 3D mesh.
+
+Usage:
+    python texture_mesh.py <mesh> <image> [options]
+
+Examples:
+    python texture_mesh.py model.ply photo.png
+    python texture_mesh.py model.glb photo.webp --output out.glb --resolution 1024 --texture-size 2048
+    python texture_mesh.py model.obj photo.jpg --steps 20 --guidance 3.0 --seed 42
+"""
+import os
+# Exported before the imports below so it is already in the environment when they load.
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+import argparse
+import sys
+import time
+import trimesh
+from PIL import Image
+from trellis2.pipelines import Trellis2TexturingPipeline
+
+
+def parse_args():
+    """Build the command-line parser and return the parsed arguments."""
+    parser = argparse.ArgumentParser(
+        description="Apply TRELLIS.2 PBR texturing to a 3D mesh",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("mesh", help="Input mesh file (.ply, .obj, .glb, .gltf)")
+    parser.add_argument("image", help="Reference image for texturing (.png, .jpg, .webp)")
+    parser.add_argument("--output", default=None,
+                        help="Output GLB path; when omitted, <mesh name>_textured.glb next to the input mesh")
+    parser.add_argument("--model", default="microsoft/TRELLIS.2-4B",
+                        help="HuggingFace model ID or local path")
+    parser.add_argument("--resolution", type=int, default=1024, choices=[512, 1024, 1536],
+                        help="Voxel resolution for geometry encoding")
+    parser.add_argument("--texture-size", type=int, default=2048,
+                        help="Output texture map resolution (pixels)")
+    parser.add_argument("--steps", type=int, default=12,
+                        help="Number of flow ODE solver steps")
+    parser.add_argument("--guidance", type=float, default=1.0,
+                        help="Classifier-free guidance strength")
+    parser.add_argument("--guidance-rescale", type=float, default=0.0,
+                        help="Guidance rescale factor")
+    parser.add_argument("--rescale-t", type=float, default=3.0,
+                        help="Time rescaling for ODE solver")
+    parser.add_argument("--seed", type=int, default=0,
+                        help="Random seed for reproducibility")
+    parser.add_argument("--no-preprocess", action="store_true",
+                        help="Skip image preprocessing (background removal). "
+                             "Use if image already has alpha channel or clean background.")
+    return parser.parse_args()
+
+
+def main():
+    """Validate the inputs, run the texturing pipeline, and export the result as GLB."""
+    args = parse_args()
+
+    # Validate inputs; sys.exit(str) prints the message to stderr and exits with status 1
+    if not os.path.isfile(args.mesh):
+        sys.exit(f"Error: mesh file not found: {args.mesh}")
+    if not os.path.isfile(args.image):
+        sys.exit(f"Error: image file not found: {args.image}")
+
+    # Default output path
+    if args.output is None:
+        stem = os.path.splitext(os.path.basename(args.mesh))[0]
+        args.output = os.path.join(os.path.dirname(args.mesh) or ".", f"{stem}_textured.glb")
+    # Create the output directory up front so a bad path fails before the long run, not after it
+    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
+
+    print(f"Mesh:        {args.mesh}")
+    print(f"Image:       {args.image}")
+    print(f"Output:      {args.output}")
+    print(f"Resolution:  {args.resolution}")
+    print(f"Texture size:{args.texture_size}")
+    print(f"Steps:       {args.steps}")
+    print(f"Guidance:    {args.guidance}")
+    print(f"Seed:        {args.seed}")
+    print()
+
+    # Load pipeline
+    print("Loading pipeline...")
+    pipeline = Trellis2TexturingPipeline.from_pretrained(
+        args.model, config_file="texturing_pipeline.json"
+    )
+    pipeline.cuda()
+
+    # Load inputs
+    print("Loading mesh...")
+    mesh = trimesh.load(args.mesh, force="mesh")
+    print(f"  Vertices: {len(mesh.vertices):,}  Faces: {len(mesh.faces):,}")
+
+    print("Loading image...")
+    image = Image.open(args.image)
+    print(f"  Size: {image.size}  Mode: {image.mode}")
+
+    # Run texturing
+    print("Running texturing...")
+    t0 = time.perf_counter()
+    output = pipeline.run(
+        mesh,
+        image,
+        seed=args.seed,
+        preprocess_image=not args.no_preprocess,
+        tex_slat_sampler_params={
+            "steps": args.steps,
+            "guidance_strength": args.guidance,
+            "guidance_rescale": args.guidance_rescale,
+            "rescale_t": args.rescale_t,
+        },
+        resolution=args.resolution,
+        texture_size=args.texture_size,
+    )
+    t1 = time.perf_counter()
+    print(f"  Texturing: {t1 - t0:.1f}s")
+
+    # Export
+    print(f"Exporting to {args.output} ...")
+    t2 = time.perf_counter()
+    output.export(args.output, extension_webp=True)
+    t3 = time.perf_counter()
+    print(f"  Export:    {t3 - t2:.1f}s")
+    print(f"  Total:     {t3 - t0:.1f}s")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/trellis2/modules/image_feature_extractor.py b/trellis2/modules/image_feature_extractor.py
index c3cb515..4e25426 100644
--- a/trellis2/modules/image_feature_extractor.py
+++ b/trellis2/modules/image_feature_extractor.py
@@ -5,6 +5,120 @@
 from transformers import DINOv3ViTModel
 import numpy as np
 from PIL import Image
+from pathlib import Path
+
+
+# Default location written by download_texture_models.py
+_DINOV3_DEFAULT_PTH = (
+    Path(__file__).parent.parent.parent
+    / "models"
+    / "dinov3-vitl16-pretrain-lvd1689m"
+    / "dinov3_vitl16_pretrain_lvd1689m-8aa4cbdd.pth"
+)
+
+
+def _find_dinov3_pth(model_name: str) -> "Path | None":
+    """Return a local .pth path for DINOv3, or None to fall back to HF.
+
+    Candidates are tried in order: the DINOV3_PTH_PATH environment variable,
+    *model_name* itself when it ends in ".pth", then _DINOV3_DEFAULT_PTH.
+    """
+    import os
+    env = os.environ.get("DINOV3_PTH_PATH")
+    if env:
+        p = Path(env)
+        if p.is_file():
+            return p
+        # An explicit override that points nowhere is most likely a typo; report it
+        # rather than silently loading weights from somewhere else.
+        print(f"  [DINOv3] DINOV3_PTH_PATH={env!r} is not a file; ignoring it")
+    if model_name.endswith(".pth"):
+        p = Path(model_name)
+        if p.is_file():
+            return p
+    if _DINOV3_DEFAULT_PTH.is_file():
+        return _DINOV3_DEFAULT_PTH
+    return None
+
+
+def _load_dinov3_from_pth(pth_path) -> "DINOv3ViTModel":
+    """Load DINOv3ViTModel from a Meta-format .pth checkpoint.
+
+    The model dimensions are read off the checkpoint tensors, and each fused
+    ``attn.qkv`` weight/bias is split into the separate q/k/v projections.
+    """
+    from transformers import DINOv3ViTConfig
+    print(f"  [DINOv3] loading from local .pth: {pth_path}")
+    ckpt = torch.load(str(pth_path), map_location="cpu", weights_only=True)
+
+    hidden_size = ckpt["patch_embed.proj.weight"].shape[0]
+    num_blocks = sum(1 for k in ckpt if k.startswith("blocks.") and k.endswith(".norm1.weight"))
+    intermediate = ckpt["blocks.0.mlp.fc1.weight"].shape[0]
+    num_register = ckpt["storage_tokens"].shape[1]
+    patch_size = ckpt["patch_embed.proj.weight"].shape[2]
+    num_heads = hidden_size // 64  # standard ViT head_dim=64
+
+    cfg = DINOv3ViTConfig(
+        hidden_size=hidden_size,
+        num_hidden_layers=num_blocks,
+        num_attention_heads=num_heads,
+        intermediate_size=intermediate,
+        patch_size=patch_size,
+        num_register_tokens=num_register,
+        key_bias=True,
+    )
+
+    sd = {}
+    sd["embeddings.cls_token"] = ckpt["cls_token"]
+    sd["embeddings.mask_token"] = ckpt["mask_token"].unsqueeze(1)  # [1, D] -> [1, 1, D]
+    sd["embeddings.register_tokens"] = ckpt["storage_tokens"]
+    sd["embeddings.patch_embeddings.weight"] = ckpt["patch_embed.proj.weight"]
+    sd["embeddings.patch_embeddings.bias"] = ckpt["patch_embed.proj.bias"]
+
+    for i in range(num_blocks):
+        s, d = f"blocks.{i}", f"layer.{i}"
+        for sfx in ("norm1.weight", "norm1.bias", "norm2.weight", "norm2.bias"):
+            sd[f"{d}.{sfx}"] = ckpt[f"{s}.{sfx}"]
+
+        qkv_w = ckpt[f"{s}.attn.qkv.weight"]
+        qkv_b = ckpt[f"{s}.attn.qkv.bias"]
+        # NOTE(review): upstream DINOv3 appears to multiply the fused qkv bias by a
+        # stored `bias_mask` buffer at run time (masking the K part). Apply it when the
+        # checkpoint carries one -- confirm against the upstream attention layer.
+        mask_key = f"{s}.attn.qkv.bias_mask"
+        if mask_key in ckpt:
+            qkv_b = qkv_b * ckpt[mask_key].to(qkv_b.dtype)
+        H = hidden_size
+        sd[f"{d}.attention.q_proj.weight"] = qkv_w[:H]
+        sd[f"{d}.attention.k_proj.weight"] = qkv_w[H:2*H]
+        sd[f"{d}.attention.v_proj.weight"] = qkv_w[2*H:]
+        sd[f"{d}.attention.q_proj.bias"] = qkv_b[:H]
+        sd[f"{d}.attention.k_proj.bias"] = qkv_b[H:2*H]
+        sd[f"{d}.attention.v_proj.bias"] = qkv_b[2*H:]
+
+        sd[f"{d}.attention.o_proj.weight"] = ckpt[f"{s}.attn.proj.weight"]
+        sd[f"{d}.attention.o_proj.bias"] = ckpt[f"{s}.attn.proj.bias"]
+        sd[f"{d}.layer_scale1.lambda1"] = ckpt[f"{s}.ls1.gamma"]
+        sd[f"{d}.layer_scale2.lambda1"] = ckpt[f"{s}.ls2.gamma"]
+        sd[f"{d}.mlp.up_proj.weight"] = ckpt[f"{s}.mlp.fc1.weight"]
+        sd[f"{d}.mlp.up_proj.bias"] = ckpt[f"{s}.mlp.fc1.bias"]
+        sd[f"{d}.mlp.down_proj.weight"] = ckpt[f"{s}.mlp.fc2.weight"]
+        sd[f"{d}.mlp.down_proj.bias"] = ckpt[f"{s}.mlp.fc2.bias"]
+
+    model = DINOv3ViTModel(cfg)
+    # Carry over the final norm parameters when both sides have them; they were not
+    # mapped before and would otherwise stay at their freshly initialised values.
+    model_keys = model.state_dict().keys()
+    for name in ("norm.weight", "norm.bias"):
+        if name in ckpt and name in model_keys:
+            sd[name] = ckpt[name]
+    missing, unexpected = model.load_state_dict(sd, strict=False)
+    if missing:
+        print(f"  [DINOv3] {len(missing)} missing keys (e.g. {missing[0]})")
+    if unexpected:
+        print(f"  [DINOv3] {len(unexpected)} unexpected keys (e.g. {unexpected[0]})")
+    model.eval()
+    return model
 
 
 class DinoV2FeatureExtractor:
@@ -62,7 +176,11 @@ class DinoV3FeatureExtractor:
     """
     def __init__(self, model_name: str, image_size=512):
         self.model_name = model_name
-        self.model = DINOv3ViTModel.from_pretrained(model_name)
+        pth = _find_dinov3_pth(model_name)
+        if pth is not None:
+            self.model = _load_dinov3_from_pth(pth)
+        else:
+            self.model = DINOv3ViTModel.from_pretrained(model_name)
         self.model.eval()
         self.image_size = image_size
         self.transform = transforms.Compose([
diff --git a/trellis2/modules/sparse/conv/config.py b/trellis2/modules/sparse/conv/config.py
index ac08489..cc8988e 100644
--- a/trellis2/modules/sparse/conv/config.py
+++ b/trellis2/modules/sparse/conv/config.py
@@ -1,3 +1,3 @@
 SPCONV_ALGO = 'auto' # 'auto', 'implicit_gemm', 'native'
-FLEX_GEMM_ALGO = 'masked_implicit_gemm_splitk' # 'explicit_gemm', 'implicit_gemm', 'implicit_gemm_splitk', 'masked_implicit_gemm', 'masked_implicit_gemm_splitk'
+FLEX_GEMM_ALGO = 'explicit_gemm' # 'explicit_gemm', 'implicit_gemm', 'implicit_gemm_splitk', 'masked_implicit_gemm', 'masked_implicit_gemm_splitk'
 FLEX_GEMM_HASHMAP_RATIO = 2.0 # Ratio of hashmap size to input size
diff --git a/trellis2/pipelines/rembg/BiRefNet.py b/trellis2/pipelines/rembg/BiRefNet.py
index c71a992..b2079d6 100644
--- a/trellis2/pipelines/rembg/BiRefNet.py
+++ b/trellis2/pipelines/rembg/BiRefNet.py
@@ -8,7 +8,7 @@
 class BiRefNet:
     def __init__(self, model_name: str = "ZhengPeng7/BiRefNet"):
         self.model = AutoModelForImageSegmentation.from_pretrained(
-            model_name, trust_remote_code=True
+            model_name, trust_remote_code=True, low_cpu_mem_usage=False
         )
         self.model.eval()
         self.transform_image = transforms.Compose(
@@ -18,7 +18,7 @@ def __init__(self, model_name: str = "ZhengPeng7/BiRefNet"):
                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
             ]
         )
-    
+
     def to(self, device: str):
         self.model.to(device)
 
@@ -27,7 +27,7 @@ def cuda(self):
 
     def cpu(self):
         self.model.cpu()
-    
+
     def __call__(self, image: Image.Image) -> Image.Image:
         image_size = image.size
         input_images = self.transform_image(image).unsqueeze(0).to("cuda")
@@ -39,4 +39,4 @@ def __call__(self, image: Image.Image) -> Image.Image:
         mask = pred_pil.resize(image_size)
         image.putalpha(mask)
         return image
-        
\ No newline at end of file
+
diff --git a/trellis2/pipelines/trellis2_texturing.py b/trellis2/pipelines/trellis2_texturing.py
index c184b5e..5dd8d88 100755
--- a/trellis2/pipelines/trellis2_texturing.py
+++ b/trellis2/pipelines/trellis2_texturing.py
@@ -313,7 +313,12 @@ def postprocess_mesh(
         normals = normals[vmap.cpu().numpy()]
 
         # rasterize
-        ctx = dr.RasterizeCudaContext()
+        try:
+            ctx = dr.RasterizeCudaContext()
+        except RuntimeError as err:
+            # Keep the OpenGL fallback, but report it: the rasterizer backend changes silently otherwise.
+            print(f"[postprocess_mesh] RasterizeCudaContext failed ({err}); falling back to RasterizeGLContext")
+            ctx = dr.RasterizeGLContext()
         uvs_torch = torch.cat([uvs_torch * 2 - 1, torch.zeros_like(uvs_torch[:, :1]), torch.ones_like(uvs_torch[:, :1])], dim=-1).unsqueeze(0)
         rast, _ = dr.rasterize(
             ctx, uvs_torch, faces_torch,