diff --git a/Dockerfile b/Dockerfile
index a4cce199..f1dfac7b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,33 +6,22 @@ RUN apt-get update && \
 
 WORKDIR /app
 
-# Add the application files
-COPY . /app/
-
-# Initialize and update git submodules
-RUN cd /app && \
-    git init && \
-    git submodule init && \
-    git submodule update --init --recursive && \
-    git submodule update --recursive && \
-    rm -rf .git */.git **/.git # Remove all .git directories
-
-# Setup conda and PyTorch
+# First, copy only the files needed for dependency installation
+COPY setup.sh ./
+
+# Setup conda and install dependencies
 RUN conda config --set always_yes true && conda init
 RUN conda install cuda=12.4 pytorch==2.4.0 torchvision==0.19.0 pytorch-cuda=12.4 -c pytorch -c nvidia
 
-# Install Kaolin dependencies first
+# Install Kaolin and other dependencies
 RUN conda run -n base pip install -r https://raw.githubusercontent.com/NVIDIAGameWorks/kaolin/v0.17.0/tools/build_requirements.txt \
     -r https://raw.githubusercontent.com/NVIDIAGameWorks/kaolin/v0.17.0/tools/viz_requirements.txt \
     -r https://raw.githubusercontent.com/NVIDIAGameWorks/kaolin/v0.17.0/tools/requirements.txt
 
-# Now install Kaolin with the correct version
 RUN conda run -n base pip install kaolin==0.17.0 -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.4.0_cu124.html
 
-# Install diso and other dependencies
 RUN conda run -n base pip install diso
 
-# Verify Kaolin installation
 RUN conda run -n base python -c "import kaolin; print(kaolin.__version__)"
 
 # Create a g++ wrapper for JIT, since the include dirs are passed with -i rather than -I for some reason
@@ -60,6 +49,11 @@ RUN conda clean --all -f -y
 # This reduces the size of the image by a few hundred megs. Not great, but it's a start.
 RUN rdfind -makesymlinks true /opt/conda
 
+# Only after all dependencies are installed, copy the application code
+COPY ./trellis ./trellis
+COPY ./assets ./assets
+COPY ./extensions ./extensions
+
 # Final stage
 FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-devel AS final
 
@@ -86,6 +80,9 @@ RUN conda run -n base pip install fastapi uvicorn python-multipart
 COPY startup.sh /app/startup.sh
 RUN chmod +x /app/startup.sh
 
+COPY .gitignore CODE_OF_CONDUCT.md LICENSE README.md setup.sh startup.sh headless_app.py model_generator.py example.py ./
+RUN chmod +x /app/*.sh
+
 ENV PATH=/opt/conda/bin:$PATH
 
 # This script runs the post_install steps
diff --git a/Dockerfile.runpod b/Dockerfile.runpod
index af26a990..142b8bcf 100644
--- a/Dockerfile.runpod
+++ b/Dockerfile.runpod
@@ -1,49 +1,95 @@
-FROM runpod/base:0.6.2-cuda12.4.1
+FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-devel AS builder
+
+# Install build dependencies
+RUN apt-get update && \
+    apt-get install -y ffmpeg build-essential htop git python3-onnx rdfind
 
 WORKDIR /app
 
-# Install system dependencies
-RUN apt-get update && \
-    apt-get install -y ffmpeg build-essential git python3-onnx rdfind && \
-    rm -rf /var/lib/apt/lists/*
+# First, copy only the files needed for dependency installation
+COPY setup.sh ./
+
+# Setup conda and PyTorch
+RUN conda config --set always_yes true && conda init
+RUN conda install cuda=12.4 pytorch==2.4.0 torchvision==0.19.0 pytorch-cuda=12.4 -c pytorch -c nvidia
+
+# Install Kaolin dependencies first
+RUN conda run -n base pip install -r https://raw.githubusercontent.com/NVIDIAGameWorks/kaolin/v0.17.0/tools/build_requirements.txt \
+    -r https://raw.githubusercontent.com/NVIDIAGameWorks/kaolin/v0.17.0/tools/viz_requirements.txt \
+    -r https://raw.githubusercontent.com/NVIDIAGameWorks/kaolin/v0.17.0/tools/requirements.txt
 
-# Copy the application files
-COPY . .
+# Now install Kaolin with the correct version
+RUN conda run -n base pip install kaolin==0.17.0 -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.4.0_cu124.html
 
-# Initialize and update git submodules
-RUN cd /app && \
-    git init && \
-    git submodule init && \
-    git submodule update --init --recursive && \
-    git submodule update --recursive && \
-    rm -rf .git */.git **/.git
+# Install diso and other dependencies
+RUN conda run -n base pip install diso
 
-# Create a g++ wrapper for JIT compilation
+# Verify Kaolin installation
+RUN conda run -n base python -c "import kaolin; print(kaolin.__version__)"
+
+# Create a g++ wrapper for JIT, since the include dirs are passed with -i rather than -I for some reason
 RUN printf '#!/usr/bin/env bash\nexec /usr/bin/g++ -I/usr/local/cuda/include -I/usr/local/cuda/include/crt "$@"\n' > /usr/local/bin/gxx-wrapper && \
     chmod +x /usr/local/bin/gxx-wrapper
 ENV CXX=/usr/local/bin/gxx-wrapper
 
-# Install Python dependencies
-RUN python3.11 -m pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cu118
-RUN python3.11 -m pip install --no-cache-dir kaolin==0.17.0 -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.0.0_cu118.html
-RUN python3.11 -m pip install --no-cache-dir diso plyfile utils3d flash_attn xformers
-RUN python3.11 -m pip install --no-cache-dir git+https://github.com/NVlabs/nvdiffrast.git
-
-# Run setup script with necessary components
-RUN ./setup.sh --basic --xformers --flash-attn --diffoctreerast --vox2seq --spconv --mipgaussian --kaolin --nvdiffrast
+# Run setup.sh. It intentionally does not install everything; the remaining GPU-dependent packages are installed later by the startup script.
+RUN conda run -n base ./setup.sh --basic --xformers --flash-attn --diffoctreerast --vox2seq --spconv --mipgaussian --kaolin --nvdiffrast --demo
 
-# Install RunPod
-RUN python3.11 -m pip install --no-cache-dir runpod
+# Now install additional Python packages
+# These build correctly inside the builder stage
+RUN conda run -n base pip install plyfile utils3d flash_attn spconv-cu120 xformers
+RUN conda run -n base pip install git+https://github.com/NVlabs/nvdiffrast.git
 
-# Cleanup unnecessary files
-RUN apt-get remove -y ffmpeg git python3-onnx && \
+# Cleanup after builds are done
+RUN apt-get remove -y ffmpeg build-essential htop git python3-onnx && \
     apt-get autoremove -y && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Copy and setup startup script
-COPY startup.runpod.sh /app/startup.sh
-RUN chmod +x /app/startup.sh
+RUN conda clean --all -f -y
+
+# Deduplicate with rdfind
+# This reduces the size of the image by a few hundred megs. Not great, but it's a start.
+RUN rdfind -makesymlinks true /opt/conda
+
+# Only after all dependencies are installed, copy the application code
+COPY ./trellis ./trellis
+COPY ./assets ./assets
+COPY ./extensions ./extensions
+
+# Final stage
+FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-devel AS final
+
+WORKDIR /app
+COPY --from=builder /usr/local/bin/gxx-wrapper /usr/local/bin/gxx-wrapper
+COPY --from=builder /opt/conda /opt/conda
+COPY --from=builder /root /root
+COPY --from=builder /app /app
+
+# Reinstall any runtime tools needed
+# git and build-essential are needed for the post_install.sh script; vim and strace are
+# useful for debugging inside the image.
+RUN apt-get update && \
+    apt-get install -y build-essential \
+        git \
+        strace \
+        vim && \
+    rm -rf /var/lib/apt/lists/*
+
+# Add FastAPI dependencies
+RUN conda run -n base pip install fastapi uvicorn python-multipart
+
+# Add RunPod dependencies
+RUN conda run -n base pip install runpod
+
+# Add the new startup script
+COPY startup.runpod.sh /app/startup.runpod.sh
+RUN chmod +x /app/startup.runpod.sh
+
+COPY .gitignore CODE_OF_CONDUCT.md LICENSE README.md setup.sh startup.runpod.sh rp_handler.py model_generator.py example.py ./
+RUN chmod +x /app/*.sh
+
+ENV PATH=/opt/conda/bin:$PATH
 
-# Use startup script as entrypoint
-ENTRYPOINT ["/app/startup.runpod.sh"]
\ No newline at end of file
+# This script runs the post_install steps
+CMD ["/app/startup.runpod.sh"]
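Both Dockerfiles pin the same stack (PyTorch 2.4.0, CUDA 12.4, Kaolin 0.17.0). A minimal smoke test for either built image — the image tag and script name are placeholders, not part of this diff:

```python
# smoke_test.py - a sketch; run inside the built container, e.g.:
#   docker run --rm --gpus all <image-tag> python smoke_test.py
import torch
import kaolin

# The version pins below mirror the Dockerfiles above.
assert torch.__version__.startswith("2.4.0"), torch.__version__
assert kaolin.__version__ == "0.17.0", kaolin.__version__
# TRELLIS needs a visible GPU at runtime; remember --gpus all.
assert torch.cuda.is_available(), "CUDA not available inside the container"
print("torch", torch.__version__, "| kaolin", kaolin.__version__, "| CUDA OK")
```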
diff --git a/README.md b/README.md
index 1acea907..da676967 100644
--- a/README.md
+++ b/README.md
@@ -79,14 +79,14 @@ We provide the following pretrained models:
 
 | Model | Description | #Params | Download |
 | --- | --- | --- | --- |
-| TRELLIS-image-large | Large image-to-3D model | 1.2B | [Download](https://huggingface.co/JeffreyXiang/TRELLIS-image-large) |
+| TRELLIS-image-large | Large image-to-3D model | 1.2B | [Download](https://huggingface.co/shakamone/trellis-large) |
 | TRELLIS-text-base | Base text-to-3D model | 342M | Coming Soon |
 | TRELLIS-text-large | Large text-to-3D model | 1.1B | Coming Soon |
 | TRELLIS-text-xlarge | Extra-large text-to-3D model | 2.0B | Coming Soon |
 
 The models are hosted on Hugging Face. You can directly load the models with their repository names in the code:
 ```python
-TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
+TrellisImageTo3DPipeline.from_pretrained("shakamone/trellis-large")
 ```
 
 If you prefer loading the model from local, you can download the model files from the links above and load the model with the folder path (folder structure should be maintained):
@@ -114,7 +114,7 @@ from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.utils import render_utils, postprocessing_utils
 
 # Load a pipeline from a model folder or a Hugging Face model hub.
-pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
+pipeline = TrellisImageTo3DPipeline.from_pretrained("shakamone/trellis-large")
 pipeline.cuda()
 
 # Load an image
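As the README context above notes, the weights can also be loaded from a local folder instead of the Hugging Face hub, provided the repository's folder structure is preserved. A sketch — the local path is hypothetical:

```python
from trellis.pipelines import TrellisImageTo3DPipeline

# Hypothetical local checkout of shakamone/trellis-large with its folder structure intact
pipeline = TrellisImageTo3DPipeline.from_pretrained("/models/trellis-large")
pipeline.cuda()
```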
diff --git a/app.py b/app.py
index 45016df7..ae77f902 100644
--- a/app.py
+++ b/app.py
@@ -243,6 +243,6 @@ def deactivate_button() -> gr.Button:
 
 # Launch the Gradio app
 if __name__ == "__main__":
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
+    pipeline = TrellisImageTo3DPipeline.from_pretrained("shakamone/trellis-large")
     pipeline.cuda()
     demo.launch()
diff --git a/example.py b/example.py
index 155e6711..0b828f8a 100644
--- a/example.py
+++ b/example.py
@@ -10,7 +10,7 @@ from trellis.utils import render_utils, postprocessing_utils
 
 # Load a pipeline from a model folder or a Hugging Face model hub.
-pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
+pipeline = TrellisImageTo3DPipeline.from_pretrained("shakamone/trellis-large")
 pipeline.cuda()
 
 # Load an image
diff --git a/headless_app.py b/headless_app.py
index 4e3419a9..8b3eb67a 100644
--- a/headless_app.py
+++ b/headless_app.py
@@ -1,177 +1,58 @@
-import os
-from typing import *
-import torch
-import numpy as np
-import imageio
-import uuid
-import time
-from easydict import EasyDict as edict
-from PIL import Image
-from fastapi import FastAPI, UploadFile, File
-from fastapi.responses import FileResponse
-from trellis.pipelines import TrellisImageTo3DPipeline
-from trellis.representations import Gaussian, MeshExtractResult
-from trellis.utils import render_utils, postprocessing_utils
-import json
+import base64
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from PIL import Image
+from model_generator import ModelGenerator
+import io
+from pydantic import BaseModel
+
+# Add new request model
+class ImageRequest(BaseModel):
+    image_base64: str
+    seed: int = 0
+    randomize_seed: bool = True
+    ss_guidance_strength: float = 7.5
+    ss_sampling_steps: int = 12
+    slat_guidance_strength: float = 3.0
+    slat_sampling_steps: int = 12
+    mesh_simplify: float = 0.95
+    texture_size: int = 1024
 
 app = FastAPI()
 
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # Allows all origins
+    allow_origins=["*"],
     allow_credentials=True,
-    allow_methods=["*"],  # Allows all methods
-    allow_headers=["*"],  # Allows all headers
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 
-MAX_SEED = np.iinfo(np.int32).max
-TMP_DIR = "/workspace/Trellis-demo"
-os.makedirs(TMP_DIR, exist_ok=True)
-
-def cleanup_old_files(directory: str, max_age_hours: int = 24):
-    """Clean up files older than max_age_hours"""
-    current_time = time.time()
-    for filename in os.listdir(directory):
-        filepath = os.path.join(directory, filename)
-        if os.path.isfile(filepath):
-            if (current_time - os.path.getmtime(filepath)) > (max_age_hours * 3600):
-                try:
-                    os.remove(filepath)
-                except OSError:
-                    pass
-
-@app.on_event("startup")
-async def startup_event():
-    """Run cleanup on startup"""
-    cleanup_old_files(TMP_DIR)
-
-# Initialize pipeline globally
-pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-pipeline.cuda()
-
-def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
-    trial_id = str(uuid.uuid4())
-    processed_image = pipeline.preprocess_image(image)
-    processed_image.save(f"{TMP_DIR}/{trial_id}.png")
-    return trial_id, processed_image
-
-def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
-    return {
-        'gaussian': {
-            **gs.init_params,
-            '_xyz': gs._xyz.cpu().numpy().tolist(),
-            '_features_dc': gs._features_dc.cpu().numpy().tolist(),
-            '_scaling': gs._scaling.cpu().numpy().tolist(),
-            '_rotation': gs._rotation.cpu().numpy().tolist(),
-            '_opacity': gs._opacity.cpu().numpy().tolist(),
-        },
-        'mesh': {
-            'vertices': mesh.vertices.cpu().numpy().tolist(),
-            'faces': mesh.faces.cpu().numpy().tolist(),
-        },
-        'trial_id': trial_id,
-    }
+# Initialize generator globally
+generator = ModelGenerator()
 
 @app.post("/process-image")
-async def process_image(
-    file: UploadFile = File(...),
-    seed: int = 0,
-    randomize_seed: bool = True,
-    ss_guidance_strength: float = 7.5,
-    ss_sampling_steps: int = 12,
-    slat_guidance_strength: float = 3.0,
-    slat_sampling_steps: int = 12
-):
-    # Read and process the uploaded image
-    image = Image.open(file.file)
-    trial_id, processed_image = preprocess_image(image)
-
-    # Generate 3D model
-    if randomize_seed:
-        seed = np.random.randint(0, MAX_SEED)
-
-    outputs = pipeline.run(
-        processed_image,
-        seed=seed,
-        formats=["gaussian", "mesh"],
-        preprocess_image=False,
-        sparse_structure_sampler_params={
-            "steps": ss_sampling_steps,
-            "cfg_strength": ss_guidance_strength,
-        },
-        slat_sampler_params={
-            "steps": slat_sampling_steps,
-            "cfg_strength": slat_guidance_strength,
-        },
-    )
-
-    # Generate preview video
-    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-    video_path = f"{TMP_DIR}/{trial_id}_preview.mp4"
-    imageio.mimsave(video_path, video, fps=15)
-
-    # Pack state and return results
-    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
-
-    # Save state file
-    with open(f"{TMP_DIR}/{trial_id}_state.json", 'w') as f:
-        json.dump(state, f)
-
-    return {
-        "trial_id": trial_id,
-        "state": state,
-        "preview_video": f"/preview/{trial_id}"
-    }
-
-@app.get("/preview/{trial_id}")
-async def get_preview(trial_id: str):
-    video_path = f"{TMP_DIR}/{trial_id}_preview.mp4"
-    return FileResponse(video_path)
-
-@app.post("/extract-glb/{trial_id}")
-async def extract_glb(
-    trial_id: str,
-    mesh_simplify: float = 0.95,
-    texture_size: int = 1024
-):
-    # Load the state file
-    state_path = f"{TMP_DIR}/{trial_id}_state.json"
-    if not os.path.exists(state_path):
-        return {"error": "Trial ID not found"}
-
-    # Add this line to load the state
-    with open(state_path, 'r') as f:
-        state = json.load(f)
-
-    # Generate GLB
-    glb_path = f"{TMP_DIR}/{trial_id}.glb"
-    gs = Gaussian(
-        aabb=state['gaussian']['aabb'],
-        sh_degree=state['gaussian']['sh_degree'],
-        mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
-        scaling_bias=state['gaussian']['scaling_bias'],
-        opacity_bias=state['gaussian']['opacity_bias'],
-        scaling_activation=state['gaussian']['scaling_activation'],
-    )
-    gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
-    gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
-    gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
-    gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
-    gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
-
-    mesh = edict(
-        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
-        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
-    )
-
-    glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
-    glb.export(glb_path)
-
-    return FileResponse(glb_path, filename=f"{trial_id}.glb")
+async def process_image(request: ImageRequest):
+    try:
+        # Decode base64 image
+        image_data = base64.b64decode(request.image_base64)
+        image = Image.open(io.BytesIO(image_data))
+
+        # Generate 3D model
+        return generator.generate(
+            image=image,
+            seed=request.seed,
+            randomize_seed=request.randomize_seed,
+            ss_guidance_strength=request.ss_guidance_strength,
+            ss_sampling_steps=request.ss_sampling_steps,
+            slat_guidance_strength=request.slat_guidance_strength,
+            slat_sampling_steps=request.slat_sampling_steps,
+            mesh_simplify=request.mesh_simplify,
+            texture_size=request.texture_size
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/health")
 async def health_check():
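For reference, a minimal client for the reworked /process-image contract above. This is a sketch assuming the API is served locally on uvicorn's default port 8000; the host, port, and file names are assumptions, not part of this diff:

```python
# client_sketch.py - exercises the JSON/base64 contract defined by ImageRequest above
import base64
import requests  # third-party: pip install requests

with open("input.png", "rb") as f:
    payload = {"image_base64": base64.b64encode(f.read()).decode()}

resp = requests.post("http://localhost:8000/process-image", json=payload, timeout=600)
resp.raise_for_status()

# The response mirrors ModelGenerator.generate(): {"glb_base64": ..., "seed": ...}
data = resp.json()
with open("output.glb", "wb") as f:
    f.write(base64.b64decode(data["glb_base64"]))
print("seed used:", data["seed"])
```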
@app.get("/health") async def health_check(): diff --git a/model_generator.py b/model_generator.py new file mode 100644 index 00000000..d56cc48c --- /dev/null +++ b/model_generator.py @@ -0,0 +1,57 @@ +import torch +import numpy as np +from PIL import Image +from trellis.pipelines import TrellisImageTo3DPipeline +from trellis.utils import postprocessing_utils +import io +import base64 + +class ModelGenerator: + def __init__(self): + self.pipeline = TrellisImageTo3DPipeline.from_pretrained("shakamone/trellis-large") + self.pipeline.cuda() + self.MAX_SEED = np.iinfo(np.int32).max + + def generate(self, image, seed=0, randomize_seed=True, + ss_guidance_strength=7.5, ss_sampling_steps=12, + slat_guidance_strength=3.0, slat_sampling_steps=12, + mesh_simplify=0.95, texture_size=1024): + + print(f"Generating 3D model with seed: {seed}") + + # Update seed if randomization is requested + if randomize_seed: + seed = np.random.randint(0, self.MAX_SEED) + + # Generate 3D model + outputs = self.pipeline.run( + image, + seed=seed, + formats=["gaussian", "mesh"], + sparse_structure_sampler_params={ + "steps": ss_sampling_steps, + "cfg_strength": ss_guidance_strength, + }, + slat_sampler_params={ + "steps": slat_sampling_steps, + "cfg_strength": slat_guidance_strength, + } + ) + + # Generate GLB file + glb = postprocessing_utils.to_glb( + outputs['gaussian'][0], + outputs['mesh'][0], + simplify=mesh_simplify, + texture_size=texture_size + ) + + # Save GLB to bytes buffer and convert to base64 + buffer = io.BytesIO() + glb.export(buffer, file_type="glb") + glb_base64 = base64.b64encode(buffer.getvalue()).decode() + + return { + "glb_base64": glb_base64, + "seed": seed + } \ No newline at end of file diff --git a/rp_handler.py b/rp_handler.py index b4579bd4..a4300fa9 100644 --- a/rp_handler.py +++ b/rp_handler.py @@ -1,69 +1,41 @@ import runpod -import torch from PIL import Image -import os -from datetime import datetime -from trellis.pipelines import TrellisImageTo3DPipeline -from trellis.utils import render_utils, postprocessing_utils import base64 import io +from model_generator import ModelGenerator -# Initialize pipeline -def init_pipeline(): - pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large") - pipeline.cuda() - return pipeline - -# Global pipeline instance -trellis_pipe = init_pipeline() +# Initialize generator globally +generator = ModelGenerator() def handler(event): try: input_data = event["input"] - image_path = input_data.get("image_path") - mesh_simplify = input_data.get("mesh_simplify", 0.95) - texture_size = input_data.get("texture_size", 1024) - - if not image_path or not os.path.exists(image_path): - return {"error": "Image path not provided or file not found"} - - # Load and process image - image = Image.open(image_path) - - # Generate 3D model - outputs = trellis_pipe.run( - image, - seed=42, - formats=["gaussian", "mesh"], - sparse_structure_sampler_params={ - "steps": 12, - "cfg_strength": 7.5, - }, - slat_sampler_params={ - "steps": 12, - "cfg_strength": 3.0, - } - ) - # Generate GLB file - glb = postprocessing_utils.to_glb( - outputs['gaussian'][0], - outputs['mesh'][0], - simplify=mesh_simplify, - texture_size=texture_size + # Extract parameters from input + image_base64 = input_data.get("image_base64") + if not image_base64: + return {"error": "Image base64 data not provided"} + + try: + # Decode base64 to image + image_data = base64.b64decode(image_base64) + image = Image.open(io.BytesIO(image_data)) + except Exception as e: + return {"error": 
f"Failed to decode base64 image: {str(e)}"} + + # Generate 3D model using ModelGenerator + return generator.generate( + image=image, + seed=input_data.get("seed", 0), + randomize_seed=input_data.get("randomize_seed", True), + ss_guidance_strength=input_data.get("ss_guidance_strength", 7.5), + ss_sampling_steps=input_data.get("ss_sampling_steps", 12), + slat_guidance_strength=input_data.get("slat_guidance_strength", 3.0), + slat_sampling_steps=input_data.get("slat_sampling_steps", 12), + mesh_simplify=input_data.get("mesh_simplify", 0.95), + texture_size=input_data.get("texture_size", 1024) ) - # Save GLB to bytes buffer - buffer = io.BytesIO() - glb.export(buffer) - - # Convert to base64 - glb_base64 = base64.b64encode(buffer.getvalue()).decode() - - return { - "glb_base64": glb_base64 - } - except Exception as e: return {"error": str(e)} diff --git a/startup.runpod.sh b/startup.runpod.sh index edd3d263..b25757d0 100644 --- a/startup.runpod.sh +++ b/startup.runpod.sh @@ -8,11 +8,11 @@ if [ ! -f /app/.post_install_done ]; then echo "Running post-install steps..." # Install GPU-dependent packages - ./setup.sh --mipgaussian --diffoctreerast + conda run -n base ./setup.sh --mipgaussian --diffoctreerast # Verify installation export CXX=/usr/local/bin/gxx-wrapper - python3.11 example.py + python example.py # Mark completion touch /app/.post_install_done @@ -23,4 +23,4 @@ fi export CXX=/usr/local/bin/gxx-wrapper echo "Launching RunPod handler..." -python3.11 -u rp_handler.py \ No newline at end of file +python -u rp_handler.py