diff --git a/.gitignore b/.gitignore index 194e236..ce58235 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ data/ +output/ checkpoints/ # Byte-compiled / optimized / DLL files diff --git a/demo.py b/demo.py index 326c6e5..e6cc45b 100644 --- a/demo.py +++ b/demo.py @@ -37,6 +37,8 @@ else: weights_path = "naver/" + args.model_name model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(args.device) + + print(f"Hosting on {server_name}:{args.server_port}") # dust3r will write the 3D model inside tmpdirname with tempfile.TemporaryDirectory(suffix='dust3r_gradio_demo') as tmpdirname: diff --git a/docker/docker-compose-cuda.yml b/docker/docker-compose-cuda.yml index 85710af..d2fd41a 100644 --- a/docker/docker-compose-cuda.yml +++ b/docker/docker-compose-cuda.yml @@ -9,8 +9,12 @@ services: environment: - DEVICE=cuda - MODEL=${MODEL:-DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth} + - PYTHONPATH=/dust3r # <--- ADD THIS LINE volumes: - - ./files/checkpoints:/dust3r/checkpoints + - ./files/checkpoints:/checkpoints + - ./files/data:/data + - ./files/output:/output + - ../:/dust3r cap_add: - IPC_LOCK - SYS_RESOURCE diff --git a/docker/files/cuda.Dockerfile b/docker/files/cuda.Dockerfile index a1d2edc..473f753 100644 --- a/docker/files/cuda.Dockerfile +++ b/docker/files/cuda.Dockerfile @@ -20,6 +20,8 @@ RUN pip install opencv-python==4.8.0.74 WORKDIR /dust3r/croco/models/curope/ RUN python setup.py build_ext --inplace +RUN pip install boto3 zstandard + WORKDIR /dust3r COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/docker/files/entrypoint.sh b/docker/files/entrypoint.sh index 9637072..bb2f3db 100644 --- a/docker/files/entrypoint.sh +++ b/docker/files/entrypoint.sh @@ -5,4 +5,5 @@ set -eux DEVICE=${DEVICE:-cuda} MODEL=${MODEL:-DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth} -exec python3 demo.py --weights "checkpoints/$MODEL" --device "$DEVICE" --local_network "$@" +# Keep the container running for debugging +tail -f /dev/null diff --git 
a/docker/run.sh b/docker/run.sh index 6c92036..420252e 100755 --- a/docker/run.sh +++ b/docker/run.sh @@ -35,10 +35,12 @@ set_dcomp() { run_docker() { export MODEL=${model_name} if [ "$with_cuda" -eq 1 ]; then - $dcomp -f docker-compose-cuda.yml up --build + $dcomp -f docker-compose-cuda.yml up --build -d else - $dcomp -f docker-compose-cpu.yml up --build + $dcomp -f docker-compose-cpu.yml up --build -d fi + echo "Docker container started in detached mode." + echo "To attach to the container, run: $dcomp exec dust3r-demo /bin/bash" } with_cuda=0 diff --git a/dust3r/analysis_scale.py b/dust3r/analysis_scale.py new file mode 100644 index 0000000..2bab52b --- /dev/null +++ b/dust3r/analysis_scale.py @@ -0,0 +1,67 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +import argparse + +def plot_scale_statistics(results_path): + """Plot scale statistics from benchmark results. + + Args: + results_path: Path to the CSV file containing benchmark results + """ + # Read the results + df = pd.read_csv(results_path) + + # Create figure with subplots + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10)) + + # Plot 1: Scale factors over items + ax1.plot(df['item_id'], df['optimal_scale'], 'b.', alpha=0.5, label='Scale factors') + ax1.axhline(y=df['optimal_scale'].mean(), color='r', linestyle='--', label=f'Mean: {df["optimal_scale"].mean():.2f}') + ax1.set_xlabel('Item ID') + ax1.set_ylabel('Optimal Scale') + ax1.set_title('Scale Factors Distribution') + ax1.legend() + ax1.grid(True) + + # Plot 2: Histogram of scale factors + ax2.hist(df['optimal_scale'], bins=30, alpha=0.7, color='b') + ax2.axvline(x=df['optimal_scale'].mean(), color='r', linestyle='--', + label=f'Mean: {df["optimal_scale"].mean():.2f}') + ax2.axvline(x=df['optimal_scale'].median(), color='g', linestyle='--', + label=f'Median: {df["optimal_scale"].median():.2f}') + ax2.set_xlabel('Scale Factor') + ax2.set_ylabel('Frequency') + ax2.set_title('Histogram of Scale Factors') + ax2.legend() + 
ax2.grid(True) + + plt.tight_layout() + output_path = os.path.join(os.path.dirname(results_path), 'scale_statistics.png') + plt.savefig(output_path) + plt.close() + + # Print statistics + print("\nScale Factor Statistics:") + print(f"Mean: {df['optimal_scale'].mean():.3f}") + print(f"Median: {df['optimal_scale'].median():.3f}") + print(f"Std: {df['optimal_scale'].std():.3f}") + print(f"Min: {df['optimal_scale'].min():.3f}") + print(f"Max: {df['optimal_scale'].max():.3f}") + print(f"25th percentile: {df['optimal_scale'].quantile(0.25):.3f}") + print(f"75th percentile: {df['optimal_scale'].quantile(0.75):.3f}") + +def main(): + parser = argparse.ArgumentParser(description="Analyze scale factors from DUSt3R benchmark results.") + parser.add_argument('--results', type=str, required=True, help='Path to the benchmark results CSV file.') + + args = parser.parse_args() + + if not os.path.exists(args.results): + print(f"Results file not found at {args.results}") + return + + plot_scale_statistics(args.results) + +if __name__ == "__main__": + main() diff --git a/dust3r/benchmark_ts_depth.py b/dust3r/benchmark_ts_depth.py new file mode 100644 index 0000000..2c34d88 --- /dev/null +++ b/dust3r/benchmark_ts_depth.py @@ -0,0 +1,588 @@ +import os,sys +code_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(f'{code_dir}/../') +import pandas as pd +from dataclasses import dataclass +import json +from typing import List, Dict +import os +import argparse +import torch +import logging +import time +import random +import numpy as np +import matplotlib.pyplot as plt +import imageio +from PIL import Image + +# dust3r imports +from dust3r.model import AsymmetricCroCo3DStereo, load_model as load_dust3r_model +from dust3r.inference import inference +from dust3r.image_pairs import make_pairs +from dust3r.utils.device import to_numpy +from dust3r.cloud_opt import global_aligner, GlobalAlignerMode +from dust3r.utils.image import load_images, ImgNorm +from 
dust3r.deserialize_depth_dataset import Boto3ResourceManager, deserialize_and_download_image, deserialize_and_download_tensor +from dust3r.demo import get_3D_model_from_scene + + +def load_model(args): + """Loads the DUSt3R model.""" + if args.weights: + model = load_dust3r_model(args.weights, args.device) + elif args.model_name: + model = AsymmetricCroCo3DStereo.from_pretrained(args.model_name).to(args.device) + else: + raise ValueError("Either --model_name or --weights must be provided.") + logging.info(f"Loaded DUSt3R model on {args.device}") + model.eval() + return model + + +@dataclass +class DepthData: + dataset_creator: str + camera_names: List[str] + item_id: int + split: str + image_paths: Dict[str, str] + depth_map_paths: Dict[str, str] + normal_map_paths: Dict[str, str] + visible_mask_paths: Dict[str, str] + world_from_camera_transforms_path: str + camera_intrinsics_path: str + + @classmethod + def from_row(cls, row): + return cls( + dataset_creator=row[0], + camera_names=list(row[1]), + item_id=row[2], + split=row[3], + image_paths=json.loads(row[4]), + depth_map_paths=json.loads(row[5]), + normal_map_paths=json.loads(row[6]), + visible_mask_paths=json.loads(row[7]), + world_from_camera_transforms_path=row[8], + camera_intrinsics_path=row[9], + ) + + +def deserialize_data(data: DepthData, resource_manager: Boto3ResourceManager, args): + """Deserialize all the data we need for a single benchmark item.""" + camera_ids = list(data.image_paths.keys()) + if len(camera_ids) < 2: + raise ValueError( + f"Need at least two images for inference, but got {len(camera_ids)}.") + + cam1_id, cam2_id = random.sample(camera_ids, 2) + logging.info(f"Randomly selected cameras: {cam1_id}, {cam2_id}") + + # It's conventional to use bit_depth=8 for RGB images. 
+ print(f"data.image_paths[cam1_id]: {data.image_paths[cam1_id]}") + print(f"data.image_paths[cam2_id]: {data.image_paths[cam2_id]}") + img1 = deserialize_and_download_image( + data.image_paths[cam1_id], bit_depth=8, resource_manager=resource_manager, dtype=torch.float32) * 255 + img2 = deserialize_and_download_image( + data.image_paths[cam2_id], bit_depth=8, resource_manager=resource_manager, dtype=torch.float32) * 255 + img1 = img1.cuda() + img2 = img2.cuda() + + + # With imageio + + # img1 = imageio.imread("data/20250611171250_left.png") + # img2 = imageio.imread("data/20250611171250_right.png") + # if img1.shape[-1] == 4: + # img1 = img1[..., :3] + # if img2.shape[-1] == 4: + # img2 = img2[..., :3] + # img1 = torch.as_tensor(img1).cuda().float().permute(2, 0, 1) + # img2 = torch.as_tensor(img2).cuda().float().permute(2, 0, 1) + # print(f"After img1.shape: {img1.shape} {img1.dtype=} {img1.min()=} {img1.max()=}") + # print(f"After img2.shape: {img2.shape} {img2.dtype=} {img2.min()=} {img2.max()=}") + + + depth_gt = deserialize_and_download_tensor( + data.depth_map_paths[cam1_id], resource_manager=resource_manager) + depth_gt = depth_gt + print(f"GT depth image max : {depth_gt.max()}, min: {depth_gt.min()}") + print(f"img1.shape before crop: {img1.shape}") + + all_intrinsics = deserialize_and_download_tensor( + data.camera_intrinsics_path, resource_manager=resource_manager) + + cam1_idx = data.camera_names.index(cam1_id) + intrinsics = all_intrinsics[cam1_idx] + + all_world_from_camera_transforms = deserialize_and_download_tensor( + data.world_from_camera_transforms_path, resource_manager=resource_manager) + cam2_idx = data.camera_names.index(cam2_id) + + transform1 = all_world_from_camera_transforms[cam1_idx] + transform2 = all_world_from_camera_transforms[cam2_idx] + + + # The translation vector is the last column of the 4x4 matrix + t1 = transform1[:3, 3] + t2 = transform2[:3, 3] + + gt_baseline = torch.linalg.norm(t1 - t2).item() + refactored_intrinsics = 
intrinsics.clone() + print(f"Original intrinsics: \n{intrinsics}") + print(f"GT Baseline: {gt_baseline}") + C, H, W = img1.shape[-3:] + # dust3r works well with smaller images, let's not crop to a large size + # target_h, target_w = 1200, 1600 + + # if H > target_h: + # y_offset = (H - target_h) // 2 + # img1 = img1[..., y_offset:y_offset + target_h, :] + # img2 = img2[..., y_offset:y_offset + target_h, :] + # depth_gt = depth_gt[..., y_offset:y_offset + target_h, :] + # # adjust intrinsics. cy is usually intrinsics[..., 1, 2] + # refactored_intrinsics[..., 1, 2] -= y_offset + + # if W > target_w: + # x_offset = (W - target_w) // 2 + # img1 = img1[..., :, x_offset:x_offset + target_w] + # img2 = img2[..., :, x_offset:x_offset + target_w] + # depth_gt = depth_gt[..., :, x_offset:x_offset + target_w] + # # adjust intrinsics. cx is usually intrinsics[..., 0, 2] + # refactored_intrinsics[..., 0, 2] -= x_offset + + + # The model expects a batch dimension + return img1[None], img2[None], refactored_intrinsics, depth_gt, gt_baseline, transform1, transform2 + + +def _resize_image(image_data, size): + """Helper to resize image and adjust focals, inspired by dust3r.utils.image""" + rgb = image_data['rgb'] + old_h, old_w = rgb.shape[:2] + + if isinstance(size, int): + new_w, new_h = size, size + else: + new_w, new_h = size + + pil_img = Image.fromarray(rgb) + pil_img_resized = pil_img.resize((new_w, new_h), Image.Resampling.LANCZOS) + + resized_rgb = np.array(pil_img_resized) + + fx, fy = image_data['focals'] + new_fx = fx * new_w / old_w + new_fy = fy * new_h / old_h + + return {'rgb': resized_rgb, 'focals': (new_fx, new_fy), 'path': image_data['path']} + + +def prepare_image_for_dust3r(img_tensor, size, idx=0): + """Prepare image tensor for dust3r input format. 
+ + Args: + img_tensor: Input tensor (1, C, H, W) + size: Target size for resizing + idx: Index of the image in the sequence + + Returns: + dict: Image data in dust3r format with img, true_shape, idx, and instance + """ + # Convert to numpy and permute to HWC + img_np = img_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy().astype(np.uint8) + + # Convert to PIL Image for resizing + pil_img = Image.fromarray(img_np) + W1, H1 = pil_img.size + + # Resize according to dust3r's logic + if size == 224: + # resize short side to 224 (then crop) + pil_img = _resize_pil_image(pil_img, round(size * max(W1/H1, H1/W1))) + else: + # resize long side to 512 + pil_img = _resize_pil_image(pil_img, size) + + # Center crop + W, H = pil_img.size + cx, cy = W//2, H//2 + if size == 224: + half = min(cx, cy) + pil_img = pil_img.crop((cx-half, cy-half, cx+half, cy+half)) + else: + halfw, halfh = ((2*cx)//16)*8, ((2*cy)//16)*8 + if W == H: # if square + halfh = 3*halfw/4 + pil_img = pil_img.crop((cx-halfw, cy-halfh, cx+halfw, cy+halfh)) + + # Convert to dust3r format + img_norm = ImgNorm(pil_img) + return { + 'img': img_norm[None], # Remove [None] as it's handled by collate_with_cat + 'true_shape': np.int32([pil_img.size[::-1]]), + 'idx': idx, + 'instance': str(idx) + } + + +def _resize_pil_image(img, size): + """Resize PIL image maintaining aspect ratio.""" + W, H = img.size + if W > H: + new_W = size + new_H = int(H * size / W) + else: + new_H = size + new_W = int(W * size / H) + return img.resize((new_W, new_H), Image.Resampling.LANCZOS) + + +def find_optimal_scale(pred_depth, gt_depth): + """Find the optimal scale factor between predicted and ground truth depth. 
+ + Args: + pred_depth: Predicted depth map (numpy array or torch tensor) + gt_depth: Ground truth depth map (numpy array or torch tensor) + + Returns: + scale: Optimal scale factor + error: Mean absolute error after scaling + """ + # Convert to numpy if needed + if torch.is_tensor(pred_depth): + pred_depth = pred_depth.cpu().numpy() + if torch.is_tensor(gt_depth): + gt_depth = gt_depth.cpu().numpy() + + # Remove invalid values + valid_mask = (gt_depth > 0) & np.isfinite(gt_depth) & np.isfinite(pred_depth) + if not np.any(valid_mask): + return 1.0, float('inf') + + # Compute scale using median ratio + ratios = gt_depth[valid_mask] / (pred_depth[valid_mask] + 1e-6) + scale = np.median(ratios) + + # Compute error after scaling + scaled_pred = pred_depth * scale + error = np.mean(np.abs(scaled_pred[valid_mask] - gt_depth[valid_mask])) + + return scale, error + + +def run_dust3r_inference(model, img1, img2, intrinsics, args, gt_pose1=None, gt_pose2=None, niter=300, schedule='cosine', lr=0.01): + """Runs inference on a pair of image tensors using DUSt3R. + + Args: + model: The DUSt3R model. + img1: Left image tensor (1, C, H, W). + img2: Right image tensor (1, C, H, W). + intrinsics: Camera intrinsics tensor. + args: Command-line arguments. + gt_pose1: Ground truth pose for first camera (4x4 matrix) + gt_pose2: Ground truth pose for second camera (4x4 matrix) + + Returns: + pred_depth: The predicted depth map (H, W). + inference_time: The time taken for inference. 
+ pred_baseline: The predicted baseline between cameras + """ + # Prepare images for dust3r + print(f"img1.shape: {img1.shape}") + img1_data = prepare_image_for_dust3r(img1, args.image_size, idx=0) + img2_data = prepare_image_for_dust3r(img2, args.image_size, idx=1) + # print(f"img1_data: {img1_data}") + + print(f"img1_data: {img1_data['img'].shape}") + + # Calculate scale factor from image shapes + original_h, original_w = img1.shape[2:] # Get H, W from (1, C, H, W) + resized_h, resized_w = img1_data['img'].shape[2:] # Get H, W from resized image + scale = min(original_h/resized_h, original_w/resized_w) + print(f"Scale factor: {scale}") + # Get focal lengths from intrinsics + focals = (intrinsics[0, 0].item()/scale, intrinsics[1, 1].item()/scale) + + gt_poses = [gt_pose1, gt_pose2] + + # Create list of images in dust3r format + loaded_imgs = [img1_data, img2_data] + pairs = make_pairs(loaded_imgs, prefilter=None, symmetrize=True) + + start_time = time.time() + with torch.cuda.amp.autocast(True): + output = inference(pairs, model, args.device, batch_size=1) + inference_time = time.time() - start_time + + # Enable gradients for optimization + torch.autograd.set_grad_enabled(True) + + scene = global_aligner(output, device=args.device, mode=GlobalAlignerMode.ModularPointCloudOptimizer, optimize_pp=True) + + scene.preset_pose([pose for pose in gt_poses], [True, True]) + scene.preset_focal([focals[0], focals[1]], [True, True]) + loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr) + print(f"Focals: {scene.get_focals()}") + # Disable gradients after optimization + torch.autograd.set_grad_enabled(False) + + depth_maps = to_numpy(scene.get_depthmaps()) + pred_depth = depth_maps[0] # Depth for the first image + + # Get predicted camera poses + pred_poses = scene.get_im_poses() + pred_pose1 = pred_poses[0] # First camera pose + pred_pose2 = pred_poses[1] # Second camera pose + + # Print predicted camera poses + # print("\nPredicted Camera 
Poses:") + # print("Camera 1 (Reference):") + # print(pred_pose1) + # print("\nCamera 2:") + # print(pred_pose2) + + # Calculate predicted baseline from camera poses + pred_t1 = pred_pose1[:3, 3] # Translation vector of first camera + pred_t2 = pred_pose2[:3, 3] # Translation vector of second camera + pred_baseline = np.linalg.norm(pred_t2.cpu().numpy() - pred_t1.cpu().numpy()) + print(f"Predicted Baseline: {pred_baseline}") + + return pred_depth, inference_time, pred_baseline, scene + + +def compare_and_visualize(img1, pred_depth, depth_gt, item_id, out_dir): + """ + Creates a 2x2 plot comparing predicted depth with ground truth. + Saves the plot to a file. + """ + if isinstance(img1, torch.Tensor): + img1 = img1.squeeze(0).permute(1, 2, 0).cpu().numpy().astype(np.uint8) + if isinstance(depth_gt, torch.Tensor): + depth_gt = depth_gt.cpu().numpy() + + + # Squeeze out channel dimension if it exists + if depth_gt.ndim == 3 and depth_gt.shape[0] == 1: + depth_gt = np.squeeze(depth_gt, axis=0) + + # Handle invalid values in predicted depth + pred_depth = np.nan_to_num(pred_depth, nan=0.0, posinf=0.0, neginf=0.0) + + depth_diff = np.abs(pred_depth - depth_gt) + + fig, axes = plt.subplots(3, 2, figsize=(16, 18)) + fig.suptitle(f"Item ID: {item_id}") + + im = axes[0, 0].imshow(img1) + axes[0, 0].set_title("Original Image") + axes[0, 0].axis('off') + + im = axes[0, 1].imshow(depth_diff, cmap='hot', vmin=0, vmax=0.5) + axes[0, 1].set_title("Depth Difference (abs)") + axes[0, 1].axis('off') + fig.colorbar(im, ax=axes[0, 1]) + + # Determine shared color range for depth plots + valid_pred_depth = pred_depth[np.isfinite(pred_depth)] + valid_depth_gt = depth_gt[np.isfinite(depth_gt)] + + vmin, vmax = None, None + if valid_pred_depth.size > 0 and valid_depth_gt.size > 0: + vmin = min(np.min(valid_pred_depth), np.min(valid_depth_gt)) + vmax = max(np.max(valid_pred_depth), np.max(valid_depth_gt)) + print(f"Using visualization range: vmin={vmin}, vmax={vmax}") + else: + 
logging.warning(f"Could not determine a valid color range for item {item_id}. Using separate color bars.") + print("Warning: Could not determine valid color range") + + im = axes[1, 0].imshow(depth_gt, cmap='viridis', vmin=vmin, vmax=vmax) + axes[1, 0].set_title("Ground Truth Depth") + axes[1, 0].axis('off') + fig.colorbar(im, ax=axes[1, 0]) + + im = axes[1, 1].imshow(pred_depth, cmap='viridis', vmin=vmin, vmax=vmax) + axes[1, 1].set_title("Predicted Depth") + axes[1, 1].axis('off') + fig.colorbar(im, ax=axes[1, 1]) + + # Add side-by-side histograms + valid_pred_flat = valid_pred_depth.flatten() + valid_gt_flat = valid_depth_gt.flatten() + + if valid_pred_flat.size > 0 and valid_gt_flat.size > 0: + all_valid_depths = np.concatenate([valid_pred_flat, valid_gt_flat]) + # Use percentiles to avoid extreme outliers skewing the histogram range + bins = np.linspace(np.percentile(all_valid_depths, 1), np.percentile(all_valid_depths, 99), 100) + + axes[2, 0].hist(valid_gt_flat, bins=bins, color='blue', alpha=0.7) + axes[2, 0].set_title("Ground Truth Depth Histogram") + axes[2, 0].set_xlabel("Depth") + axes[2, 0].set_ylabel("Frequency") + + axes[2, 1].hist(valid_pred_flat, bins=bins, color='green', alpha=0.7) + axes[2, 1].set_title("Predicted Depth Histogram") + axes[2, 1].set_xlabel("Depth") + axes[2, 1].sharey(axes[2, 0]) # Share y-axis for better comparison + else: + axes[2, 0].text(0.5, 0.5, "No valid GT data for histogram", ha='center', va='center') + axes[2, 0].axis('off') + axes[2, 1].text(0.5, 0.5, "No valid Pred data for histogram", ha='center', va='center') + axes[2, 1].axis('off') + + plt.tight_layout() + output_path = os.path.join(out_dir, f"{item_id}_depth_comparison.png") + plt.savefig(output_path) + logging.info(f"Saved comparison plot to {output_path}") + plt.close(fig) + + +def main(args): + model = load_model(args) + resource_manager = Boto3ResourceManager() + + # Initialize DataFrame to store results + results_df = pd.DataFrame(columns=[ + 'item_id', 
'mean_error', + 'gt_min', 'gt_max', 'gt_mean', + 'pred_min', 'pred_max', 'pred_mean', + 'inference_time' + ]) + + def data_fn(): + df = pd.read_parquet(args.meta_data_path) + # Set random seed for reproducibility + np.random.seed(1) + random.seed(1) + torch.manual_seed(1) + for i in range(len(df)): + idx = np.random.randint(0, len(df)) + yield DepthData.from_row(df.iloc[idx]) + + processed_count = 0 + for data in data_fn(): + try: + if args.limit_num is not None and processed_count >= args.limit_num: + break + + logging.info(f"Processing item {data.item_id}") + try: + img1, img2, intrinsics, depth_gt, gt_baseline, gt_pose1, gt_pose2 = deserialize_data( + data, resource_manager, args) + except ValueError as e: + logging.warning(f"Skipping item {data.item_id} due to: {e}") + continue + + pred_depth_low_res, inference_time, _, scene = run_dust3r_inference( + model, img1, img2, intrinsics, args, gt_pose1, gt_pose2) + + # Save 3D model as PLY + # print(f"Saving 3D model for item {data.item_id}...") + # try: + # model_filename = f"{data.item_id}_model.ply" + # model_output_path = get_3D_model_from_scene( + # outdir=args.out_dir, + # silent=False, + # scene=scene, + # glb_name=model_filename + # ) + # if model_output_path: + # print(f"Saved 3D model to {model_output_path}") + # else: + # print(f"Warning: Could not generate or save 3D model for item {data.item_id}.") + # except Exception as e: + # print(f"Error saving 3D model for item {data.item_id}: {e}") + + # Resize predicted depth to match ground truth depth resolution + H, W = img1.shape[2:] + pred_depth_tensor = torch.from_numpy(pred_depth_low_res).unsqueeze(0).unsqueeze(0) + pred_depth_resized = torch.nn.functional.interpolate(pred_depth_tensor, size=(H, W), mode='bilinear', align_corners=False) + pred_depth = pred_depth_resized.squeeze().cpu().numpy() + + print(f"Pred depth min: {pred_depth.min()}, max: {pred_depth.max()}, mean: {pred_depth.mean()}") + print(f"Pred depth shape: {pred_depth.shape}") + + # Record 
statistics + stats = { + 'item_id': data.item_id, + 'mean_error': np.mean(np.abs(pred_depth - depth_gt.cpu().numpy())), + 'gt_min': depth_gt.min(), + 'gt_max': depth_gt.max(), + 'gt_mean': depth_gt.mean(), + 'pred_min': pred_depth.min(), + 'pred_max': pred_depth.max(), + 'pred_mean': pred_depth.mean(), + 'inference_time': inference_time + } + results_df = pd.concat([results_df, pd.DataFrame([stats])], ignore_index=True) + + print(f"\nDepth Statistics for item {data.item_id}:") + print(f"Ground Truth - min: {depth_gt.min():.3f}, max: {depth_gt.max():.3f}, mean: {depth_gt.mean():.3f}") + print(f"Predicted - min: {pred_depth.min():.3f}, max: {pred_depth.max():.3f}, mean: {pred_depth.mean():.3f}") + print(f"Mean absolute error: {stats['mean_error']:.3f}") + + if isinstance(pred_depth, torch.Tensor): + pred_depth = pred_depth.cpu().numpy() + + compare_and_visualize(img1, pred_depth, depth_gt, data.item_id, args.out_dir) + + logging.info( + f"Inference time: {inference_time:.4f}s, Predicted depth map shape: {pred_depth.shape}") + + processed_count += 1 + + # Save results every 10 scenes + if processed_count % 10 == 0: + results_path = os.path.join(args.out_dir, 'depth_benchmark_results.csv') + results_df.to_csv(results_path, index=False) + logging.info(f"Saved results to {results_path}") + except Exception as err: + print(f"error handling {data.item_id}") + continue + + + + # Save final results + results_path = os.path.join(args.out_dir, 'depth_benchmark_results.csv') + results_df.to_csv(results_path, index=False) + logging.info(f"Saved final results to {results_path}") + + # Print summary statistics + print("\nSummary Statistics:") + print(f"Total scenes processed: {processed_count}") + print(f"Mean error: {results_df['mean_error'].mean():.3f} ± {results_df['mean_error'].std():.3f}") + print(f"Mean inference time: {results_df['inference_time'].mean():.3f}s ± {results_df['inference_time'].std():.3f}s") + + +if __name__ == "__main__": + code_dir = 
os.path.dirname(os.path.realpath(__file__)) + parser = argparse.ArgumentParser(description="Run DUSt3R depth estimation and compare with ground truth.") + + # Model arguments + model_group = parser.add_mutually_exclusive_group(required=True) + model_group.add_argument("--weights", type=str, help="Path to DUSt3R model weights (.pth file).") + model_group.add_argument("--model_name", type=str, default="DUSt3R_ViTLarge_BaseDecoder_512_dpt", help="Name of the model from HuggingFace Hub (e.g., 'DUSt3R_ViTLarge_BaseDecoder_512_dpt').") + + # Data arguments + parser.add_argument('--meta_data_path', default="metadata/depth_live_1724981057", type=str, help='Path to metadata parquet file.') + + # Output arguments + parser.add_argument('--out_dir', default=f'{code_dir}/../output/dust3r_benchmark/', type=str, help='The directory to save results.') + + # Inference arguments + parser.add_argument("--device", type=str, default='cuda', help="PyTorch device to use ('cuda' or 'cpu').") + parser.add_argument("--image_size", type=int, default=512, choices=[224, 512], help="Image size for DUSt3R processing. Default: 512.") + parser.add_argument("--limit-num", type=int, help="Limit the number of items to process. If not set, process all items.") + + args = parser.parse_args() + + if args.device == 'cuda' and not torch.cuda.is_available(): + print("CUDA is not available. 
Switching to CPU.") + args.device = 'cpu' + + print("Starting DUSt3R depth benchmark...") + torch.autograd.set_grad_enabled(False) + os.makedirs(args.out_dir, exist_ok=True) + + main(args) \ No newline at end of file diff --git a/dust3r/demo.py b/dust3r/demo.py index c491be0..3b4c9cc 100644 --- a/dust3r/demo.py +++ b/dust3r/demo.py @@ -65,7 +65,7 @@ def print_with_timestamp(*args, **kwargs): def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05, cam_color=None, as_pointcloud=False, - transparent_cams=False, silent=False): + transparent_cams=False, silent=False, glb_name='scene.glb'): assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals) pts3d = to_numpy(pts3d) imgs = to_numpy(imgs) @@ -79,7 +79,9 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, pts = np.concatenate([p[m] for p, m in zip(pts3d, mask)]) col = np.concatenate([p[m] for p, m in zip(imgs, mask)]) pct = trimesh.PointCloud(pts.reshape(-1, 3), colors=col.reshape(-1, 3)) + scene.add_geometry(pct) + print(f"Added point cloud to scene. 
Number of points: {len(pts)}") else: meshes = [] for i in range(len(imgs)): @@ -100,15 +102,18 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, rot = np.eye(4) rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix() scene.apply_transform(np.linalg.inv(cams2world[0] @ OPENGL @ rot)) - outfile = os.path.join(outdir, 'scene.glb') + pct.apply_transform(np.linalg.inv(cams2world[0] @ OPENGL @ rot)) + combined_mesh = trimesh.util.concatenate([mesh for mesh in scene.geometry.values()]) + outfile = os.path.join(outdir, glb_name) if not silent: print('(exporting 3D scene to', outfile, ')') - scene.export(file_obj=outfile) + # combined_mesh.export(file_obj=outfile) + pct.export(file_obj=outfile) return outfile -def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False, - clean_depth=False, transparent_cams=False, cam_size=0.05): +def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud=True, mask_sky=False, + clean_depth=False, transparent_cams=False, cam_size=0.05, glb_name='scene.glb'): """ extract 3D_model (glb file) from a reconstructed scene """ @@ -129,7 +134,7 @@ def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud scene.min_conf_thr = float(scene.conf_trf(torch.tensor(min_conf_thr))) msk = to_numpy(scene.get_masks()) return _convert_scene_output_to_glb(outdir, rgbimg, pts3d, msk, focals, cams2world, as_pointcloud=as_pointcloud, - transparent_cams=transparent_cams, cam_size=cam_size, silent=silent) + transparent_cams=transparent_cams, cam_size=cam_size, silent=silent, glb_name=glb_name) def get_reconstructed_scene(outdir, model, device, silent, image_size, filelist, schedule, niter, min_conf_thr, @@ -280,4 +285,4 @@ def main_demo(tmpdirname, model, device, image_size, server_name, server_port, s inputs=[scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size], outputs=outmodel) - 
import abc
from io import BytesIO
from urllib.parse import urlparse

import boto3
import cv2
import numpy as np
import torch
from zstandard import ZstdDecompressor


class AbstractResourceManager(abc.ABC):
    """Minimal interface for fetching raw bytes addressed by a URI."""

    @abc.abstractmethod
    def get(self, uri: str) -> bytes:
        """Return the raw bytes stored at *uri*."""
        raise NotImplementedError


class Boto3ResourceManager(AbstractResourceManager):
    """Resource manager that downloads objects from S3 using boto3."""

    def __init__(self):
        self.s3_client = boto3.client("s3")

    def get(self, s3_uri: str) -> bytes:
        """Download and return the object at *s3_uri*.

        Raises
        ------
        ValueError
            If the URI scheme is not ``s3``.
        """
        parsed_uri = urlparse(s3_uri)
        if parsed_uri.scheme != "s3":
            raise ValueError(f"URI scheme must be s3, not {parsed_uri.scheme}")
        bucket = parsed_uri.netloc
        key = parsed_uri.path.lstrip("/")
        response = self.s3_client.get_object(Bucket=bucket, Key=key)
        return response["Body"].read()


def unpack_bytes_np(compressed_bytes: bytes) -> np.ndarray:
    """Deserialize a numpy array from ``np.save``-formatted bytes.

    NOTE(review): ``allow_pickle=True`` can execute arbitrary code when
    loading untrusted data -- only use with trusted sources.
    """
    return np.load(BytesIO(compressed_bytes), allow_pickle=True)


def zstd_decompress_bytes(compressed_bytes: bytes) -> bytes:
    """Decompress a zstandard-compressed byte string."""
    return ZstdDecompressor().decompress(compressed_bytes)


def zstd_decompress_arr(compressed_bytes: bytes) -> np.ndarray:
    """Decompress, then deserialize, a zstandard-compressed numpy array."""
    return unpack_bytes_np(zstd_decompress_bytes(compressed_bytes))


def deserialize_and_download_image(
    s3_uri: str, bit_depth: int, resource_manager: AbstractResourceManager, dtype: torch.dtype
) -> torch.Tensor:
    """Download an encoded image from S3 and decode it to a CHW torch tensor.

    Shared utility for DeserializedObjectView and DeserializedImage
    (defined elsewhere); look at those class docstrings for more information.

    Parameters
    ----------
    s3_uri : str
        S3 URI of the encoded image.
    bit_depth : int
        Bit depth of the stored image; must be in the range [8, 16].
    resource_manager : AbstractResourceManager
        Used to fetch the raw image bytes.
    dtype : torch.dtype
        Output dtype. Must be torch.float or torch.half when bit_depth > 8.

    Returns
    -------
    torch.Tensor
        (3, H, W) tensor in RGB channel order; float dtypes are normalized
        to [0, 1] by the maximum representable value for *bit_depth*.
    """
    image_bytes = resource_manager.get(s3_uri)
    if bit_depth == 8:
        image_np = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
    elif 8 < bit_depth <= 16:
        # note that torch half starts losing precision for bit_depth > 11; it becomes a
        # choice for the user to tradeoff loading speed vs precision. For bit_depth=12,
        # the max error is 1px (out of 4096 slots).
        if dtype not in {torch.float, torch.half}:
            raise ValueError(f"dtype must be torch.float or torch.half if bit_depth > 8, not {dtype}")
        image_np = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        # have to convert to float16 or float32 first, since np.uint16 is not supported by pytorch
        dtype_np = np.float16 if dtype == torch.half else np.float32
        image_np = image_np.astype(dtype_np)
    else:
        raise ValueError(f"bit_depth must be in the range [8, 16], not {bit_depth}!")

    image = torch.from_numpy(image_np).permute(2, 0, 1).to(dtype)
    if dtype in {torch.float, torch.half}:
        # Normalize to [0, 1] based on the source bit depth.
        image = image / (2**bit_depth - 1)

    return image


def deserialize_and_download_tensor(s3_uri: str, resource_manager: AbstractResourceManager) -> torch.Tensor:
    """Deserialize and download a zstd-compressed tensor from S3.

    Parameters
    ----------
    s3_uri : str
        The S3 URI of the tensor.
    resource_manager : AbstractResourceManager

    Returns
    -------
    torch.Tensor
    """
    tensor_bytes = resource_manager.get(s3_uri)
    return torch.from_numpy(zstd_decompress_arr(tensor_bytes))


def test_deserialize_and_download_image():
    """Smoke test: download a known 8-bit image and write it to disk for inspection."""
    test_image_file = "s3://covariant-annotation-pipeline/resource_root/sim_scene_annotations/images-camera_array_01/409a/409ad6ba22b2cb129609ecbd52e5446e5f90d9920563103763e558eb576ffcf5.png"
    resource_manager = Boto3ResourceManager()
    tensor = deserialize_and_download_image(test_image_file, bit_depth=8, resource_manager=resource_manager, dtype=torch.float32)
    image_np = (tensor * 255).to(torch.uint8).permute(1, 2, 0).cpu().numpy()
    # FIX: cv2.imwrite expects BGR channel order, but the decoded tensor is RGB --
    # convert back so the saved PNG has correct colors.
    cv2.imwrite("test_image.png", cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))


def test_deserialize_and_download_tensor():
    """Smoke test: download a depth map and save a normalized grayscale rendering."""
    test_tensor_file = "s3://covariant-annotation-pipeline/resource_root/sim_scene_annotations/depth_maps-camera_array_01/fc64/fc64581dc26ef911ed77bb674b8736749351c832a5ded9d407d812da733304e9.blob"
    resource_manager = Boto3ResourceManager()
    tensor = deserialize_and_download_tensor(test_tensor_file, resource_manager)
    # Scale depth values to 0-255 range for visualization
    depth_min = tensor.min()
    depth_max = tensor.max()
    depth_normalized = ((tensor - depth_min) / (depth_max - depth_min) * 255).to(torch.uint8)

    # Save as grayscale image (single channel, so no BGR/RGB concern here)
    cv2.imwrite("test_depth.png", depth_normalized.cpu().numpy())
    print(f"Saved depth image with range [{depth_min:.2f}, {depth_max:.2f}]")


if __name__ == "__main__":
    test_deserialize_and_download_tensor()
import argparse
import copy  # noqa: F401  (kept from original; not currently used)
import glob  # noqa: F401  (kept from original; not currently used)
import json
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np  # noqa: F401  (kept from original; not currently used)
import torch
from scipy.spatial.transform import Rotation  # noqa: F401  (kept from original)

# DUSt3R imports
from dust3r.cloud_opt import GlobalAlignerMode, global_aligner
from dust3r.demo import get_3D_model_from_scene
from dust3r.image_pairs import make_pairs
from dust3r.inference import inference
from dust3r.model import AsymmetricCroCo3DStereo, load_model
from dust3r.utils.device import to_numpy
from dust3r.utils.image import load_images


def _pair_paths(pair_dir, basename):
    """Return the (left, right) image paths for a pair basename."""
    return (
        os.path.join(pair_dir, f"{basename}_left.png"),
        os.path.join(pair_dir, f"{basename}_right.png"),
    )


def _save_camera_parameters(scene, output_dir, basename):
    """Dump per-view intrinsics and camera poses to <basename>_camera_parameters.json."""
    if scene.get_intrinsics() is None or scene.get_im_poses() is None:
        print(f"Warning: Could not retrieve intrinsics or poses for {basename}. Skipping camera parameter saving.")
        return
    camera_params = {
        "intrinsics": to_numpy(scene.get_intrinsics()).tolist(),
        "im_poses": to_numpy(scene.get_im_poses()).tolist(),
    }
    json_output_path = os.path.join(output_dir, f"{basename}_camera_parameters.json")
    try:
        with open(json_output_path, 'w') as f:
            json.dump(camera_params, f, indent=4)
        print(f"Saved camera parameters to {json_output_path}")
    except OSError as e:
        print(f"Error saving camera parameters for {basename} to {json_output_path}: {e}")


def _save_3d_model(scene, output_dir, basename):
    """Export the aligned scene as <basename>_pct.ply; best-effort, errors are logged."""
    print(f"Saving 3D model for {basename}...")
    try:
        # min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams and
        # cam_size all use get_3D_model_from_scene's defaults; make them CLI args
        # later if needed.
        model_output_path = get_3D_model_from_scene(
            outdir=output_dir,
            silent=False,  # set True for less verbose output
            scene=scene,
            glb_name=f"{basename}_pct.ply",
        )
        if model_output_path:
            print(f"Saved 3D model to {model_output_path}")
        else:
            print(f"Warning: Could not generate or save 3D model for {basename}.")
    except Exception as e:  # export can fail in many ways; keep the batch running
        print(f"Error saving 3D model for {basename}: {e}")


def _save_rgb_and_depth(scene, output_dir, basename):
    """Save RGB views plus colored and grayscale depth renderings.

    Returns True on success, False when the scene did not yield exactly
    two views (the caller skips to the next basename in that case).
    """
    print("Saving RGB and Depth images...")
    rgb_images = scene.imgs
    depth_maps_tensor = scene.get_depthmaps()
    if depth_maps_tensor is None or len(depth_maps_tensor) == 0:
        print(f"Error: No depth maps found for basename {basename}. Cannot save depth images.")
        return False
    depth_maps = to_numpy(depth_maps_tensor)

    if len(rgb_images) != 2 or len(depth_maps) != 2:
        print(f"Error: Expected 2 RGB images and 2 depth maps for {basename}, "
              f"but found {len(rgb_images)} RGBs and {len(depth_maps)} depths.")
        return False

    for side, rgb, depth in zip(("left", "right"), rgb_images, depth_maps):
        rgb_path = os.path.join(output_dir, f"{basename}_{side}_rgb.png")
        plt.imsave(rgb_path, rgb)
        print(f"Saved {side} RGB image to {rgb_path}")

        colored_path = os.path.join(output_dir, f"{basename}_{side}_depth_colored.png")
        plt.imsave(colored_path, depth, cmap='viridis')
        print(f"Saved {side} colored depth image to {colored_path}")

        # NOTE(review): this writes a normalized 8-bit grayscale rendering, not raw
        # metric depth; the "_raw" filename is kept for backward compatibility.
        raw_path = os.path.join(output_dir, f"{basename}_{side}_depth_raw.png")
        plt.imsave(raw_path, depth, cmap='gray')
        print(f"Saved {side} raw depth image to {raw_path}")

    print(f"Images saved successfully for {basename}.")
    return True


def main(model, args, basenames_list):
    """Run DUSt3R on each <basename>_left.png / <basename>_right.png pair.

    For every basename: load the pair, run inference, align with PairViewer,
    then save camera parameters (JSON), a 3D model (.ply), and RGB/depth PNGs
    into args.output_dir. Reports the average inference time at the end.

    Parameters
    ----------
    model : loaded DUSt3R model (already on args.device)
    args : parsed CLI namespace (image_pair_dir, output_dir, device, image_size, niter)
    basenames_list : list[str] of pair basenames to process
    """
    if not basenames_list:
        print("No image pair basenames to process.")
        return

    all_inference_times = []

    for current_basename in basenames_list:
        print(f"\n--- Processing basename: {current_basename} ---")

        left_path, right_path = _pair_paths(args.image_pair_dir, current_basename)
        print(f"Processing specific pair: \n Left: {left_path}\n Right: {right_path}")

        if not os.path.exists(left_path) or not os.path.exists(right_path):
            print(f"Error: One or both images for the pair not found. Searched for:\n {left_path}\n {right_path}")
            print("Please ensure both files exist and the paths are correct. Skipping this pair.")
            continue

        loaded_imgs_all = load_images([left_path, right_path], size=args.image_size, verbose=True)
        print(f"loaded_imgs_all: {loaded_imgs_all[0]['img'].shape} {loaded_imgs_all[0]['img'].dtype}")
        pairs = make_pairs(loaded_imgs_all, prefilter=None, symmetrize=True)

        print(f"Starting inference for {current_basename}...")
        start_time = time.time()
        output = inference(pairs, model, args.device, batch_size=1, verbose=True)
        inference_duration = time.time() - start_time
        all_inference_times.append(inference_duration)
        print(f"Inference for {current_basename} took {inference_duration:.2f} seconds.")

        print("Performing global alignment...")
        # PairViewer is the mode used by demo.py for exactly two views; full
        # optimization (PointCloudOptimizer, args.niter iterations) only applies
        # when more than two images share one scene, which never happens here
        # since each pair is processed independently.
        scene = global_aligner(output, device=args.device, mode=GlobalAlignerMode.PairViewer, verbose=True)

        _save_camera_parameters(scene, args.output_dir, current_basename)
        _save_3d_model(scene, args.output_dir, current_basename)
        if not _save_rgb_and_depth(scene, args.output_dir, current_basename):
            continue  # skip the "finished" banner, matching the original flow

        print(f"--- Finished processing basename: {current_basename} ---")

    if all_inference_times:
        avg_inference_time = sum(all_inference_times) / len(all_inference_times)
        print(f"\nAverage inference time over {len(all_inference_times)} pairs: {avg_inference_time:.2f} seconds.")
    else:
        print("\nNo inference was performed to calculate an average time.")


def _parse_args():
    """Build and parse the CLI for the pair-processing script."""
    parser = argparse.ArgumentParser(
        description="Process image pairs to generate RGB and colored depth map outputs using DUSt3R. \
            Accepts one or more specific basenames via --input_pair_basename, \
            or scans --image_pair_dir for all pairs if --input_pair_basename is omitted."
    )

    model_group = parser.add_mutually_exclusive_group(required=True)
    model_group.add_argument("--weights", type=str, help="Path to the model weights (.pth file).")
    model_group.add_argument("--model_name", type=str,
                             help="Name of the model (e.g., 'DUSt3R_ViTLarge_BaseDecoder_512_dpt') for HuggingFace Hub or local cache.")

    parser.add_argument("--input_pair_basename", type=str, nargs='+', default=None,
                        help="Optional. One or more basenames (e.g., 'image_001' 'image_002') of image pairs (_left.png/_right.png). \
                            If provided, only these pairs will be processed from the --image_pair_dir. \
                            If omitted, all pairs in --image_pair_dir will be scanned and processed.")
    parser.add_argument("--image_pair_dir", type=str, required=True,
                        help="Directory containing the image pairs. If --input_pair_basename is given, this is where those pairs are located. \
                            If --input_pair_basename is omitted, this directory will be scanned for all pairs.")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save the output images.")
    parser.add_argument("--device", type=str, default='cuda', help="PyTorch device to use ('cuda' or 'cpu'). Default: 'cuda'.")
    parser.add_argument("--image_size", type=int, default=512, choices=[224, 512], help="Image size for processing. Default: 512.")
    parser.add_argument("--niter", type=int, default=300,
                        help="Number of iterations for global alignment (used by PointCloudOptimizer mode, less relevant for PairViewer mode per pair).")

    return parser.parse_args()


def _discover_basenames(image_input_dir):
    """Scan *image_input_dir* for complete xxx_left.png / xxx_right.png pairs.

    A basename is included only when BOTH files exist, so checking every
    *_left.png file is sufficient (no separate *_right.png pass needed).
    Returns the basenames sorted for deterministic processing order.
    """
    found_basenames = set()
    for filename in os.listdir(image_input_dir):
        if filename.endswith("_left.png"):
            basename = filename.removesuffix("_left.png")
            if basename and os.path.exists(os.path.join(image_input_dir, f"{basename}_right.png")):
                found_basenames.add(basename)
    return sorted(found_basenames)


if __name__ == '__main__':
    parsed_args = _parse_args()

    if parsed_args.device == 'cuda' and not torch.cuda.is_available():
        print("CUDA is not available. Switching to CPU.")
        parsed_args.device = 'cpu'

    # Validate inputs and create the output directory once, up front.
    if not os.path.isdir(parsed_args.image_pair_dir):
        print(f"Error: Image pair directory not found or is not a directory: {parsed_args.image_pair_dir}")
        sys.exit(1)
    os.makedirs(parsed_args.output_dir, exist_ok=True)

    # Load model once for all pairs.
    print(f"Loading model... Device: {parsed_args.device}")
    if parsed_args.weights:
        model = load_model(parsed_args.weights, parsed_args.device)
    else:  # parsed_args.model_name must be set (mutually exclusive, required group)
        # Match demo.py's pattern: from_pretrained(...) then .to(device).
        # NOTE(review): the original passed device= to from_pretrained; confirm
        # against AsymmetricCroCo3DStereo.from_pretrained's signature.
        model = AsymmetricCroCo3DStereo.from_pretrained(parsed_args.model_name).to(parsed_args.device)
    print("Model loaded.")

    if parsed_args.input_pair_basename:
        basenames_to_process = parsed_args.input_pair_basename
        print(f"Processing specific basenames from arguments: {basenames_to_process}")
    else:
        print(f"No specific input_pair_basename provided. Scanning directory: {parsed_args.image_pair_dir}")
        basenames_to_process = _discover_basenames(parsed_args.image_pair_dir)
        if not basenames_to_process:
            print(f"No valid image pairs (e.g., xxx_left.png and xxx_right.png) found in {parsed_args.image_pair_dir}")
            sys.exit(1)
        print(f"Found {len(basenames_to_process)} unique image pair basenames in "
              f"'{parsed_args.image_pair_dir}': {basenames_to_process}")

    if basenames_to_process:
        main(model, parsed_args, basenames_to_process)
        print("\n--- All specified basenames processed. ---")
    else:
        print("No basenames were identified for processing.")