diff --git a/.gitignore b/.gitignore index 194e236..ce58235 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ data/ +output/ checkpoints/ # Byte-compiled / optimized / DLL files diff --git a/demo.py b/demo.py index 326c6e5..e6cc45b 100644 --- a/demo.py +++ b/demo.py @@ -37,6 +37,8 @@ else: weights_path = "naver/" + args.model_name model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(args.device) + + print(f"Hosting on {server_name}:{args.server_port}") # dust3r will write the 3D model inside tmpdirname with tempfile.TemporaryDirectory(suffix='dust3r_gradio_demo') as tmpdirname: diff --git a/docker/docker-compose-cuda.yml b/docker/docker-compose-cuda.yml index 85710af..d2fd41a 100644 --- a/docker/docker-compose-cuda.yml +++ b/docker/docker-compose-cuda.yml @@ -9,8 +9,12 @@ services: environment: - DEVICE=cuda - MODEL=${MODEL:-DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth} + - PYTHONPATH=/dust3r # <--- ADD THIS LINE volumes: - - ./files/checkpoints:/dust3r/checkpoints + - ./files/checkpoints:/checkpoints + - ./files/data:/data + - ./files/output:/output + - ../:/dust3r cap_add: - IPC_LOCK - SYS_RESOURCE diff --git a/docker/files/cuda.Dockerfile b/docker/files/cuda.Dockerfile index a1d2edc..473f753 100644 --- a/docker/files/cuda.Dockerfile +++ b/docker/files/cuda.Dockerfile @@ -20,6 +20,8 @@ RUN pip install opencv-python==4.8.0.74 WORKDIR /dust3r/croco/models/curope/ RUN python setup.py build_ext --inplace +RUN pip install boto3 zstandard + WORKDIR /dust3r COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/docker/files/entrypoint.sh b/docker/files/entrypoint.sh index 9637072..bb2f3db 100644 --- a/docker/files/entrypoint.sh +++ b/docker/files/entrypoint.sh @@ -5,4 +5,5 @@ set -eux DEVICE=${DEVICE:-cuda} MODEL=${MODEL:-DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth} -exec python3 demo.py --weights "checkpoints/$MODEL" --device "$DEVICE" --local_network "$@" +# Keep the container running for debugging +tail -f /dev/null diff --git 
a/docker/run.sh b/docker/run.sh index 6c92036..420252e 100755 --- a/docker/run.sh +++ b/docker/run.sh @@ -35,10 +35,12 @@ set_dcomp() { run_docker() { export MODEL=${model_name} if [ "$with_cuda" -eq 1 ]; then - $dcomp -f docker-compose-cuda.yml up --build + $dcomp -f docker-compose-cuda.yml up --build -d else - $dcomp -f docker-compose-cpu.yml up --build + $dcomp -f docker-compose-cpu.yml up --build -d fi + echo "Docker container started in detached mode." + echo "To attach to the container, run: $dcomp exec dust3r-demo /bin/bash" } with_cuda=0 diff --git a/dust3r/analysis_scale.py b/dust3r/analysis_scale.py new file mode 100644 index 0000000..2bab52b --- /dev/null +++ b/dust3r/analysis_scale.py @@ -0,0 +1,67 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +import argparse + +def plot_scale_statistics(results_path): + """Plot scale statistics from benchmark results. + + Args: + results_path: Path to the CSV file containing benchmark results + """ + # Read the results + df = pd.read_csv(results_path) + + # Create figure with subplots + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10)) + + # Plot 1: Scale factors over items + ax1.plot(df['item_id'], df['optimal_scale'], 'b.', alpha=0.5, label='Scale factors') + ax1.axhline(y=df['optimal_scale'].mean(), color='r', linestyle='--', label=f'Mean: {df["optimal_scale"].mean():.2f}') + ax1.set_xlabel('Item ID') + ax1.set_ylabel('Optimal Scale') + ax1.set_title('Scale Factors Distribution') + ax1.legend() + ax1.grid(True) + + # Plot 2: Histogram of scale factors + ax2.hist(df['optimal_scale'], bins=30, alpha=0.7, color='b') + ax2.axvline(x=df['optimal_scale'].mean(), color='r', linestyle='--', + label=f'Mean: {df["optimal_scale"].mean():.2f}') + ax2.axvline(x=df['optimal_scale'].median(), color='g', linestyle='--', + label=f'Median: {df["optimal_scale"].median():.2f}') + ax2.set_xlabel('Scale Factor') + ax2.set_ylabel('Frequency') + ax2.set_title('Histogram of Scale Factors') + ax2.legend() + 
ax2.grid(True) + + plt.tight_layout() + output_path = os.path.join(os.path.dirname(results_path), 'scale_statistics.png') + plt.savefig(output_path) + plt.close() + + # Print statistics + print("\nScale Factor Statistics:") + print(f"Mean: {df['optimal_scale'].mean():.3f}") + print(f"Median: {df['optimal_scale'].median():.3f}") + print(f"Std: {df['optimal_scale'].std():.3f}") + print(f"Min: {df['optimal_scale'].min():.3f}") + print(f"Max: {df['optimal_scale'].max():.3f}") + print(f"25th percentile: {df['optimal_scale'].quantile(0.25):.3f}") + print(f"75th percentile: {df['optimal_scale'].quantile(0.75):.3f}") + +def main(): + parser = argparse.ArgumentParser(description="Analyze scale factors from DUSt3R benchmark results.") + parser.add_argument('--results', type=str, required=True, help='Path to the benchmark results CSV file.') + + args = parser.parse_args() + + if not os.path.exists(args.results): + print(f"Results file not found at {args.results}") + return + + plot_scale_statistics(args.results) + +if __name__ == "__main__": + main() diff --git a/dust3r/benchmark_ts_depth.py b/dust3r/benchmark_ts_depth.py new file mode 100644 index 0000000..2c34d88 --- /dev/null +++ b/dust3r/benchmark_ts_depth.py @@ -0,0 +1,588 @@ +import os,sys +code_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(f'{code_dir}/../') +import pandas as pd +from dataclasses import dataclass +import json +from typing import List, Dict +import os +import argparse +import torch +import logging +import time +import random +import numpy as np +import matplotlib.pyplot as plt +import imageio +from PIL import Image + +# dust3r imports +from dust3r.model import AsymmetricCroCo3DStereo, load_model as load_dust3r_model +from dust3r.inference import inference +from dust3r.image_pairs import make_pairs +from dust3r.utils.device import to_numpy +from dust3r.cloud_opt import global_aligner, GlobalAlignerMode +from dust3r.utils.image import load_images, ImgNorm +from 
dust3r.deserialize_depth_dataset import Boto3ResourceManager, deserialize_and_download_image, deserialize_and_download_tensor +from dust3r.demo import get_3D_model_from_scene + + +def load_model(args): + """Loads the DUSt3R model.""" + if args.weights: + model = load_dust3r_model(args.weights, args.device) + elif args.model_name: + model = AsymmetricCroCo3DStereo.from_pretrained(args.model_name).to(args.device) + else: + raise ValueError("Either --model_name or --weights must be provided.") + logging.info(f"Loaded DUSt3R model on {args.device}") + model.eval() + return model + + +@dataclass +class DepthData: + dataset_creator: str + camera_names: List[str] + item_id: int + split: str + image_paths: Dict[str, str] + depth_map_paths: Dict[str, str] + normal_map_paths: Dict[str, str] + visible_mask_paths: Dict[str, str] + world_from_camera_transforms_path: str + camera_intrinsics_path: str + + @classmethod + def from_row(cls, row): + return cls( + dataset_creator=row[0], + camera_names=list(row[1]), + item_id=row[2], + split=row[3], + image_paths=json.loads(row[4]), + depth_map_paths=json.loads(row[5]), + normal_map_paths=json.loads(row[6]), + visible_mask_paths=json.loads(row[7]), + world_from_camera_transforms_path=row[8], + camera_intrinsics_path=row[9], + ) + + +def deserialize_data(data: DepthData, resource_manager: Boto3ResourceManager, args): + """Deserialize all the data we need for a single benchmark item.""" + camera_ids = list(data.image_paths.keys()) + if len(camera_ids) < 2: + raise ValueError( + f"Need at least two images for inference, but got {len(camera_ids)}.") + + cam1_id, cam2_id = random.sample(camera_ids, 2) + logging.info(f"Randomly selected cameras: {cam1_id}, {cam2_id}") + + # It's conventional to use bit_depth=8 for RGB images. 
+ print(f"data.image_paths[cam1_id]: {data.image_paths[cam1_id]}") + print(f"data.image_paths[cam2_id]: {data.image_paths[cam2_id]}") + img1 = deserialize_and_download_image( + data.image_paths[cam1_id], bit_depth=8, resource_manager=resource_manager, dtype=torch.float32) * 255 + img2 = deserialize_and_download_image( + data.image_paths[cam2_id], bit_depth=8, resource_manager=resource_manager, dtype=torch.float32) * 255 + img1 = img1.cuda() + img2 = img2.cuda() + + + # With imageio + + # img1 = imageio.imread("data/20250611171250_left.png") + # img2 = imageio.imread("data/20250611171250_right.png") + # if img1.shape[-1] == 4: + # img1 = img1[..., :3] + # if img2.shape[-1] == 4: + # img2 = img2[..., :3] + # img1 = torch.as_tensor(img1).cuda().float().permute(2, 0, 1) + # img2 = torch.as_tensor(img2).cuda().float().permute(2, 0, 1) + # print(f"After img1.shape: {img1.shape} {img1.dtype=} {img1.min()=} {img1.max()=}") + # print(f"After img2.shape: {img2.shape} {img2.dtype=} {img2.min()=} {img2.max()=}") + + + depth_gt = deserialize_and_download_tensor( + data.depth_map_paths[cam1_id], resource_manager=resource_manager) + depth_gt = depth_gt + print(f"GT depth image max : {depth_gt.max()}, min: {depth_gt.min()}") + print(f"img1.shape before crop: {img1.shape}") + + all_intrinsics = deserialize_and_download_tensor( + data.camera_intrinsics_path, resource_manager=resource_manager) + + cam1_idx = data.camera_names.index(cam1_id) + intrinsics = all_intrinsics[cam1_idx] + + all_world_from_camera_transforms = deserialize_and_download_tensor( + data.world_from_camera_transforms_path, resource_manager=resource_manager) + cam2_idx = data.camera_names.index(cam2_id) + + transform1 = all_world_from_camera_transforms[cam1_idx] + transform2 = all_world_from_camera_transforms[cam2_idx] + + + # The translation vector is the last column of the 4x4 matrix + t1 = transform1[:3, 3] + t2 = transform2[:3, 3] + + gt_baseline = torch.linalg.norm(t1 - t2).item() + refactored_intrinsics = 
intrinsics.clone() + print(f"Original intrinsics: \n{intrinsics}") + print(f"GT Baseline: {gt_baseline}") + C, H, W = img1.shape[-3:] + # dust3r works well with smaller images, let's not crop to a large size + # target_h, target_w = 1200, 1600 + + # if H > target_h: + # y_offset = (H - target_h) // 2 + # img1 = img1[..., y_offset:y_offset + target_h, :] + # img2 = img2[..., y_offset:y_offset + target_h, :] + # depth_gt = depth_gt[..., y_offset:y_offset + target_h, :] + # # adjust intrinsics. cy is usually intrinsics[..., 1, 2] + # refactored_intrinsics[..., 1, 2] -= y_offset + + # if W > target_w: + # x_offset = (W - target_w) // 2 + # img1 = img1[..., :, x_offset:x_offset + target_w] + # img2 = img2[..., :, x_offset:x_offset + target_w] + # depth_gt = depth_gt[..., :, x_offset:x_offset + target_w] + # # adjust intrinsics. cx is usually intrinsics[..., 0, 2] + # refactored_intrinsics[..., 0, 2] -= x_offset + + + # The model expects a batch dimension + return img1[None], img2[None], refactored_intrinsics, depth_gt, gt_baseline, transform1, transform2 + + +def _resize_image(image_data, size): + """Helper to resize image and adjust focals, inspired by dust3r.utils.image""" + rgb = image_data['rgb'] + old_h, old_w = rgb.shape[:2] + + if isinstance(size, int): + new_w, new_h = size, size + else: + new_w, new_h = size + + pil_img = Image.fromarray(rgb) + pil_img_resized = pil_img.resize((new_w, new_h), Image.Resampling.LANCZOS) + + resized_rgb = np.array(pil_img_resized) + + fx, fy = image_data['focals'] + new_fx = fx * new_w / old_w + new_fy = fy * new_h / old_h + + return {'rgb': resized_rgb, 'focals': (new_fx, new_fy), 'path': image_data['path']} + + +def prepare_image_for_dust3r(img_tensor, size, idx=0): + """Prepare image tensor for dust3r input format. 
+ + Args: + img_tensor: Input tensor (1, C, H, W) + size: Target size for resizing + idx: Index of the image in the sequence + + Returns: + dict: Image data in dust3r format with img, true_shape, idx, and instance + """ + # Convert to numpy and permute to HWC + img_np = img_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy().astype(np.uint8) + + # Convert to PIL Image for resizing + pil_img = Image.fromarray(img_np) + W1, H1 = pil_img.size + + # Resize according to dust3r's logic + if size == 224: + # resize short side to 224 (then crop) + pil_img = _resize_pil_image(pil_img, round(size * max(W1/H1, H1/W1))) + else: + # resize long side to 512 + pil_img = _resize_pil_image(pil_img, size) + + # Center crop + W, H = pil_img.size + cx, cy = W//2, H//2 + if size == 224: + half = min(cx, cy) + pil_img = pil_img.crop((cx-half, cy-half, cx+half, cy+half)) + else: + halfw, halfh = ((2*cx)//16)*8, ((2*cy)//16)*8 + if W == H: # if square + halfh = 3*halfw/4 + pil_img = pil_img.crop((cx-halfw, cy-halfh, cx+halfw, cy+halfh)) + + # Convert to dust3r format + img_norm = ImgNorm(pil_img) + return { + 'img': img_norm[None], # Remove [None] as it's handled by collate_with_cat + 'true_shape': np.int32([pil_img.size[::-1]]), + 'idx': idx, + 'instance': str(idx) + } + + +def _resize_pil_image(img, size): + """Resize PIL image maintaining aspect ratio.""" + W, H = img.size + if W > H: + new_W = size + new_H = int(H * size / W) + else: + new_H = size + new_W = int(W * size / H) + return img.resize((new_W, new_H), Image.Resampling.LANCZOS) + + +def find_optimal_scale(pred_depth, gt_depth): + """Find the optimal scale factor between predicted and ground truth depth. 
+ + Args: + pred_depth: Predicted depth map (numpy array or torch tensor) + gt_depth: Ground truth depth map (numpy array or torch tensor) + + Returns: + scale: Optimal scale factor + error: Mean absolute error after scaling + """ + # Convert to numpy if needed + if torch.is_tensor(pred_depth): + pred_depth = pred_depth.cpu().numpy() + if torch.is_tensor(gt_depth): + gt_depth = gt_depth.cpu().numpy() + + # Remove invalid values + valid_mask = (gt_depth > 0) & np.isfinite(gt_depth) & np.isfinite(pred_depth) + if not np.any(valid_mask): + return 1.0, float('inf') + + # Compute scale using median ratio + ratios = gt_depth[valid_mask] / (pred_depth[valid_mask] + 1e-6) + scale = np.median(ratios) + + # Compute error after scaling + scaled_pred = pred_depth * scale + error = np.mean(np.abs(scaled_pred[valid_mask] - gt_depth[valid_mask])) + + return scale, error + + +def run_dust3r_inference(model, img1, img2, intrinsics, args, gt_pose1=None, gt_pose2=None, niter=300, schedule='cosine', lr=0.01): + """Runs inference on a pair of image tensors using DUSt3R. + + Args: + model: The DUSt3R model. + img1: Left image tensor (1, C, H, W). + img2: Right image tensor (1, C, H, W). + intrinsics: Camera intrinsics tensor. + args: Command-line arguments. + gt_pose1: Ground truth pose for first camera (4x4 matrix) + gt_pose2: Ground truth pose for second camera (4x4 matrix) + + Returns: + pred_depth: The predicted depth map (H, W). + inference_time: The time taken for inference. 
+ pred_baseline: The predicted baseline between cameras + """ + # Prepare images for dust3r + print(f"img1.shape: {img1.shape}") + img1_data = prepare_image_for_dust3r(img1, args.image_size, idx=0) + img2_data = prepare_image_for_dust3r(img2, args.image_size, idx=1) + # print(f"img1_data: {img1_data}") + + print(f"img1_data: {img1_data['img'].shape}") + + # Calculate scale factor from image shapes + original_h, original_w = img1.shape[2:] # Get H, W from (1, C, H, W) + resized_h, resized_w = img1_data['img'].shape[2:] # Get H, W from resized image + scale = min(original_h/resized_h, original_w/resized_w) + print(f"Scale factor: {scale}") + # Get focal lengths from intrinsics + focals = (intrinsics[0, 0].item()/scale, intrinsics[1, 1].item()/scale) + + gt_poses = [gt_pose1, gt_pose2] + + # Create list of images in dust3r format + loaded_imgs = [img1_data, img2_data] + pairs = make_pairs(loaded_imgs, prefilter=None, symmetrize=True) + + start_time = time.time() + with torch.cuda.amp.autocast(True): + output = inference(pairs, model, args.device, batch_size=1) + inference_time = time.time() - start_time + + # Enable gradients for optimization + torch.autograd.set_grad_enabled(True) + + scene = global_aligner(output, device=args.device, mode=GlobalAlignerMode.ModularPointCloudOptimizer, optimize_pp=True) + + scene.preset_pose([pose for pose in gt_poses], [True, True]) + scene.preset_focal([focals[0], focals[1]], [True, True]) + loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr) + print(f"Focals: {scene.get_focals()}") + # Disable gradients after optimization + torch.autograd.set_grad_enabled(False) + + depth_maps = to_numpy(scene.get_depthmaps()) + pred_depth = depth_maps[0] # Depth for the first image + + # Get predicted camera poses + pred_poses = scene.get_im_poses() + pred_pose1 = pred_poses[0] # First camera pose + pred_pose2 = pred_poses[1] # Second camera pose + + # Print predicted camera poses + # print("\nPredicted Camera 
Poses:") + # print("Camera 1 (Reference):") + # print(pred_pose1) + # print("\nCamera 2:") + # print(pred_pose2) + + # Calculate predicted baseline from camera poses + pred_t1 = pred_pose1[:3, 3] # Translation vector of first camera + pred_t2 = pred_pose2[:3, 3] # Translation vector of second camera + pred_baseline = np.linalg.norm(pred_t2.cpu().numpy() - pred_t1.cpu().numpy()) + print(f"Predicted Baseline: {pred_baseline}") + + return pred_depth, inference_time, pred_baseline, scene + + +def compare_and_visualize(img1, pred_depth, depth_gt, item_id, out_dir): + """ + Creates a 2x2 plot comparing predicted depth with ground truth. + Saves the plot to a file. + """ + if isinstance(img1, torch.Tensor): + img1 = img1.squeeze(0).permute(1, 2, 0).cpu().numpy().astype(np.uint8) + if isinstance(depth_gt, torch.Tensor): + depth_gt = depth_gt.cpu().numpy() + + + # Squeeze out channel dimension if it exists + if depth_gt.ndim == 3 and depth_gt.shape[0] == 1: + depth_gt = np.squeeze(depth_gt, axis=0) + + # Handle invalid values in predicted depth + pred_depth = np.nan_to_num(pred_depth, nan=0.0, posinf=0.0, neginf=0.0) + + depth_diff = np.abs(pred_depth - depth_gt) + + fig, axes = plt.subplots(3, 2, figsize=(16, 18)) + fig.suptitle(f"Item ID: {item_id}") + + im = axes[0, 0].imshow(img1) + axes[0, 0].set_title("Original Image") + axes[0, 0].axis('off') + + im = axes[0, 1].imshow(depth_diff, cmap='hot', vmin=0, vmax=0.5) + axes[0, 1].set_title("Depth Difference (abs)") + axes[0, 1].axis('off') + fig.colorbar(im, ax=axes[0, 1]) + + # Determine shared color range for depth plots + valid_pred_depth = pred_depth[np.isfinite(pred_depth)] + valid_depth_gt = depth_gt[np.isfinite(depth_gt)] + + vmin, vmax = None, None + if valid_pred_depth.size > 0 and valid_depth_gt.size > 0: + vmin = min(np.min(valid_pred_depth), np.min(valid_depth_gt)) + vmax = max(np.max(valid_pred_depth), np.max(valid_depth_gt)) + print(f"Using visualization range: vmin={vmin}, vmax={vmax}") + else: + 
logging.warning(f"Could not determine a valid color range for item {item_id}. Using separate color bars.") + print("Warning: Could not determine valid color range") + + im = axes[1, 0].imshow(depth_gt, cmap='viridis', vmin=vmin, vmax=vmax) + axes[1, 0].set_title("Ground Truth Depth") + axes[1, 0].axis('off') + fig.colorbar(im, ax=axes[1, 0]) + + im = axes[1, 1].imshow(pred_depth, cmap='viridis', vmin=vmin, vmax=vmax) + axes[1, 1].set_title("Predicted Depth") + axes[1, 1].axis('off') + fig.colorbar(im, ax=axes[1, 1]) + + # Add side-by-side histograms + valid_pred_flat = valid_pred_depth.flatten() + valid_gt_flat = valid_depth_gt.flatten() + + if valid_pred_flat.size > 0 and valid_gt_flat.size > 0: + all_valid_depths = np.concatenate([valid_pred_flat, valid_gt_flat]) + # Use percentiles to avoid extreme outliers skewing the histogram range + bins = np.linspace(np.percentile(all_valid_depths, 1), np.percentile(all_valid_depths, 99), 100) + + axes[2, 0].hist(valid_gt_flat, bins=bins, color='blue', alpha=0.7) + axes[2, 0].set_title("Ground Truth Depth Histogram") + axes[2, 0].set_xlabel("Depth") + axes[2, 0].set_ylabel("Frequency") + + axes[2, 1].hist(valid_pred_flat, bins=bins, color='green', alpha=0.7) + axes[2, 1].set_title("Predicted Depth Histogram") + axes[2, 1].set_xlabel("Depth") + axes[2, 1].sharey(axes[2, 0]) # Share y-axis for better comparison + else: + axes[2, 0].text(0.5, 0.5, "No valid GT data for histogram", ha='center', va='center') + axes[2, 0].axis('off') + axes[2, 1].text(0.5, 0.5, "No valid Pred data for histogram", ha='center', va='center') + axes[2, 1].axis('off') + + plt.tight_layout() + output_path = os.path.join(out_dir, f"{item_id}_depth_comparison.png") + plt.savefig(output_path) + logging.info(f"Saved comparison plot to {output_path}") + plt.close(fig) + + +def main(args): + model = load_model(args) + resource_manager = Boto3ResourceManager() + + # Initialize DataFrame to store results + results_df = pd.DataFrame(columns=[ + 'item_id', 
'mean_error', + 'gt_min', 'gt_max', 'gt_mean', + 'pred_min', 'pred_max', 'pred_mean', + 'inference_time' + ]) + + def data_fn(): + df = pd.read_parquet(args.meta_data_path) + # Set random seed for reproducibility + np.random.seed(1) + random.seed(1) + torch.manual_seed(1) + for i in range(len(df)): + idx = np.random.randint(0, len(df)) + yield DepthData.from_row(df.iloc[idx]) + + processed_count = 0 + for data in data_fn(): + try: + if args.limit_num is not None and processed_count >= args.limit_num: + break + + logging.info(f"Processing item {data.item_id}") + try: + img1, img2, intrinsics, depth_gt, gt_baseline, gt_pose1, gt_pose2 = deserialize_data( + data, resource_manager, args) + except ValueError as e: + logging.warning(f"Skipping item {data.item_id} due to: {e}") + continue + + pred_depth_low_res, inference_time, _, scene = run_dust3r_inference( + model, img1, img2, intrinsics, args, gt_pose1, gt_pose2) + + # Save 3D model as PLY + # print(f"Saving 3D model for item {data.item_id}...") + # try: + # model_filename = f"{data.item_id}_model.ply" + # model_output_path = get_3D_model_from_scene( + # outdir=args.out_dir, + # silent=False, + # scene=scene, + # glb_name=model_filename + # ) + # if model_output_path: + # print(f"Saved 3D model to {model_output_path}") + # else: + # print(f"Warning: Could not generate or save 3D model for item {data.item_id}.") + # except Exception as e: + # print(f"Error saving 3D model for item {data.item_id}: {e}") + + # Resize predicted depth to match ground truth depth resolution + H, W = img1.shape[2:] + pred_depth_tensor = torch.from_numpy(pred_depth_low_res).unsqueeze(0).unsqueeze(0) + pred_depth_resized = torch.nn.functional.interpolate(pred_depth_tensor, size=(H, W), mode='bilinear', align_corners=False) + pred_depth = pred_depth_resized.squeeze().cpu().numpy() + + print(f"Pred depth min: {pred_depth.min()}, max: {pred_depth.max()}, mean: {pred_depth.mean()}") + print(f"Pred depth shape: {pred_depth.shape}") + + # Record 
statistics + stats = { + 'item_id': data.item_id, + 'mean_error': np.mean(np.abs(pred_depth - depth_gt.cpu().numpy())), + 'gt_min': depth_gt.min(), + 'gt_max': depth_gt.max(), + 'gt_mean': depth_gt.mean(), + 'pred_min': pred_depth.min(), + 'pred_max': pred_depth.max(), + 'pred_mean': pred_depth.mean(), + 'inference_time': inference_time + } + results_df = pd.concat([results_df, pd.DataFrame([stats])], ignore_index=True) + + print(f"\nDepth Statistics for item {data.item_id}:") + print(f"Ground Truth - min: {depth_gt.min():.3f}, max: {depth_gt.max():.3f}, mean: {depth_gt.mean():.3f}") + print(f"Predicted - min: {pred_depth.min():.3f}, max: {pred_depth.max():.3f}, mean: {pred_depth.mean():.3f}") + print(f"Mean absolute error: {stats['mean_error']:.3f}") + + if isinstance(pred_depth, torch.Tensor): + pred_depth = pred_depth.cpu().numpy() + + compare_and_visualize(img1, pred_depth, depth_gt, data.item_id, args.out_dir) + + logging.info( + f"Inference time: {inference_time:.4f}s, Predicted depth map shape: {pred_depth.shape}") + + processed_count += 1 + + # Save results every 10 scenes + if processed_count % 10 == 0: + results_path = os.path.join(args.out_dir, 'depth_benchmark_results.csv') + results_df.to_csv(results_path, index=False) + logging.info(f"Saved results to {results_path}") + except Exception as err: + print(f"error handling {data.item_id}") + continue + + + + # Save final results + results_path = os.path.join(args.out_dir, 'depth_benchmark_results.csv') + results_df.to_csv(results_path, index=False) + logging.info(f"Saved final results to {results_path}") + + # Print summary statistics + print("\nSummary Statistics:") + print(f"Total scenes processed: {processed_count}") + print(f"Mean error: {results_df['mean_error'].mean():.3f} ± {results_df['mean_error'].std():.3f}") + print(f"Mean inference time: {results_df['inference_time'].mean():.3f}s ± {results_df['inference_time'].std():.3f}s") + + +if __name__ == "__main__": + code_dir = 
os.path.dirname(os.path.realpath(__file__)) + parser = argparse.ArgumentParser(description="Run DUSt3R depth estimation and compare with ground truth.") + + # Model arguments + model_group = parser.add_mutually_exclusive_group(required=True) + model_group.add_argument("--weights", type=str, help="Path to DUSt3R model weights (.pth file).") + model_group.add_argument("--model_name", type=str, default="DUSt3R_ViTLarge_BaseDecoder_512_dpt", help="Name of the model from HuggingFace Hub (e.g., 'DUSt3R_ViTLarge_BaseDecoder_512_dpt').") + + # Data arguments + parser.add_argument('--meta_data_path', default="metadata/depth_live_1724981057", type=str, help='Path to metadata parquet file.') + + # Output arguments + parser.add_argument('--out_dir', default=f'{code_dir}/../output/dust3r_benchmark/', type=str, help='The directory to save results.') + + # Inference arguments + parser.add_argument("--device", type=str, default='cuda', help="PyTorch device to use ('cuda' or 'cpu').") + parser.add_argument("--image_size", type=int, default=512, choices=[224, 512], help="Image size for DUSt3R processing. Default: 512.") + parser.add_argument("--limit-num", type=int, help="Limit the number of items to process. If not set, process all items.") + + args = parser.parse_args() + + if args.device == 'cuda' and not torch.cuda.is_available(): + print("CUDA is not available. 
Switching to CPU.") + args.device = 'cpu' + + print("Starting DUSt3R depth benchmark...") + torch.autograd.set_grad_enabled(False) + os.makedirs(args.out_dir, exist_ok=True) + + main(args) \ No newline at end of file diff --git a/dust3r/demo.py b/dust3r/demo.py index c491be0..3b4c9cc 100644 --- a/dust3r/demo.py +++ b/dust3r/demo.py @@ -65,7 +65,7 @@ def print_with_timestamp(*args, **kwargs): def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05, cam_color=None, as_pointcloud=False, - transparent_cams=False, silent=False): + transparent_cams=False, silent=False, glb_name='scene.glb'): assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals) pts3d = to_numpy(pts3d) imgs = to_numpy(imgs) @@ -79,7 +79,9 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, pts = np.concatenate([p[m] for p, m in zip(pts3d, mask)]) col = np.concatenate([p[m] for p, m in zip(imgs, mask)]) pct = trimesh.PointCloud(pts.reshape(-1, 3), colors=col.reshape(-1, 3)) + scene.add_geometry(pct) + print(f"Added point cloud to scene. 
Number of points: {len(pts)}") else: meshes = [] for i in range(len(imgs)): @@ -100,15 +102,18 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, rot = np.eye(4) rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix() scene.apply_transform(np.linalg.inv(cams2world[0] @ OPENGL @ rot)) - outfile = os.path.join(outdir, 'scene.glb') + pct.apply_transform(np.linalg.inv(cams2world[0] @ OPENGL @ rot)) + combined_mesh = trimesh.util.concatenate([mesh for mesh in scene.geometry.values()]) + outfile = os.path.join(outdir, glb_name) if not silent: print('(exporting 3D scene to', outfile, ')') - scene.export(file_obj=outfile) + # combined_mesh.export(file_obj=outfile) + pct.export(file_obj=outfile) return outfile -def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False, - clean_depth=False, transparent_cams=False, cam_size=0.05): +def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud=True, mask_sky=False, + clean_depth=False, transparent_cams=False, cam_size=0.05, glb_name='scene.glb'): """ extract 3D_model (glb file) from a reconstructed scene """ @@ -129,7 +134,7 @@ def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud scene.min_conf_thr = float(scene.conf_trf(torch.tensor(min_conf_thr))) msk = to_numpy(scene.get_masks()) return _convert_scene_output_to_glb(outdir, rgbimg, pts3d, msk, focals, cams2world, as_pointcloud=as_pointcloud, - transparent_cams=transparent_cams, cam_size=cam_size, silent=silent) + transparent_cams=transparent_cams, cam_size=cam_size, silent=silent, glb_name=glb_name) def get_reconstructed_scene(outdir, model, device, silent, image_size, filelist, schedule, niter, min_conf_thr, @@ -280,4 +285,4 @@ def main_demo(tmpdirname, model, device, image_size, server_name, server_port, s inputs=[scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size], outputs=outmodel) - 
import abc
from io import BytesIO
from urllib.parse import urlparse

import boto3
import cv2
import numpy as np
import torch
from zstandard import ZstdDecompressor


class AbstractResourceManager(abc.ABC):
    """Minimal interface for fetching raw bytes addressed by a URI."""

    @abc.abstractmethod
    def get(self, uri: str) -> bytes:
        """Return the raw bytes stored at *uri*."""
        raise NotImplementedError


class Boto3ResourceManager(AbstractResourceManager):
    """Resource manager that downloads objects from S3 using boto3."""

    def __init__(self):
        self.s3_client = boto3.client("s3")

    def get(self, s3_uri: str) -> bytes:
        """Download and return the object at *s3_uri*.

        Raises
        ------
        ValueError
            If the URI scheme is not ``s3``.
        """
        parsed_uri = urlparse(s3_uri)
        if parsed_uri.scheme != "s3":
            raise ValueError(f"URI scheme must be s3, not {parsed_uri.scheme}")
        bucket = parsed_uri.netloc
        key = parsed_uri.path.lstrip("/")
        response = self.s3_client.get_object(Bucket=bucket, Key=key)
        return response["Body"].read()


def unpack_bytes_np(compressed_bytes: bytes) -> np.ndarray:
    """Deserialize a numpy array from ``np.save``-formatted bytes.

    NOTE(review): ``allow_pickle=True`` can execute arbitrary code when
    loading untrusted data -- only use with trusted sources.
    """
    return np.load(BytesIO(compressed_bytes), allow_pickle=True)


def zstd_decompress_bytes(compressed_bytes: bytes) -> bytes:
    """Decompress a zstandard-compressed byte string."""
    return ZstdDecompressor().decompress(compressed_bytes)


def zstd_decompress_arr(compressed_bytes: bytes) -> np.ndarray:
    """Decompress, then deserialize, a zstandard-compressed numpy array."""
    return unpack_bytes_np(zstd_decompress_bytes(compressed_bytes))


def deserialize_and_download_image(
    s3_uri: str, bit_depth: int, resource_manager: AbstractResourceManager, dtype: torch.dtype
) -> torch.Tensor:
    """Download an encoded image from S3 and decode it to a CHW torch tensor.

    Shared utility for DeserializedObjectView and DeserializedImage
    (defined elsewhere); look at those class docstrings for more information.

    Parameters
    ----------
    s3_uri : str
        S3 URI of the encoded image.
    bit_depth : int
        Bit depth of the stored image; must be in the range [8, 16].
    resource_manager : AbstractResourceManager
        Used to fetch the raw image bytes.
    dtype : torch.dtype
        Output dtype. Must be torch.float or torch.half when bit_depth > 8.

    Returns
    -------
    torch.Tensor
        (3, H, W) tensor in RGB channel order; float dtypes are normalized
        to [0, 1] by the maximum representable value for *bit_depth*.
    """
    image_bytes = resource_manager.get(s3_uri)
    if bit_depth == 8:
        image_np = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
    elif 8 < bit_depth <= 16:
        # note that torch half starts losing precision for bit_depth > 11; it becomes a
        # choice for the user to tradeoff loading speed vs precision. For bit_depth=12,
        # the max error is 1px (out of 4096 slots).
        if dtype not in {torch.float, torch.half}:
            raise ValueError(f"dtype must be torch.float or torch.half if bit_depth > 8, not {dtype}")
        image_np = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        # have to convert to float16 or float32 first, since np.uint16 is not supported by pytorch
        dtype_np = np.float16 if dtype == torch.half else np.float32
        image_np = image_np.astype(dtype_np)
    else:
        raise ValueError(f"bit_depth must be in the range [8, 16], not {bit_depth}!")

    image = torch.from_numpy(image_np).permute(2, 0, 1).to(dtype)
    if dtype in {torch.float, torch.half}:
        # Normalize to [0, 1] based on the source bit depth.
        image = image / (2**bit_depth - 1)

    return image


def deserialize_and_download_tensor(s3_uri: str, resource_manager: AbstractResourceManager) -> torch.Tensor:
    """Deserialize and download a zstd-compressed tensor from S3.

    Parameters
    ----------
    s3_uri : str
        The S3 URI of the tensor.
    resource_manager : AbstractResourceManager

    Returns
    -------
    torch.Tensor
    """
    tensor_bytes = resource_manager.get(s3_uri)
    return torch.from_numpy(zstd_decompress_arr(tensor_bytes))


def test_deserialize_and_download_image():
    """Smoke test: download a known 8-bit image and write it to disk for inspection."""
    test_image_file = "s3://covariant-annotation-pipeline/resource_root/sim_scene_annotations/images-camera_array_01/409a/409ad6ba22b2cb129609ecbd52e5446e5f90d9920563103763e558eb576ffcf5.png"
    resource_manager = Boto3ResourceManager()
    tensor = deserialize_and_download_image(test_image_file, bit_depth=8, resource_manager=resource_manager, dtype=torch.float32)
    image_np = (tensor * 255).to(torch.uint8).permute(1, 2, 0).cpu().numpy()
    # FIX: cv2.imwrite expects BGR channel order, but the decoded tensor is RGB --
    # convert back so the saved PNG has correct colors.
    cv2.imwrite("test_image.png", cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))


def test_deserialize_and_download_tensor():
    """Smoke test: download a depth map and save a normalized grayscale rendering."""
    test_tensor_file = "s3://covariant-annotation-pipeline/resource_root/sim_scene_annotations/depth_maps-camera_array_01/fc64/fc64581dc26ef911ed77bb674b8736749351c832a5ded9d407d812da733304e9.blob"
    resource_manager = Boto3ResourceManager()
    tensor = deserialize_and_download_tensor(test_tensor_file, resource_manager)
    # Scale depth values to 0-255 range for visualization
    depth_min = tensor.min()
    depth_max = tensor.max()
    depth_normalized = ((tensor - depth_min) / (depth_max - depth_min) * 255).to(torch.uint8)

    # Save as grayscale image (single channel, so no BGR/RGB concern here)
    cv2.imwrite("test_depth.png", depth_normalized.cpu().numpy())
    print(f"Saved depth image with range [{depth_min:.2f}, {depth_max:.2f}]")


if __name__ == "__main__":
    test_deserialize_and_download_tensor()
import argparse
import copy  # noqa: F401  (kept from original; not currently used)
import glob  # noqa: F401  (kept from original; not currently used)
import json
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np  # noqa: F401  (kept from original; not currently used)
import torch
from scipy.spatial.transform import Rotation  # noqa: F401  (kept from original)

# DUSt3R imports
from dust3r.cloud_opt import GlobalAlignerMode, global_aligner
from dust3r.demo import get_3D_model_from_scene
from dust3r.image_pairs import make_pairs
from dust3r.inference import inference
from dust3r.model import AsymmetricCroCo3DStereo, load_model
from dust3r.utils.device import to_numpy
from dust3r.utils.image import load_images


def _pair_paths(pair_dir, basename):
    """Return the (left, right) image paths for a pair basename."""
    return (
        os.path.join(pair_dir, f"{basename}_left.png"),
        os.path.join(pair_dir, f"{basename}_right.png"),
    )


def _save_camera_parameters(scene, output_dir, basename):
    """Dump per-view intrinsics and camera poses to <basename>_camera_parameters.json."""
    if scene.get_intrinsics() is None or scene.get_im_poses() is None:
        print(f"Warning: Could not retrieve intrinsics or poses for {basename}. Skipping camera parameter saving.")
        return
    camera_params = {
        "intrinsics": to_numpy(scene.get_intrinsics()).tolist(),
        "im_poses": to_numpy(scene.get_im_poses()).tolist(),
    }
    json_output_path = os.path.join(output_dir, f"{basename}_camera_parameters.json")
    try:
        with open(json_output_path, 'w') as f:
            json.dump(camera_params, f, indent=4)
        print(f"Saved camera parameters to {json_output_path}")
    except OSError as e:
        print(f"Error saving camera parameters for {basename} to {json_output_path}: {e}")


def _save_3d_model(scene, output_dir, basename):
    """Export the aligned scene as <basename>_pct.ply; best-effort, errors are logged."""
    print(f"Saving 3D model for {basename}...")
    try:
        # min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams and
        # cam_size all use get_3D_model_from_scene's defaults; make them CLI args
        # later if needed.
        model_output_path = get_3D_model_from_scene(
            outdir=output_dir,
            silent=False,  # set True for less verbose output
            scene=scene,
            glb_name=f"{basename}_pct.ply",
        )
        if model_output_path:
            print(f"Saved 3D model to {model_output_path}")
        else:
            print(f"Warning: Could not generate or save 3D model for {basename}.")
    except Exception as e:  # export can fail in many ways; keep the batch running
        print(f"Error saving 3D model for {basename}: {e}")


def _save_rgb_and_depth(scene, output_dir, basename):
    """Save RGB views plus colored and grayscale depth renderings.

    Returns True on success, False when the scene did not yield exactly
    two views (the caller skips to the next basename in that case).
    """
    print("Saving RGB and Depth images...")
    rgb_images = scene.imgs
    depth_maps_tensor = scene.get_depthmaps()
    if depth_maps_tensor is None or len(depth_maps_tensor) == 0:
        print(f"Error: No depth maps found for basename {basename}. Cannot save depth images.")
        return False
    depth_maps = to_numpy(depth_maps_tensor)

    if len(rgb_images) != 2 or len(depth_maps) != 2:
        print(f"Error: Expected 2 RGB images and 2 depth maps for {basename}, "
              f"but found {len(rgb_images)} RGBs and {len(depth_maps)} depths.")
        return False

    for side, rgb, depth in zip(("left", "right"), rgb_images, depth_maps):
        rgb_path = os.path.join(output_dir, f"{basename}_{side}_rgb.png")
        plt.imsave(rgb_path, rgb)
        print(f"Saved {side} RGB image to {rgb_path}")

        colored_path = os.path.join(output_dir, f"{basename}_{side}_depth_colored.png")
        plt.imsave(colored_path, depth, cmap='viridis')
        print(f"Saved {side} colored depth image to {colored_path}")

        # NOTE(review): this writes a normalized 8-bit grayscale rendering, not raw
        # metric depth; the "_raw" filename is kept for backward compatibility.
        raw_path = os.path.join(output_dir, f"{basename}_{side}_depth_raw.png")
        plt.imsave(raw_path, depth, cmap='gray')
        print(f"Saved {side} raw depth image to {raw_path}")

    print(f"Images saved successfully for {basename}.")
    return True


def main(model, args, basenames_list):
    """Run DUSt3R on each <basename>_left.png / <basename>_right.png pair.

    For every basename: load the pair, run inference, align with PairViewer,
    then save camera parameters (JSON), a 3D model (.ply), and RGB/depth PNGs
    into args.output_dir. Reports the average inference time at the end.

    Parameters
    ----------
    model : loaded DUSt3R model (already on args.device)
    args : parsed CLI namespace (image_pair_dir, output_dir, device, image_size, niter)
    basenames_list : list[str] of pair basenames to process
    """
    if not basenames_list:
        print("No image pair basenames to process.")
        return

    all_inference_times = []

    for current_basename in basenames_list:
        print(f"\n--- Processing basename: {current_basename} ---")

        left_path, right_path = _pair_paths(args.image_pair_dir, current_basename)
        print(f"Processing specific pair: \n Left: {left_path}\n Right: {right_path}")

        if not os.path.exists(left_path) or not os.path.exists(right_path):
            print(f"Error: One or both images for the pair not found. Searched for:\n {left_path}\n {right_path}")
            print("Please ensure both files exist and the paths are correct. Skipping this pair.")
            continue

        loaded_imgs_all = load_images([left_path, right_path], size=args.image_size, verbose=True)
        print(f"loaded_imgs_all: {loaded_imgs_all[0]['img'].shape} {loaded_imgs_all[0]['img'].dtype}")
        pairs = make_pairs(loaded_imgs_all, prefilter=None, symmetrize=True)

        print(f"Starting inference for {current_basename}...")
        start_time = time.time()
        output = inference(pairs, model, args.device, batch_size=1, verbose=True)
        inference_duration = time.time() - start_time
        all_inference_times.append(inference_duration)
        print(f"Inference for {current_basename} took {inference_duration:.2f} seconds.")

        print("Performing global alignment...")
        # PairViewer is the mode used by demo.py for exactly two views; full
        # optimization (PointCloudOptimizer, args.niter iterations) only applies
        # when more than two images share one scene, which never happens here
        # since each pair is processed independently.
        scene = global_aligner(output, device=args.device, mode=GlobalAlignerMode.PairViewer, verbose=True)

        _save_camera_parameters(scene, args.output_dir, current_basename)
        _save_3d_model(scene, args.output_dir, current_basename)
        if not _save_rgb_and_depth(scene, args.output_dir, current_basename):
            continue  # skip the "finished" banner, matching the original flow

        print(f"--- Finished processing basename: {current_basename} ---")

    if all_inference_times:
        avg_inference_time = sum(all_inference_times) / len(all_inference_times)
        print(f"\nAverage inference time over {len(all_inference_times)} pairs: {avg_inference_time:.2f} seconds.")
    else:
        print("\nNo inference was performed to calculate an average time.")


def _parse_args():
    """Build and parse the CLI for the pair-processing script."""
    parser = argparse.ArgumentParser(
        description="Process image pairs to generate RGB and colored depth map outputs using DUSt3R. \
            Accepts one or more specific basenames via --input_pair_basename, \
            or scans --image_pair_dir for all pairs if --input_pair_basename is omitted."
    )

    model_group = parser.add_mutually_exclusive_group(required=True)
    model_group.add_argument("--weights", type=str, help="Path to the model weights (.pth file).")
    model_group.add_argument("--model_name", type=str,
                             help="Name of the model (e.g., 'DUSt3R_ViTLarge_BaseDecoder_512_dpt') for HuggingFace Hub or local cache.")

    parser.add_argument("--input_pair_basename", type=str, nargs='+', default=None,
                        help="Optional. One or more basenames (e.g., 'image_001' 'image_002') of image pairs (_left.png/_right.png). \
                            If provided, only these pairs will be processed from the --image_pair_dir. \
                            If omitted, all pairs in --image_pair_dir will be scanned and processed.")
    parser.add_argument("--image_pair_dir", type=str, required=True,
                        help="Directory containing the image pairs. If --input_pair_basename is given, this is where those pairs are located. \
                            If --input_pair_basename is omitted, this directory will be scanned for all pairs.")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save the output images.")
    parser.add_argument("--device", type=str, default='cuda', help="PyTorch device to use ('cuda' or 'cpu'). Default: 'cuda'.")
    parser.add_argument("--image_size", type=int, default=512, choices=[224, 512], help="Image size for processing. Default: 512.")
    parser.add_argument("--niter", type=int, default=300,
                        help="Number of iterations for global alignment (used by PointCloudOptimizer mode, less relevant for PairViewer mode per pair).")

    return parser.parse_args()


def _discover_basenames(image_input_dir):
    """Scan *image_input_dir* for complete xxx_left.png / xxx_right.png pairs.

    A basename is included only when BOTH files exist, so checking every
    *_left.png file is sufficient (no separate *_right.png pass needed).
    Returns the basenames sorted for deterministic processing order.
    """
    found_basenames = set()
    for filename in os.listdir(image_input_dir):
        if filename.endswith("_left.png"):
            basename = filename.removesuffix("_left.png")
            if basename and os.path.exists(os.path.join(image_input_dir, f"{basename}_right.png")):
                found_basenames.add(basename)
    return sorted(found_basenames)


if __name__ == '__main__':
    parsed_args = _parse_args()

    if parsed_args.device == 'cuda' and not torch.cuda.is_available():
        print("CUDA is not available. Switching to CPU.")
        parsed_args.device = 'cpu'

    # Validate inputs and create the output directory once, up front.
    if not os.path.isdir(parsed_args.image_pair_dir):
        print(f"Error: Image pair directory not found or is not a directory: {parsed_args.image_pair_dir}")
        sys.exit(1)
    os.makedirs(parsed_args.output_dir, exist_ok=True)

    # Load model once for all pairs.
    print(f"Loading model... Device: {parsed_args.device}")
    if parsed_args.weights:
        model = load_model(parsed_args.weights, parsed_args.device)
    else:  # parsed_args.model_name must be set (mutually exclusive, required group)
        # Match demo.py's pattern: from_pretrained(...) then .to(device).
        # NOTE(review): the original passed device= to from_pretrained; confirm
        # against AsymmetricCroCo3DStereo.from_pretrained's signature.
        model = AsymmetricCroCo3DStereo.from_pretrained(parsed_args.model_name).to(parsed_args.device)
    print("Model loaded.")

    if parsed_args.input_pair_basename:
        basenames_to_process = parsed_args.input_pair_basename
        print(f"Processing specific basenames from arguments: {basenames_to_process}")
    else:
        print(f"No specific input_pair_basename provided. Scanning directory: {parsed_args.image_pair_dir}")
        basenames_to_process = _discover_basenames(parsed_args.image_pair_dir)
        if not basenames_to_process:
            print(f"No valid image pairs (e.g., xxx_left.png and xxx_right.png) found in {parsed_args.image_pair_dir}")
            sys.exit(1)
        print(f"Found {len(basenames_to_process)} unique image pair basenames in "
              f"'{parsed_args.image_pair_dir}': {basenames_to_process}")

    if basenames_to_process:
        main(model, parsed_args, basenames_to_process)
        print("\n--- All specified basenames processed. ---")
    else:
        print("No basenames were identified for processing.")