diff --git a/.gitignore b/.gitignore index 6b8e127..b939320 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,9 @@ best.pt .venv-formatters autonomy/src/autonomy.egg-info hydrus_software_stack.egg-info + +# PyTorch Models and YOLO weights +*.pt +*.pth +*.onnx +yolo11n.pt diff --git a/autonomy/src/computer_vision/detection_core.py b/autonomy/src/computer_vision/detection_core.py index bfc0a0d..b418a18 100644 --- a/autonomy/src/computer_vision/detection_core.py +++ b/autonomy/src/computer_vision/detection_core.py @@ -229,13 +229,15 @@ def color_filter_detection( ############################ # 3D calculations and transformations ############################ -def calculate_point_3d( +def calculate_3d_bounding_boxes( detections: List[custom_types.Detection], depth_image: np.ndarray, camera_intrinsic: tuple, ): """ - Calculate 3D points for detections using depth information. + Calculate 3D bounding boxes for detections using depth information. + This extends the existing point calculation to provide full 3D bounding boxes. + :param detections: List of Detection objects to update :param depth_image: Depth image as numpy array :param camera_intrinsic: Camera intrinsic parameters (fx, fy, cx, cy) @@ -247,6 +249,7 @@ def calculate_point_3d( detection.x2, detection.y2, ) + if depth_image is not None: x_min_int = int(x_min) x_max_int = int(x_max) @@ -255,25 +258,104 @@ def calculate_point_3d( # Extract the depth values within the bounding box bbox_depth = depth_image[y_min_int:y_max_int, x_min_int:x_max_int] + if bbox_depth.size > 0: - mean_depth = float(np.nanmean(bbox_depth)) # type: ignore - if not np.isnan(mean_depth): + # Filter out invalid depth values (NaN and zeros) + valid_depths = bbox_depth[~np.isnan(bbox_depth) & (bbox_depth > 0)] + + if len(valid_depths) > 0: fx, fy, cx, cy = camera_intrinsic - z = mean_depth - detection.depth = z + + # Calculate center point in 3D (same as before for backward compatibility) + mean_depth = float(np.mean(valid_depths)) + detection.depth = mean_depth x_center = (x_min + x_max) / 2.0 y_center = (y_min + y_max) / 2.0 - x = (x_center - cx) * z / fx - y = (y_center - cy) * z / fy - - detection.point = custom_types.Point3D(x=x, y=y, z=z) + x_3d = (x_center - cx) * mean_depth / fx + y_3d = (y_center - cy) * mean_depth / fy + z_3d = mean_depth + + detection.point = custom_types.Point3D(x=x_3d, y=y_3d, z=z_3d) + + # Calculate 3D bounding box dimensions + # Project 2D bounding box corners to 3D space using depth information + + # Sample depth at different points in the bounding box for better estimation + depth_samples = [] + sample_points = [ + (x_min_int, y_min_int), # top-left + (x_max_int, y_min_int), # top-right + (x_min_int, y_max_int), # bottom-left + (x_max_int, y_max_int), # bottom-right + (int(x_center), int(y_center)), # center + ] + + for px, py in sample_points: + # Ensure we don't go out of bounds + px = max(0, min(px, depth_image.shape[1] - 1)) + py = max(0, min(py, depth_image.shape[0] - 1)) + depth_val = depth_image[py, px] + if not np.isnan(depth_val) and depth_val > 0: + depth_samples.append(depth_val) + + # Use range of depths to estimate 3D box depth, or use mean if not enough samples + if len(depth_samples) >= 2: + min_depth = float(np.min(depth_samples)) + max_depth = float(np.max(depth_samples)) + depth_3d = max_depth - min_depth + # Use a minimum depth to avoid unrealistically thin objects + depth_3d = max(depth_3d, 0.1) + else: + # Fallback: estimate depth as a fraction of the mean depth + depth_3d = ( + mean_depth * 0.1 + ) # Assume object is 10% of its distance in depth + + # Calculate 3D width and height by projecting 2D box corners + # Project corners to 3D space + x1_3d = (x_min - cx) * mean_depth / fx + y1_3d = (y_min - cy) * mean_depth / fy + x2_3d = (x_max - cx) * mean_depth / fx + y2_3d = (y_max - cy) * mean_depth / fy + + width_3d = abs(x2_3d - x1_3d) + height_3d = abs(y2_3d - y1_3d) + + # Create the 3D bounding box + center_3d = custom_types.Point3D(x=x_3d, y=y_3d, z=z_3d) + detection.bbox_3d = custom_types.BoundingBox3D( + center=center_3d, + width=width_3d, + height=height_3d, + depth=depth_3d, + ) else: + # No valid depth data detection.point = custom_types.Point3D(x=0, y=0, z=0) detection.depth = 0 + detection.bbox_3d = None else: + # Empty bounding box region detection.point = custom_types.Point3D(x=0, y=0, z=0) detection.depth = 0 + detection.bbox_3d = None + + +def calculate_point_3d( + detections: List[custom_types.Detection], + depth_image: np.ndarray, + camera_intrinsic: tuple, +): + """ + Calculate 3D points for detections using depth information. + This function is kept for backward compatibility and now also calculates 3D bounding boxes. + :param detections: List of Detection objects to update + :param depth_image: Depth image as numpy array + :param camera_intrinsic: Camera intrinsic parameters (fx, fy, cx, cy) + """ + # Call the new 3D bounding box function which also calculates the point + calculate_3d_bounding_boxes(detections, depth_image, camera_intrinsic) def quaternion_to_transform_matrix(rotation: custom_types.Rotation3D) -> np.ndarray: @@ -314,7 +396,7 @@ def transform_to_global( imu_rotation: custom_types.Rotation3D, ): """ - Transform detection points from camera frame to global frame. + Transform detection points and 3D bounding boxes from camera frame to global frame. :param detections: List of Detection objects to transform :param imu_point: IMU position in global frame :param imu_rotation: IMU orientation as quaternion @@ -323,6 +405,7 @@ def transform_to_global( transform_matrix[0:3, 3] = [imu_point.x, imu_point.y, imu_point.z] for detection in detections: + # Transform the centroid point if detection.point is not None: point_homogeneous = np.array( [detection.point.x, detection.point.y, detection.point.z, 1.0] @@ -332,6 +415,30 @@ def transform_to_global( x=point_global[0], y=point_global[1], z=point_global[2] ) + # Transform the 3D bounding box center + if detection.bbox_3d is not None: + center_homogeneous = np.array( + [ + detection.bbox_3d.center.x, + detection.bbox_3d.center.y, + detection.bbox_3d.center.z, + 1.0, + ] + ) + center_global = np.dot(transform_matrix, center_homogeneous) + + # Create new 3D bounding box with transformed center + # Note: dimensions remain the same, only center is transformed + # In a more advanced implementation, we might also transform the orientation + detection.bbox_3d = custom_types.BoundingBox3D( + center=custom_types.Point3D( + x=center_global[0], y=center_global[1], z=center_global[2] + ), + width=detection.bbox_3d.width, + height=detection.bbox_3d.height, + depth=detection.bbox_3d.depth, + ) + ############################ # Detection Pipeline Manager @@ -387,9 +494,9 @@ def run_detections( # Process all detections for 3D calculations and transformations for detector_name, detections in pipelines_results: - # Calculate 3D points if depth and camera info available + # Calculate 3D points and bounding boxes if depth and camera info available if camera_intrinsics is not None and depth_image is not None: - calculate_point_3d(detections, depth_image, camera_intrinsics) + calculate_3d_bounding_boxes(detections, depth_image, camera_intrinsics) # Transform to global frame if IMU data available if imu_point is not None and imu_rotation is not None: diff --git a/autonomy/src/computer_vision/example_3d_bounding_boxes.py b/autonomy/src/computer_vision/example_3d_bounding_boxes.py new file mode 100644 index 0000000..a27f630 --- /dev/null +++ b/autonomy/src/computer_vision/example_3d_bounding_boxes.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +""" +Example usage of the 3D bounding box system. +This script demonstrates how to use the new 3D bounding box functionality. +""" + +import sys +import os + +# Add the autonomy src to the path +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import numpy as np +import custom_types +from computer_vision.detection_core import calculate_3d_bounding_boxes + + +def example_3d_bounding_box_usage(): + """Example showing how to use the 3D bounding box system.""" + print("=== 3D Bounding Box Example ===") + + # Create a mock detection (normally from YOLO) + detection = custom_types.Detection( + x1=200, y1=150, x2=400, y2=350, cls=0, conf=0.9 # 2D bounding box + ) + + # Create a mock depth image (normally from depth camera) + depth_image = np.full((480, 640), 5.0, dtype=np.float32) + depth_image[150:350, 200:400] = 3.0 # Object at 3 meters + + # Camera intrinsics (normally from camera calibration) + camera_intrinsics = (525.0, 525.0, 320.0, 240.0) # fx, fy, cx, cy + + # Calculate 3D bounding box + calculate_3d_bounding_boxes([detection], depth_image, camera_intrinsics) + + if detection.bbox_3d is not None: + print("✅ 3D Bounding Box calculated!") + print( + f"Center: ({detection.bbox_3d.center.x:.2f}, {detection.bbox_3d.center.y:.2f}, {detection.bbox_3d.center.z:.2f}) meters" + ) + print( + f"Size: {detection.bbox_3d.width:.2f} x {detection.bbox_3d.height:.2f} x {detection.bbox_3d.depth:.2f} meters" + ) + + # Get the 8 corner points + corners = detection.bbox_3d.get_corners() + print(f"Corner points: {len(corners)} corners calculated") + for i, corner in enumerate(corners[:4]): # Show first 4 corners + print(f" Corner {i+1}: ({corner.x:.2f}, {corner.y:.2f}, {corner.z:.2f})") + print(" ...") + + # Compare with legacy centroid point + print( + f"Legacy centroid: ({detection.point.x:.2f}, {detection.point.y:.2f}, {detection.point.z:.2f}) meters" + ) + else: + print("❌ Failed to calculate 3D bounding box") + + +if __name__ == "__main__": + example_3d_bounding_box_usage() diff --git a/autonomy/src/computer_vision/test_3d_bounding_boxes.py b/autonomy/src/computer_vision/test_3d_bounding_boxes.py new file mode 100644 index 0000000..98b30d7 --- /dev/null +++ b/autonomy/src/computer_vision/test_3d_bounding_boxes.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Test script for 3D bounding box functionality. +""" + +import os +import sys + +# Add the parent directory to the path to import custom_types and detection_core +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +import custom_types +import numpy as np + +from computer_vision.detection_core import ( + DetectionPipelineManager, + calculate_3d_bounding_boxes, +) + + +def test_bounding_box_3d_creation(): + """Test basic 3D bounding box creation and corner calculation.""" + print("Testing BoundingBox3D creation...") + + center = custom_types.Point3D(x=1.0, y=2.0, z=3.0) + bbox_3d = custom_types.BoundingBox3D( + center=center, width=2.0, height=4.0, depth=6.0 + ) + + corners = bbox_3d.get_corners() + print(f"Created 3D bounding box with center: {center}") + print( + f"Dimensions: width={bbox_3d.width}, height={bbox_3d.height}, depth={bbox_3d.depth}" + ) + print(f"Number of corners: {len(corners)}") + + # Verify we have 8 corners + assert len(corners) == 8, f"Expected 8 corners, got {len(corners)}" + + # Verify corner positions (basic sanity check) + expected_x_values = [0.0, 2.0] # center(1.0) +/- width/2(1.0) + expected_y_values = [0.0, 4.0] # center(2.0) +/- height/2(2.0) + expected_z_values = [0.0, 6.0] # center(3.0) +/- depth/2(3.0) + + for corner in corners: + assert corner.x in expected_x_values, f"Unexpected X value: {corner.x}" + assert corner.y in expected_y_values, f"Unexpected Y value: {corner.y}" + assert corner.z in expected_z_values, f"Unexpected Z value: {corner.z}" + + print("✓ BoundingBox3D creation test passed!") + + +def test_3d_bounding_box_calculation(): + """Test 3D bounding box calculation from 2D detections and depth.""" + print("\nTesting 3D bounding box calculation...") + + # Create test detection (2D bounding box) + detection = custom_types.Detection( + x1=100, y1=50, x2=200, y2=150, cls=0, conf=0.9 # 100x100 pixel box + ) + + # Create synthetic depth image with constant depth of 5 meters + depth_image = np.full((480, 640), 5.0, dtype=np.float32) + + # Add some depth variation in the detection region to test depth estimation + depth_image[50:150, 100:200] = 4.8 # Main object at 4.8m + depth_image[60:80, 120:140] = 5.2 # Some variation + + # Camera intrinsics (typical values) + camera_intrinsics = (500.0, 500.0, 320.0, 240.0) # fx, fy, cx, cy + + # Calculate 3D bounding box + calculate_3d_bounding_boxes([detection], depth_image, camera_intrinsics) + + # Verify results + assert detection.point is not None, "3D point should be calculated" + assert detection.bbox_3d is not None, "3D bounding box should be calculated" + + print( + f"3D point: x={detection.point.x:.3f}, y={detection.point.y:.3f}, z={detection.point.z:.3f}" + ) + print( + f"3D bbox center: x={detection.bbox_3d.center.x:.3f}, y={detection.bbox_3d.center.y:.3f}, z={detection.bbox_3d.center.z:.3f}" + ) + print( + f"3D bbox dimensions: w={detection.bbox_3d.width:.3f}, h={detection.bbox_3d.height:.3f}, d={detection.bbox_3d.depth:.3f}" + ) + + # Basic sanity checks + assert detection.depth > 0, "Depth should be positive" + assert detection.bbox_3d.width > 0, "Width should be positive" + assert detection.bbox_3d.height > 0, "Height should be positive" + assert detection.bbox_3d.depth > 0, "Depth should be positive" + + print("✓ 3D bounding box calculation test passed!") + + +def test_detection_pipeline_with_3d_boxes(): + """Test the complete detection pipeline with 3D bounding boxes.""" + print("\nTesting detection pipeline with 3D bounding boxes...") + + # Create test image and depth image + test_image = np.zeros((480, 640, 3), dtype=np.uint8) + + # Add a simple pattern to potentially trigger detection + # Create a white rectangle that might be detected + test_image[200:280, 250:350] = 255 + + depth_image = np.full((480, 640), 3.0, dtype=np.float32) + camera_intrinsics = (500.0, 500.0, 320.0, 240.0) + + # Test pipeline manager + manager = DetectionPipelineManager() + + # Run detections with 3D calculation + results = manager.run_detections( + image=test_image, depth_image=depth_image, camera_intrinsics=camera_intrinsics + ) + + print(f"Pipeline returned {len(results)} detector results") + + for detector_name, detections in results: + print(f" {detector_name}: {len(detections)} detections") + for i, detection in enumerate(detections): + if detection.bbox_3d is not None: + print( + f" Detection {i}: 3D bbox center=({detection.bbox_3d.center.x:.2f}, {detection.bbox_3d.center.y:.2f}, {detection.bbox_3d.center.z:.2f})" + ) + print( + f" Detection {i}: 3D bbox dims=({detection.bbox_3d.width:.2f}, {detection.bbox_3d.height:.2f}, {detection.bbox_3d.depth:.2f})" + ) + + print("✓ Detection pipeline with 3D bounding boxes test passed!") + + +def test_corner_calculation(): + """Test the corner calculation functionality.""" + print("\nTesting corner calculation...") + + # Simple test case: unit cube at origin + center = custom_types.Point3D(x=0.0, y=0.0, z=0.0) + bbox_3d = custom_types.BoundingBox3D( + center=center, width=2.0, height=2.0, depth=2.0 + ) + + corners = bbox_3d.get_corners() + + # Expected corners for a 2x2x2 cube centered at origin + expected_corners = [ + (-1, -1, -1), # front-bottom-left + (1, -1, -1), # front-bottom-right + (-1, 1, -1), # front-top-left + (1, 1, -1), # front-top-right + (-1, -1, 1), # back-bottom-left + (1, -1, 1), # back-bottom-right + (-1, 1, 1), # back-top-left + (1, 1, 1), # back-top-right + ] + + for i, (corner, expected) in enumerate(zip(corners, expected_corners)): + assert ( + abs(corner.x - expected[0]) < 1e-6 + ), f"Corner {i} X mismatch: {corner.x} vs {expected[0]}" + assert ( + abs(corner.y - expected[1]) < 1e-6 + ), f"Corner {i} Y mismatch: {corner.y} vs {expected[1]}" + assert ( + abs(corner.z - expected[2]) < 1e-6 + ), f"Corner {i} Z mismatch: {corner.z} vs {expected[2]}" + + print("✓ Corner calculation test passed!") + + +def main(): + """Run all tests.""" + print("Running 3D bounding box tests...\n") + + try: + test_bounding_box_3d_creation() + test_corner_calculation() + test_3d_bounding_box_calculation() + test_detection_pipeline_with_3d_boxes() + + print("\n" + "=" * 50) + print("✅ All 3D bounding box tests passed!") + print("=" * 50) + + except AssertionError as e: + print(f"\n❌ Test failed: {e}") + return 1 + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + return 1 + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/autonomy/src/custom_types.py b/autonomy/src/custom_types.py index f57d007..c99698e 100644 --- a/autonomy/src/custom_types.py +++ b/autonomy/src/custom_types.py @@ -17,6 +17,39 @@ class Rotation3D: w: float +@dataclass +class BoundingBox3D: + """ + Represents a 3D bounding box with center point and dimensions. + """ + + center: Point3D + width: float # X dimension + height: float # Y dimension + depth: float # Z dimension + + def get_corners(self) -> List[Point3D]: + """ + Get the 8 corner points of the 3D bounding box. + Returns corners in the order: [front-bottom-left, front-bottom-right, + front-top-left, front-top-right, back-bottom-left, back-bottom-right, + back-top-left, back-top-right] + """ + cx, cy, cz = self.center.x, self.center.y, self.center.z + w, h, d = self.width / 2, self.height / 2, self.depth / 2 + + return [ + Point3D(cx - w, cy - h, cz - d), # front-bottom-left + Point3D(cx + w, cy - h, cz - d), # front-bottom-right + Point3D(cx - w, cy + h, cz - d), # front-top-left + Point3D(cx + w, cy + h, cz - d), # front-top-right + Point3D(cx - w, cy - h, cz + d), # back-bottom-left + Point3D(cx + w, cy - h, cz + d), # back-bottom-right + Point3D(cx - w, cy + h, cz + d), # back-top-left + Point3D(cx + w, cy + h, cz + d), # back-top-right + ] + + @dataclass class Detection: x1: float @@ -27,6 +60,7 @@ class Detection: conf: float depth: float = 0 point: Optional[Point3D] = None + bbox_3d: Optional[BoundingBox3D] = None @dataclass