mit-rss · alany1 · Apr 14, 2025 · Apr 14, 2025 · Apr 14, 2025 · Apr 14, 2025
diff --git a/README.md b/README.md
diff --git a/city_driving/stop_detector/detector.py b/city_driving/stop_detector/detector.py
diff --git a/city_driving/stop_detector/sign.jpg b/city_driving/stop_detector/sign.jpg
diff --git a/media/despicable_me_moon.png b/media/despicable_me_moon.png
diff --git a/media/minion.png b/media/minion.png
diff --git a/media/minion_detection.jpg b/media/minion_detection.jpg
diff --git a/media/minions.jpg b/media/minions.jpg
diff --git a/media/vector.png b/media/vector.png
diff --git a/shrinkray_heist/README.md b/shrinkray_heist/README.md
@@ -0,0 +1,38 @@
+## Setting up YOLO
+
+Start by pulling the latest docker image:
+```bash
+sudo docker pull staffmitrss/racecar-real:latest
+```
+
+Please take a look at `model/detector.py`. We've provided a basic template for a [YOLO11](https://docs.ultralytics.com/models/yolo11/) object detector. It's up to you to decide how you want to use this model to complete the tasks assigned to you by Gru. Feel free to modify the code as needed or dive into the source code to see how you can make the most out of the model!
+
+You should give this file a careful read. But importantly, note that you can control the model's threshold, which determines how confident the model needs to be before it considers an object detected. The default threshold is set to 0.5, but you can adjust it to suit your needs. A lower threshold will yield more detections, while a higher threshold will yield fewer detections. Additionally, observe that the `predict` function gives you the list of `(bounding box, class_label)` pairs. You can see the list of all classes supported by the model with 
+```python
+model = Detector()
+print(model.classes)
+```
+
+To get you started, you can run the demo to detect the bananas around the minion with
+
+```bash
+python3 model/detector.py 
+```
+which will save the output image with bounding boxes (as below) to `model/demo_output.ppg`. 
+
+
+<img src="../media/minion_detection.jpg" width="300"/>
+
+
+**Fun Fact**: Gru has no patience for sluggish models, so he harnessed TensorRT’s quantization and optimizations to supercharge yours! If you're curious / want to learn more about how he did it, check out [this](https://developer.nvidia.com/tensorrt#:~:text=NVIDIA%C2%AE%20TensorRT%E2%84%A2%20is,high%20throughput%20for%20production%20applications) and [this](https://huggingface.co/docs/optimum/en/concept_guides/quantization) as a reference.
+
+**However,** if this is a bit slow for you, you can try the alternative of just putting the model on cuda directly. To do this, replace
+```python
+model = Detector()
+```
+with
+```python
+model = Detector(from_tensor_rt=False)
+model.to('cuda')
+```
+As a sanity check, the model should run at about 10FPS. 
diff --git a/city_driving/basement_point_publisher.py → shrinkray_heist/basement_point_publisher.py b/city_driving/basement_point_publisher.py → shrinkray_heist/basement_point_publisher.py
diff --git a/city_driving/stop_detector/stop_detector.py → shrinkray_heist/model/detection_node.py b/city_driving/stop_detector/stop_detector.py → shrinkray_heist/model/detection_node.py
@@ -1,22 +1,20 @@
 import rclpy
 from rclpy.node import Node
 
-import cv2
-from cv_bridge import CvBridge, CvBridgeError
+from cv_bridge import CvBridge
 
-import numpy as np
 from sensor_msgs.msg import Image
-from detector import StopSignDetector
+from .detector import Detector
 
-class SignDetector(Node):
+class DetectorNode(Node):
     def __init__(self):
-        super().__init__("stop_detector")
-        self.detector = StopSignDetector()
+        super().__init__("detector")
+        self.detector = Detector()
         self.publisher = None #TODO
         self.subscriber = self.create_subscription(Image, "/zed/zed_node/rgb/image_rect_color", self.callback, 1)
         self.bridge = CvBridge()
 
-        self.get_logger().info("Stop Detector Initialized")
+        self.get_logger().info("Detector Initialized")
 
     def callback(self, img_msg):
         # Process image with CV Bridge
@@ -26,9 +24,9 @@ def callback(self, img_msg):
 
 def main(args=None):
     rclpy.init(args=args)
-    detector = SignDetector()
+    detector = DetectorNode()
     rclpy.spin(detector)
     rclpy.shutdown()
 
 if __name__=="__main__":
-    main()
+    main()
diff --git a/shrinkray_heist/model/detector.py b/shrinkray_heist/model/detector.py
@@ -0,0 +1,148 @@
+import random
+
+import cv2
+from PIL import Image, ImageDraw, ImageFont
+
+
+def _label_to_color(label):
+    random.seed(label)  # Use label as seed to generate a stable color
+    r = random.randint(0, 255)
+    g = random.randint(0, 255)
+    b = random.randint(0, 255)
+    return (r, g, b)
+
+
+class Detector:
+    def __init__(self, yolo_dir="/root/yolo", from_tensor_rt=True, threshold=0.5):
+        # local import
+        from ultralytics import YOLO
+        cls = YOLO
+
+        self.threshold = threshold
+        self.yolo_dir = yolo_dir
+        if from_tensor_rt:
+            self.model = cls(f"{self.yolo_dir}/yolo11n.engine", task="detect")
+        else:
+            self.model = cls(f"{self.yolo_dir}/yolo11n.pt", task="detect")
+
+    def to(self, device):
+        self.model.to(device)
+
+    def predict(self, img, silent=True):
+        """
+        Note: img can be any of the following:
+            Union[str, pathlib.Path, int, PIL.Image.Image, list, tuple, numpy.ndarray, torch.Tensor]
+
+            Batch not supported.
+
+        Runs detection on a single image and returns a list of
+        ((xmin, ymin, xmax, ymax), class_label) for each detection
+        above the given confidence threshold.
+        """
+        results = list(self.model(img, verbose=not silent))[0]
+        boxes = results.boxes
+
+        predictions = []
+        # Iterate over the bounding boxes
+        for xyxy, conf, cls_idx in zip(boxes.xyxy, boxes.conf, boxes.cls):
+            if conf.item() >= self.threshold:
+                # Convert bounding box tensor to Python floats
+                x1, y1, x2, y2 = xyxy.tolist()
+                # Map class index to class label using model/ results
+                label = results.names[int(cls_idx.item())]
+                predictions.append(((x1, y1, x2, y2), label))
+
+        #convert original image to rgb
+        original_image = results.orig_img
+        cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB, original_image)
+
+        return dict(predictions=predictions, original_image=original_image)
+
+    def set_threshold(self, threshold):
+        """
+        Sets the confidence threshold for predictions.
+        """
+        self.threshold = threshold
+
+    def draw_box(self, img, predictions, draw_all=False):
+        """
+        Draw bounding boxes on 'img'.
+
+        :param img: The image to draw on (PIL.Image or NumPy array).
+        :param predictions: A list of ((xmin, ymin, xmax, ymax), label).
+        :param draw_all: If True, draw *all* bounding boxes.
+                         If False, draw only the first one.
+        :return: A PIL image with boxes and labels drawn.
+        """
+        if not predictions:
+            return img  # No detections, return as is
+
+        # Convert to PIL.Image if needed
+        if not isinstance(img, Image.Image):
+            img = Image.fromarray(img)
+
+        draw = ImageDraw.Draw(img)
+
+        min_dim = min(img.width, img.height)
+        scale_factor = (
+            min_dim / 600.0
+        )
+
+        line_width = max(
+            1, int(4 * scale_factor)
+        )
+        font_size = max(10, int(20 * scale_factor))
+        text_offset = font_size + 3
+
+        try:
+            font = ImageFont.truetype("arial.ttf", font_size)
+        except IOError:
+            font = ImageFont.load_default()
+
+        print(f"Labels: {[x[-1] for x in predictions]}")
+
+        if draw_all:
+            for (x1, y1, x2, y2), label in predictions:
+                color = _label_to_color(label)
+                draw.rectangle([x1, y1, x2, y2], outline=color, width=line_width)
+                draw.text((x1, y1 - text_offset), label, fill=color, font=font)
+        else:
+            (x1, y1, x2, y2), label = predictions[0]
+            color = _label_to_color(label)
+            draw.rectangle([x1, y1, x2, y2], outline=color, width=line_width)
+            draw.text((x1, y1 - text_offset), label, fill=color, font=font)
+
+        return img
+
+    def id2name(self, i):
+        """
+        Converts a class index to a class name.
+        """
+        return self.model.names[i]
+
+    @property
+    def classes(self):
+        return self.model.names
+
+
+def demo():
+    import os
+    model = Detector()
+    model.set_threshold(0.5)
+
+    img_path = f"{os.path.dirname(__file__)}/../../media/minion.png" 
+
+    img = Image.open(img_path)
+    results = model.predict(img)
+
+    predictions = results["predictions"]
+    original_image = results["original_image"]
+
+    out = model.draw_box(original_image, predictions, draw_all=True)
+
+    save_path = f"{os.path.dirname(__file__)}/demo_output.png"
+    out.save(save_path)
+    print(f"Saved demo to {save_path}!")
+
+if __name__ == '__main__':    
+    demo()
diff --git a/shrinkray_heist/model/speed_test.py b/shrinkray_heist/model/speed_test.py
@@ -0,0 +1,16 @@
+if __name__ == '__main__':
+    from detector import Detector
+    import time
+    import numpy as np
+    N = 100
+    model = Detector()
+
+    t0 = time.perf_counter()
+    for _ in range(N):
+        t = (np.random.uniform(size=(480, 480, 3)) * 255).astype(np.uint8)        
+        model.predict(t)
+    tn = time.perf_counter()
+
+    print(f"Time taken for {N} iterations: {tn - t0:.2f} seconds")
+
+