Skip to content
Open
183 changes: 101 additions & 82 deletions README.md

Large diffs are not rendered by default.

79 changes: 0 additions & 79 deletions city_driving/stop_detector/detector.py

This file was deleted.

Binary file removed city_driving/stop_detector/sign.jpg
Binary file not shown.
Binary file added media/despicable_me_moon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/minion.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/minion_detection.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/minions.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/vector.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
38 changes: 38 additions & 0 deletions shrinkray_heist/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## Setting up YOLO

Start by pulling the latest docker image:
```bash
sudo docker pull staffmitrss/racecar-real:latest
```

Please take a look at `model/detector.py`. We've provided a basic template for a [YOLO11](https://docs.ultralytics.com/models/yolo11/) object detector. It's up to you to decide how you want to use this model to complete the tasks assigned to you by Gru. Feel free to modify the code as needed or dive into the source code to see how you can make the most out of the model!

You should give this file a careful read. But importantly, note that you can control the model's threshold, which determines how confident the model needs to be before it considers an object detected. The default threshold is set to 0.5, but you can adjust it to suit your needs. A lower threshold will yield more detections, while a higher threshold will yield fewer detections. Additionally, observe that the `predict` function gives you the list of `(bounding box, class_label)` pairs. You can see the list of all classes supported by the model with
```python
model = Detector()
print(model.classes)
```

To get you started, you can run the demo to detect the bananas around the minion with

```bash
python3 model/detector.py
```
which will save the output image with bounding boxes (as below) to `model/demo_output.ppg`.


<img src="../media/minion_detection.jpg" width="300"/>


**Fun Fact**: Gru has no patience for sluggish models, so he harnessed TensorRT’s quantization and optimizations to supercharge yours! If you're curious / want to learn more about how he did it, check out [this](https://developer.nvidia.com/tensorrt#:~:text=NVIDIA%C2%AE%20TensorRT%E2%84%A2%20is,high%20throughput%20for%20production%20applications) and [this](https://huggingface.co/docs/optimum/en/concept_guides/quantization) as a reference.

**However,** if this is a bit slow for you, you can try the alternative of just putting the model on cuda directly. To do this, replace
```python
model = Detector()
```
with
```python
model = Detector(from_tensor_rt=False)
model.to('cuda')
```
As a sanity check, the model should run at about 10FPS.
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
import rclpy
from rclpy.node import Node

import cv2
from cv_bridge import CvBridge, CvBridgeError
from cv_bridge import CvBridge

import numpy as np
from sensor_msgs.msg import Image
from detector import StopSignDetector
from .detector import Detector

class SignDetector(Node):
class DetectorNode(Node):
def __init__(self):
super().__init__("stop_detector")
self.detector = StopSignDetector()
super().__init__("detector")
self.detector = Detector()
self.publisher = None #TODO
self.subscriber = self.create_subscription(Image, "/zed/zed_node/rgb/image_rect_color", self.callback, 1)
self.bridge = CvBridge()

self.get_logger().info("Stop Detector Initialized")
self.get_logger().info("Detector Initialized")

def callback(self, img_msg):
# Process image with CV Bridge
Expand All @@ -26,9 +24,9 @@ def callback(self, img_msg):

def main(args=None):
rclpy.init(args=args)
detector = SignDetector()
detector = DetectorNode()
rclpy.spin(detector)
rclpy.shutdown()

if __name__=="__main__":
main()
main()
148 changes: 148 additions & 0 deletions shrinkray_heist/model/detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import random

import cv2
from PIL import Image, ImageDraw, ImageFont


def _label_to_color(label):
random.seed(label) # Use label as seed to generate a stable color
r = random.randint(0, 255)
g = random.randint(0, 255)
b = random.randint(0, 255)
return (r, g, b)


class Detector:
def __init__(self, yolo_dir="/root/yolo", from_tensor_rt=True, threshold=0.5):
# local import
from ultralytics import YOLO
cls = YOLO

self.threshold = threshold
self.yolo_dir = yolo_dir
if from_tensor_rt:
self.model = cls(f"{self.yolo_dir}/yolo11n.engine", task="detect")
else:
self.model = cls(f"{self.yolo_dir}/yolo11n.pt", task="detect")

def to(self, device):
self.model.to(device)

def predict(self, img, silent=True):
"""
Note: img can be any of the following:
Union[str, pathlib.Path, int, PIL.Image.Image, list, tuple, numpy.ndarray, torch.Tensor]

Batch not supported.

Runs detection on a single image and returns a list of
((xmin, ymin, xmax, ymax), class_label) for each detection
above the given confidence threshold.
"""
results = list(self.model(img, verbose=not silent))[0]
boxes = results.boxes

predictions = []
# Iterate over the bounding boxes
for xyxy, conf, cls_idx in zip(boxes.xyxy, boxes.conf, boxes.cls):
if conf.item() >= self.threshold:
# Convert bounding box tensor to Python floats
x1, y1, x2, y2 = xyxy.tolist()
# Map class index to class label using model/ results
label = results.names[int(cls_idx.item())]
predictions.append(((x1, y1, x2, y2), label))

#convert original image to rgb
original_image = results.orig_img
cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB, original_image)

return dict(predictions=predictions, original_image=original_image)

def set_threshold(self, threshold):
"""
Sets the confidence threshold for predictions.
"""
self.threshold = threshold

def draw_box(self, img, predictions, draw_all=False):
"""
Draw bounding boxes on 'img'.

:param img: The image to draw on (PIL.Image or NumPy array).
:param predictions: A list of ((xmin, ymin, xmax, ymax), label).
:param draw_all: If True, draw *all* bounding boxes.
If False, draw only the first one.
:return: A PIL image with boxes and labels drawn.
"""
if not predictions:
return img # No detections, return as is

# Convert to PIL.Image if needed
if not isinstance(img, Image.Image):
img = Image.fromarray(img)

draw = ImageDraw.Draw(img)

min_dim = min(img.width, img.height)
scale_factor = (
min_dim / 600.0
)

line_width = max(
1, int(4 * scale_factor)
)
font_size = max(10, int(20 * scale_factor))
text_offset = font_size + 3

try:
font = ImageFont.truetype("arial.ttf", font_size)
except IOError:
font = ImageFont.load_default()

print(f"Labels: {[x[-1] for x in predictions]}")

if draw_all:
for (x1, y1, x2, y2), label in predictions:
color = _label_to_color(label)
draw.rectangle([x1, y1, x2, y2], outline=color, width=line_width)
draw.text((x1, y1 - text_offset), label, fill=color, font=font)
else:
(x1, y1, x2, y2), label = predictions[0]
color = _label_to_color(label)
draw.rectangle([x1, y1, x2, y2], outline=color, width=line_width)
draw.text((x1, y1 - text_offset), label, fill=color, font=font)

return img

def id2name(self, i):
"""
Converts a class index to a class name.
"""
return self.model.names[i]

@property
def classes(self):
return self.model.names


def demo():
import os
model = Detector()
model.set_threshold(0.5)

img_path = f"{os.path.dirname(__file__)}/../../media/minion.png"

img = Image.open(img_path)
results = model.predict(img)

predictions = results["predictions"]
original_image = results["original_image"]

out = model.draw_box(original_image, predictions, draw_all=True)

save_path = f"{os.path.dirname(__file__)}/demo_output.png"
out.save(save_path)
print(f"Saved demo to {save_path}!")

if __name__ == '__main__':
demo()
16 changes: 16 additions & 0 deletions shrinkray_heist/model/speed_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
if __name__ == '__main__':
from detector import Detector
import time
import numpy as np
N = 100
model = Detector()

t0 = time.perf_counter()
for _ in range(N):
t = (np.random.uniform(size=(480, 480, 3)) * 255).astype(np.uint8)
model.predict(t)
tn = time.perf_counter()

print(f"Time taken for {N} iterations: {tn - t0:.2f} seconds")