85 changes: 85 additions & 0 deletions obstacle_avoidance/SpatialDetection/collision_avoidance.py
@@ -0,0 +1,85 @@
from depthai_sdk import OakCamera
from depthai_sdk.visualize.configs import StereoColor
from depthai_sdk.classes.packets import DisparityDepthPacket
import math
import depthai as dai
import cv2

# User-defined constants (distance thresholds, in millimeters)
WARNING = 2000   # 2 m, orange
CRITICAL = 1000  # 1 m, red

slc_data = []

def cb(packet: DisparityDepthPacket):
    global slc_data
    fontType = cv2.FONT_HERSHEY_TRIPLEX

    # Manually normalize and colorize the depth frame
    depth_8bit = cv2.normalize(packet.frame, None, 0, 255, cv2.NORM_MINMAX)
    depth_8bit = depth_8bit.astype('uint8')
    depthFrameColor = cv2.applyColorMap(depth_8bit, cv2.COLORMAP_JET)

    for depthData in slc_data:
        roi = depthData.config.roi
        roi = roi.denormalize(width=depthFrameColor.shape[1], height=depthFrameColor.shape[0])

        xmin = int(roi.topLeft().x)
        ymin = int(roi.topLeft().y)
        xmax = int(roi.bottomRight().x)
        ymax = int(roi.bottomRight().y)

        coords = depthData.spatialCoordinates
        distance = math.sqrt(coords.x ** 2 + coords.y ** 2 + coords.z ** 2)
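        # Spatial coordinates are reported in millimeters, so `distance` is the
        # straight-line (Euclidean) range from the camera to the ROI's centroid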

        if distance == 0:  # Invalid
            continue

        if distance < CRITICAL:
            color = (0, 0, 255)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, thickness=4)
            cv2.putText(depthFrameColor, "{:.1f}m".format(distance / 1000), (xmin + 10, ymin + 20), fontType, 0.5, color)
        elif distance < WARNING:
            color = (0, 140, 255)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, thickness=2)
            cv2.putText(depthFrameColor, "{:.1f}m".format(distance / 1000), (xmin + 10, ymin + 20), fontType, 0.5, color)

    cv2.imshow('0_depth', depthFrameColor)

with OakCamera() as oak:
    stereo = oak.create_stereo('720p')
    # We don't need a high fill rate, just accurate depth; that's why we enable
    # post-processing filters and set the confidence threshold to 50
    config = stereo.node.initialConfig.get()
    config.postProcessing.brightnessFilter.minBrightness = 0
    config.postProcessing.brightnessFilter.maxBrightness = 255
    stereo.node.initialConfig.set(config)
    stereo.config_postprocessing(colorize=StereoColor.GRAY, colormap=cv2.COLORMAP_JET)
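    # lr_check filters out occlusions via a left-right consistency check; extended
    # disparity mode improves depth accuracy for close-range objects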
    stereo.config_stereo(confidence=50, lr_check=True, extended=True)

    oak.visualize([stereo], fps=True, callback=cb)

    slc = oak.pipeline.create(dai.node.SpatialLocationCalculator)
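    # Build a 15x9 grid of ROIs covering the frame: each cell is 1/16 of the
    # frame width and 1/10 of its height, inset by half a cell from the edges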
    for x in range(15):
        for y in range(9):
            config = dai.SpatialLocationCalculatorConfigData()
            config.depthThresholds.lowerThreshold = 200
            config.depthThresholds.upperThreshold = 10000
            config.roi = dai.Rect(dai.Point2f((x + 0.5) * 0.0625, (y + 0.5) * 0.1), dai.Point2f((x + 1.5) * 0.0625, (y + 1.5) * 0.1))
            # TODO: change from median to 10th percentile once supported
            config.calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MEDIAN
            slc.initialConfig.addROI(config)

    stereo.depth.link(slc.inputDepth)

    slc_out = oak.pipeline.create(dai.node.XLinkOut)
    slc_out.setStreamName('slc')
    slc.out.link(slc_out.input)

    oak.start()  # Start the pipeline (upload it to the OAK)

    q = oak.device.getOutputQueue('slc')  # Create the output queue after calling start()
    while oak.running():
        if q.has():
            slc_data = q.get().getSpatialLocations()
        oak.poll()
229 changes: 229 additions & 0 deletions obstacle_avoidance/SpatialDetection/nexus.py
@@ -0,0 +1,229 @@
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time
import math

'''
Spatial tiny-YOLO example
Performs inference on the RGB camera and retrieves spatial location coordinates (x, y, z) relative to the center of the depth map.
Can be used with tiny-yolo-v3 or tiny-yolo-v4 networks.
'''
# User-defined constants (distance thresholds, in millimeters)
WARNING = 1000  # 1 m, orange
CRITICAL = 500  # 50 cm, red
fontType = cv2.FONT_HERSHEY_TRIPLEX

# Get argument first
nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
if 1 < len(sys.argv):
    arg = sys.argv[1]
    if arg == "yolo3":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    elif arg == "yolo4":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    else:
        nnBlobPath = arg
else:
    print("Using tiny YOLOv4 model. If you wish to use tiny YOLOv3, call 'nexus.py yolo3'")

if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# Tiny yolo v3/4 label texts
labelMap = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
    "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
    "teddy bear", "hair drier", "toothbrush"
]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")

# Properties
camRgb.setPreviewSize(416, 416)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
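# Subpixel mode interpolates disparity to fractional values, improving depth precision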
stereo.setSubpixel(True)

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
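# Average depth over a bounding box scaled down to 50% of its size (to avoid
# including background pixels), and ignore depth outside the 100 mm - 5000 mm range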
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(80)
spatialDetectionNetwork.setCoordinateSize(4)
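# Standard tiny-YOLO anchors for COCO; the masks select which anchors are used
# by the 26x26 ("side26") and 13x13 ("side13") output grids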
spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] })
spatialDetectionNetwork.setIouThreshold(0.5)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    printOutputLayersOnce = True
    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()
        inNN = networkQueue.get()

        if printOutputLayersOnce:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        frame = inPreview.getCvFrame()
        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

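        # For display, stretch the depth map between the 1st and 99th percentile of a
        # row-subsampled copy of the frame, ignoring invalid (zero) depth pixels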
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width = frame.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            #cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            #cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            #cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            #cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)
            coords = detection.spatialCoordinates
            distance = math.sqrt(coords.x ** 2 + coords.y ** 2 + coords.z ** 2)
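            # Spatial coordinates are in millimeters; coords.x is positive to the
            # right of the camera center, which drives the turn decisions below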

            if distance == 0:  # Invalid
                continue

            if distance < CRITICAL:
                color = (0, 0, 255)
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=4)
                cv2.putText(frame, "{:.1f}m".format(distance / 1000), (x1 + 10, y1 + 20), fontType, 0.5, color)
                print("STOP! Object within critical bounds")
            elif distance < WARNING:
                if coords.x > 0:
                    print("Turn left: obstacles on right")
                else:
                    print("Turn right: obstacles on left")
                color = (0, 140, 255)
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=2)
                cv2.putText(frame, "{:.1f}m".format(distance / 1000), (x1 + 10, y1 + 20), fontType, 0.5, color)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break