diff --git a/obstacle_avoidance/SpatialDetection/collision_avoidance.py b/obstacle_avoidance/SpatialDetection/collision_avoidance.py new file mode 100644 index 00000000..e1554cca --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/collision_avoidance.py @@ -0,0 +1,85 @@ +from depthai_sdk import OakCamera +from depthai_sdk.visualize.configs import StereoColor +from depthai_sdk.classes.packets import DisparityDepthPacket +import math +import depthai as dai +import cv2 + +# User-defined constants (distance detection variables) +WARNING = 2000 # 2m, orange +CRITICAL = 1000 # 1m, red + +slc_data = [] + +def cb(packet: DisparityDepthPacket): + global slc_data + fontType = cv2.FONT_HERSHEY_TRIPLEX + + # Manually normalize and colorize depth frame + depth_8bit = cv2.normalize(packet.frame, None, 0, 255, cv2.NORM_MINMAX) + depth_8bit = depth_8bit.astype('uint8') + depthFrameColor = cv2.applyColorMap(depth_8bit, cv2.COLORMAP_JET) + + for depthData in slc_data: + roi = depthData.config.roi + roi = roi.denormalize(width=depthFrameColor.shape[1], height=depthFrameColor.shape[0]) + + xmin = int(roi.topLeft().x) + ymin = int(roi.topLeft().y) + xmax = int(roi.bottomRight().x) + ymax = int(roi.bottomRight().y) + + coords = depthData.spatialCoordinates + distance = math.sqrt(coords.x ** 2 + coords.y ** 2 + coords.z ** 2) + + if distance == 0: # Invalid + continue + + if distance < CRITICAL: + color = (0, 0, 255) + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, thickness=4) + cv2.putText(depthFrameColor, "{:.1f}m".format(distance/1000), (xmin + 10, ymin + 20), fontType, 0.5, color) + elif distance < WARNING: + color = (0, 140, 255) + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, thickness=2) + cv2.putText(depthFrameColor, "{:.1f}m".format(distance/1000), (xmin + 10, ymin + 20), fontType, 0.5, color) + + cv2.imshow('0_depth', depthFrameColor) + +with OakCamera() as oak: + stereo = oak.create_stereo('720p') + # We don't need high fill rate, just very accurate depth, that's why we enable some filters, and + # set the confidence threshold to 50 + config = stereo.node.initialConfig.get() + config.postProcessing.brightnessFilter.minBrightness = 0 + config.postProcessing.brightnessFilter.maxBrightness = 255 + stereo.node.initialConfig.set(config) + stereo.config_postprocessing(colorize=StereoColor.GRAY, colormap=cv2.COLORMAP_JET) + stereo.config_stereo(confidence=50, lr_check=True, extended=True) + + oak.visualize([stereo], fps=True, callback=cb) + + slc = oak.pipeline.create(dai.node.SpatialLocationCalculator) + for x in range(15): + for y in range(9): + config = dai.SpatialLocationCalculatorConfigData() + config.depthThresholds.lowerThreshold = 200 + config.depthThresholds.upperThreshold = 10000 + config.roi = dai.Rect(dai.Point2f((x+0.5)*0.0625, (y+0.5)*0.1), dai.Point2f((x+1.5)*0.0625, (y+1.5)*0.1)) + # TODO: change from median to 10th percentile once supported + config.calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MEDIAN + slc.initialConfig.addROI(config) + + stereo.depth.link(slc.inputDepth) + + slc_out = oak.pipeline.create(dai.node.XLinkOut) + slc_out.setStreamName('slc') + slc.out.link(slc_out.input) + + oak.start() # Start the pipeline (upload it to the OAK) + + q = oak.device.getOutputQueue('slc') # Create output queue after calling start() + while oak.running(): + if q.has(): + slc_data = q.get().getSpatialLocations() + oak.poll() diff --git a/obstacle_avoidance/SpatialDetection/nexus.py b/obstacle_avoidance/SpatialDetection/nexus.py new file 
mode 100644 index 00000000..72d9da67 --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/nexus.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import sys +import cv2 +import depthai as dai +import numpy as np +import time +import math + +''' +Spatial Tiny-yolo example + Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map. + Can be used for tiny-yolo-v3 or tiny-yolo-v4 networks +''' +# User-defined constants +WARNING = 1000 # 1m, orange +CRITICAL = 500 # 50cm, red +fontType = cv2.FONT_HERSHEY_TRIPLEX + +# Get argument first +nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute()) +if 1 < len(sys.argv): + arg = sys.argv[1] + if arg == "yolo3": + nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute()) + elif arg == "yolo4": + nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute()) + else: + nnBlobPath = arg +else: + print("Using Tiny YoloV4 model. If you wish to use Tiny YOLOv3, call 'tiny_yolo.py yolo3'") + +if not Path(nnBlobPath).exists(): + import sys + raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"') + +# Tiny yolo v3/4 label texts +labelMap = [ + "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", + "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", + "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", + "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", + "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", + "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", + "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", + "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", + "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush" +] + +syncNN = True + +# Create pipeline +pipeline = dai.Pipeline() + +# Define sources and outputs +camRgb = pipeline.create(dai.node.ColorCamera) +spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork) +monoLeft = pipeline.create(dai.node.MonoCamera) +monoRight = pipeline.create(dai.node.MonoCamera) +stereo = pipeline.create(dai.node.StereoDepth) +nnNetworkOut = pipeline.create(dai.node.XLinkOut) + +xoutRgb = pipeline.create(dai.node.XLinkOut) +xoutNN = pipeline.create(dai.node.XLinkOut) +xoutDepth = pipeline.create(dai.node.XLinkOut) + +xoutRgb.setStreamName("rgb") +xoutNN.setStreamName("detections") +xoutDepth.setStreamName("depth") +nnNetworkOut.setStreamName("nnNetwork") + +# Properties +camRgb.setPreviewSize(416, 416) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) +camRgb.setInterleaved(False) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) + +monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoLeft.setCamera("left") +monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoRight.setCamera("right") + +# setting node configs 
+stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) +# Align depth map to the perspective of RGB camera, on which inference is done +stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A) +stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight()) +stereo.setSubpixel(True) + +spatialDetectionNetwork.setBlobPath(nnBlobPath) +spatialDetectionNetwork.setConfidenceThreshold(0.5) +spatialDetectionNetwork.input.setBlocking(False) +spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5) +spatialDetectionNetwork.setDepthLowerThreshold(100) +spatialDetectionNetwork.setDepthUpperThreshold(5000) + +# Yolo specific parameters +spatialDetectionNetwork.setNumClasses(80) +spatialDetectionNetwork.setCoordinateSize(4) +spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319]) +spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] }) +spatialDetectionNetwork.setIouThreshold(0.5) + +# Linking +monoLeft.out.link(stereo.left) +monoRight.out.link(stereo.right) + +camRgb.preview.link(spatialDetectionNetwork.input) +if syncNN: + spatialDetectionNetwork.passthrough.link(xoutRgb.input) +else: + camRgb.preview.link(xoutRgb.input) + +spatialDetectionNetwork.out.link(xoutNN.input) + +stereo.depth.link(spatialDetectionNetwork.inputDepth) +spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input) +spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input) + +# Connect to device and start pipeline +with dai.Device(pipeline) as device: + + # Output queues will be used to get the rgb frames and nn data from the outputs defined above + previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) + detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False) + depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) + networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False) + + startTime = time.monotonic() + counter = 0 + fps = 0 + color = (255, 255, 255) + printOutputLayersOnce = True + + while True: + inPreview = previewQueue.get() + inDet = detectionNNQueue.get() + depth = depthQueue.get() + inNN = networkQueue.get() + + if printOutputLayersOnce: + toPrint = 'Output layer names:' + for ten in inNN.getAllLayerNames(): + toPrint = f'{toPrint} {ten},' + print(toPrint) + printOutputLayersOnce = False + + frame = inPreview.getCvFrame() + depthFrame = depth.getFrame() # depthFrame values are in millimeters + + depth_downscaled = depthFrame[::4] + if np.all(depth_downscaled == 0): + min_depth = 0 # Set a default minimum depth value when all elements are zero + else: + min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) + max_depth = np.percentile(depth_downscaled, 99) + depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) + depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) + + counter+=1 + current_time = time.monotonic() + if (current_time - startTime) > 1 : + fps = counter / (current_time - startTime) + counter = 0 + startTime = current_time + + detections = inDet.detections + + # If the frame is available, draw bounding boxes on it and show the frame + height = frame.shape[0] + width = frame.shape[1] + for detection in detections: + roiData = detection.boundingBoxMapping + roi = roiData.roi + roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0]) + topLeft = roi.topLeft() + bottomRight = roi.bottomRight() + xmin = int(topLeft.x) + ymin = 
int(topLeft.y) + xmax = int(bottomRight.x) + ymax = int(bottomRight.y) + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1) + + # Denormalize bounding box + x1 = int(detection.xmin * width) + x2 = int(detection.xmax * width) + y1 = int(detection.ymin * height) + y2 = int(detection.ymax * height) + try: + label = labelMap[detection.label] + except: + label = detection.label + cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + #cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + #cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + #cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + + #cv2.rectangle(frame, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX) + coords = detection.spatialCoordinates + distance = math.sqrt(coords.x ** 2 + coords.y ** 2 + coords.z ** 2) + + if distance == 0: # Invalid + continue + + if distance < CRITICAL: + color = (0, 0, 255) + cv2.rectangle(frame, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX) + cv2.putText(frame, "{:.1f}m".format(distance/1000), (x1 + 10, y1 + 50), fontType, 0.5, color) + print("STOP! Object within critical bounds") + elif distance < WARNING: + if coords.x > 0: + print("Turn left: obstacles on right") + else: + print("Turn right: obstacles on left") + color = (0, 140, 255) + cv2.rectangle(frame, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX) + cv2.putText(frame, "{:.1f}m".format(distance/1000), (x1 + 10, y1 + 50), fontType, 0.5, color) + + cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color) + cv2.imshow("depth", depthFrameColor) + cv2.imshow("rgb", frame) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/obstacle_avoidance/SpatialDetection/spatial_calculator_multi_roi.py b/obstacle_avoidance/SpatialDetection/spatial_calculator_multi_roi.py new file mode 100644 index 00000000..c813f17c --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/spatial_calculator_multi_roi.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +import cv2 +import depthai as dai +import math +import numpy as np + +# Create pipeline +pipeline = dai.Pipeline() + +# Define sources and outputs +monoLeft = pipeline.create(dai.node.MonoCamera) +monoRight = pipeline.create(dai.node.MonoCamera) +stereo = pipeline.create(dai.node.StereoDepth) +spatialLocationCalculator = pipeline.create(dai.node.SpatialLocationCalculator) + +xoutDepth = pipeline.create(dai.node.XLinkOut) +xoutSpatialData = pipeline.create(dai.node.XLinkOut) +xinSpatialCalcConfig = pipeline.create(dai.node.XLinkIn) + +xoutDepth.setStreamName("depth") +xoutSpatialData.setStreamName("spatialData") +xinSpatialCalcConfig.setStreamName("spatialCalcConfig") + +# Properties +monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoLeft.setCamera("left") +monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoRight.setCamera("right") + +stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) +stereo.setLeftRightCheck(True) +stereo.setSubpixel(True) +spatialLocationCalculator.inputConfig.setWaitForMessage(False) + +# Create 10 ROIs +for i in range(10): + config = 
dai.SpatialLocationCalculatorConfigData() + config.depthThresholds.lowerThreshold = 200 + config.depthThresholds.upperThreshold = 10000 + config.roi = dai.Rect(dai.Point2f(i*0.1, 0.45), dai.Point2f((i+1)*0.1, 0.55)) + spatialLocationCalculator.initialConfig.addROI(config) + +# Linking +monoLeft.out.link(stereo.left) +monoRight.out.link(stereo.right) + +spatialLocationCalculator.passthroughDepth.link(xoutDepth.input) +stereo.depth.link(spatialLocationCalculator.inputDepth) + +spatialLocationCalculator.out.link(xoutSpatialData.input) +xinSpatialCalcConfig.out.link(spatialLocationCalculator.inputConfig) + +# Connect to device and start pipeline +with dai.Device(pipeline) as device: + device.setIrLaserDotProjectorBrightness(1000) + + # Output queue will be used to get the depth frames from the outputs defined above + depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) + spatialCalcQueue = device.getOutputQueue(name="spatialData", maxSize=4, blocking=False) + color = (0,200,40) + fontType = cv2.FONT_HERSHEY_TRIPLEX + + while True: + inDepth = depthQueue.get() # Blocking call, will wait until a new data has arrived + + depthFrame = inDepth.getFrame() # depthFrame values are in millimeters + + depth_downscaled = depthFrame[::4] + if np.all(depth_downscaled == 0): + min_depth = 0 # Set a default minimum depth value when all elements are zero + else: + min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) + max_depth = np.percentile(depth_downscaled, 99) + depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) + depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) + + spatialData = spatialCalcQueue.get().getSpatialLocations() + for depthData in spatialData: + roi = depthData.config.roi + roi = roi.denormalize(width=depthFrameColor.shape[1], height=depthFrameColor.shape[0]) + + xmin = int(roi.topLeft().x) + ymin = int(roi.topLeft().y) + xmax = int(roi.bottomRight().x) + ymax = int(roi.bottomRight().y) + + coords = depthData.spatialCoordinates + distance = math.sqrt(coords.x ** 2 + coords.y ** 2 + coords.z ** 2) + + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, thickness=2) + cv2.putText(depthFrameColor, "{:.1f}m".format(distance/1000), (xmin + 10, ymin + 20), fontType, 0.6, color) + # Show the frame + cv2.imshow("depth", depthFrameColor) + + if cv2.waitKey(1) == ord('q'): + break \ No newline at end of file diff --git a/obstacle_avoidance/SpatialDetection/spatial_location_calculator.py b/obstacle_avoidance/SpatialDetection/spatial_location_calculator.py new file mode 100644 index 00000000..89260cd6 --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/spatial_location_calculator.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 + +import cv2 +import depthai as dai +import numpy as np +stepSize = 0.05 + +newConfig = False + +# Create pipeline +pipeline = dai.Pipeline() + +# Define sources and outputs +monoLeft = pipeline.create(dai.node.MonoCamera) +monoRight = pipeline.create(dai.node.MonoCamera) +stereo = pipeline.create(dai.node.StereoDepth) +spatialLocationCalculator = pipeline.create(dai.node.SpatialLocationCalculator) + +xoutDepth = pipeline.create(dai.node.XLinkOut) +xoutSpatialData = pipeline.create(dai.node.XLinkOut) +xinSpatialCalcConfig = pipeline.create(dai.node.XLinkIn) + +xoutDepth.setStreamName("depth") +xoutSpatialData.setStreamName("spatialData") +xinSpatialCalcConfig.setStreamName("spatialCalcConfig") + +# Properties 
+monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoLeft.setCamera("left") +monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoRight.setCamera("right") + +stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) +stereo.setLeftRightCheck(True) +stereo.setSubpixel(True) + +# Config +topLeft = dai.Point2f(0.4, 0.4) +bottomRight = dai.Point2f(0.6, 0.6) + +config = dai.SpatialLocationCalculatorConfigData() +config.depthThresholds.lowerThreshold = 100 +config.depthThresholds.upperThreshold = 10000 +calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MEDIAN +config.roi = dai.Rect(topLeft, bottomRight) + +spatialLocationCalculator.inputConfig.setWaitForMessage(False) +spatialLocationCalculator.initialConfig.addROI(config) + +# Linking +monoLeft.out.link(stereo.left) +monoRight.out.link(stereo.right) + +spatialLocationCalculator.passthroughDepth.link(xoutDepth.input) +stereo.depth.link(spatialLocationCalculator.inputDepth) + +spatialLocationCalculator.out.link(xoutSpatialData.input) +xinSpatialCalcConfig.out.link(spatialLocationCalculator.inputConfig) + +# Connect to device and start pipeline +with dai.Device(pipeline) as device: + + # Output queue will be used to get the depth frames from the outputs defined above + depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) + spatialCalcQueue = device.getOutputQueue(name="spatialData", maxSize=4, blocking=False) + spatialCalcConfigInQueue = device.getInputQueue("spatialCalcConfig") + + color = (255, 255, 255) + + print("Use WASD keys to move ROI!") + + while True: + inDepth = depthQueue.get() # Blocking call, will wait until a new data has arrived + + depthFrame = inDepth.getFrame() # depthFrame values are in millimeters + + depth_downscaled = depthFrame[::4] + if np.all(depth_downscaled == 0): + min_depth = 0 # Set a default minimum depth value when all elements are zero + else: + min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) + max_depth = np.percentile(depth_downscaled, 99) + depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) + depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) + + spatialData = spatialCalcQueue.get().getSpatialLocations() + for depthData in spatialData: + roi = depthData.config.roi + roi = roi.denormalize(width=depthFrameColor.shape[1], height=depthFrameColor.shape[0]) + xmin = int(roi.topLeft().x) + ymin = int(roi.topLeft().y) + xmax = int(roi.bottomRight().x) + ymax = int(roi.bottomRight().y) + + depthMin = depthData.depthMin + depthMax = depthData.depthMax + + fontType = cv2.FONT_HERSHEY_TRIPLEX + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1) + cv2.putText(depthFrameColor, f"X: {int(depthData.spatialCoordinates.x)} mm", (xmin + 10, ymin + 20), fontType, 0.5, color) + cv2.putText(depthFrameColor, f"Y: {int(depthData.spatialCoordinates.y)} mm", (xmin + 10, ymin + 35), fontType, 0.5, color) + cv2.putText(depthFrameColor, f"Z: {int(depthData.spatialCoordinates.z)} mm", (xmin + 10, ymin + 50), fontType, 0.5, color) + # Show the frame + cv2.imshow("depth", depthFrameColor) + + key = cv2.waitKey(1) + if key == ord('q'): + break + elif key == ord('w'): + if topLeft.y - stepSize >= 0: + topLeft.y -= stepSize + bottomRight.y -= stepSize + newConfig = True + elif key == ord('a'): + if topLeft.x - stepSize >= 0: + topLeft.x -= stepSize + bottomRight.x -= stepSize + newConfig = True + elif key == ord('s'): + if bottomRight.y 
+ stepSize <= 1: + topLeft.y += stepSize + bottomRight.y += stepSize + newConfig = True + elif key == ord('d'): + if bottomRight.x + stepSize <= 1: + topLeft.x += stepSize + bottomRight.x += stepSize + newConfig = True + elif key == ord('1'): + calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MEAN + print('Switching calculation algorithm to MEAN!') + newConfig = True + elif key == ord('2'): + calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MIN + print('Switching calculation algorithm to MIN!') + newConfig = True + elif key == ord('3'): + calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MAX + print('Switching calculation algorithm to MAX!') + newConfig = True + elif key == ord('4'): + calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MODE + print('Switching calculation algorithm to MODE!') + newConfig = True + elif key == ord('5'): + calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MEDIAN + print('Switching calculation algorithm to MEDIAN!') + newConfig = True + + if newConfig: + config.roi = dai.Rect(topLeft, bottomRight) + config.calculationAlgorithm = calculationAlgorithm + cfg = dai.SpatialLocationCalculatorConfig() + cfg.addROI(config) + spatialCalcConfigInQueue.send(cfg) + newConfig = False \ No newline at end of file diff --git a/obstacle_avoidance/SpatialDetection/spatial_mobilenet.py b/obstacle_avoidance/SpatialDetection/spatial_mobilenet.py new file mode 100644 index 00000000..2c19db7d --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/spatial_mobilenet.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import sys +import cv2 +import depthai as dai +import numpy as np +import time + +''' +Spatial detection network demo. + Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map. 
+''' + +# Get argument first +nnBlobPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute()) +if len(sys.argv) > 1: + nnBlobPath = sys.argv[1] + +if not Path(nnBlobPath).exists(): + import sys + raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"') + +# MobilenetSSD label texts +labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +syncNN = True + +# Create pipeline +pipeline = dai.Pipeline() + +# Define sources and outputs +camRgb = pipeline.create(dai.node.ColorCamera) +spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) +monoLeft = pipeline.create(dai.node.MonoCamera) +monoRight = pipeline.create(dai.node.MonoCamera) +stereo = pipeline.create(dai.node.StereoDepth) + +xoutRgb = pipeline.create(dai.node.XLinkOut) +xoutNN = pipeline.create(dai.node.XLinkOut) +xoutDepth = pipeline.create(dai.node.XLinkOut) + +xoutRgb.setStreamName("rgb") +xoutNN.setStreamName("detections") +xoutDepth.setStreamName("depth") + +# Properties +camRgb.setPreviewSize(300, 300) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) +camRgb.setInterleaved(False) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) + +monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoLeft.setCamera("left") +monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoRight.setCamera("right") + +# Setting node configs +stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) +# Align depth map to the perspective of RGB camera, on which inference is done +stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A) +stereo.setSubpixel(True) +stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight()) + +spatialDetectionNetwork.setBlobPath(nnBlobPath) +spatialDetectionNetwork.setConfidenceThreshold(0.5) +spatialDetectionNetwork.input.setBlocking(False) +spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5) +spatialDetectionNetwork.setDepthLowerThreshold(100) +spatialDetectionNetwork.setDepthUpperThreshold(5000) + +# Linking +monoLeft.out.link(stereo.left) +monoRight.out.link(stereo.right) + +camRgb.preview.link(spatialDetectionNetwork.input) +if syncNN: + spatialDetectionNetwork.passthrough.link(xoutRgb.input) +else: + camRgb.preview.link(xoutRgb.input) + +spatialDetectionNetwork.out.link(xoutNN.input) + +stereo.depth.link(spatialDetectionNetwork.inputDepth) +spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input) + +# Connect to device and start pipeline +with dai.Device(pipeline) as device: + + # Output queues will be used to get the rgb frames and nn data from the outputs defined above + previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) + detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False) + depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) + + startTime = time.monotonic() + counter = 0 + fps = 0 + color = (255, 255, 255) + + while True: + inPreview = previewQueue.get() + inDet = detectionNNQueue.get() + depth = depthQueue.get() + + counter+=1 + current_time = time.monotonic() + if (current_time - startTime) > 1 : + fps = counter / (current_time - startTime) + counter = 0 + startTime = current_time + + frame 
= inPreview.getCvFrame() + + depthFrame = depth.getFrame() # depthFrame values are in millimeters + + depth_downscaled = depthFrame[::4] + if np.all(depth_downscaled == 0): + min_depth = 0 # Set a default minimum depth value when all elements are zero + else: + min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) + max_depth = np.percentile(depth_downscaled, 99) + depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) + depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) + + detections = inDet.detections + + # If the frame is available, draw bounding boxes on it and show the frame + height = frame.shape[0] + width = frame.shape[1] + for detection in detections: + roiData = detection.boundingBoxMapping + roi = roiData.roi + roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0]) + topLeft = roi.topLeft() + bottomRight = roi.bottomRight() + xmin = int(topLeft.x) + ymin = int(topLeft.y) + xmax = int(bottomRight.x) + ymax = int(bottomRight.y) + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1) + + # Denormalize bounding box + x1 = int(detection.xmin * width) + x2 = int(detection.xmax * width) + y1 = int(detection.ymin * height) + y2 = int(detection.ymax * height) + try: + label = labelMap[detection.label] + except: + label = detection.label + cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + + cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), cv2.FONT_HERSHEY_SIMPLEX) + + cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255,255,255)) + cv2.imshow("depth", depthFrameColor) + cv2.imshow("preview", frame) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/obstacle_avoidance/SpatialDetection/spatial_mobilenet_mono.py b/obstacle_avoidance/SpatialDetection/spatial_mobilenet_mono.py new file mode 100644 index 00000000..be9e80fe --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/spatial_mobilenet_mono.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import sys +import cv2 +import depthai as dai +import numpy as np +import time + +''' +Mobilenet SSD device side decoding demo + The "mobilenet-ssd" model is a Single-Shot multibox Detection (SSD) network intended + to perform object detection. This model is implemented using the Caffe* framework. + For details about this model, check out the repository . 
+''' + +# Get argument first +nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute()) +if len(sys.argv) > 1: + nnPath = sys.argv[1] + +if not Path(nnPath).exists(): + import sys + raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"') + +# MobilenetSSD label texts +labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +syncNN = True + +# Create pipeline +pipeline = dai.Pipeline() + +# Define sources and outputs +monoLeft = pipeline.create(dai.node.MonoCamera) +monoRight = pipeline.create(dai.node.MonoCamera) +stereo = pipeline.create(dai.node.StereoDepth) +spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) +imageManip = pipeline.create(dai.node.ImageManip) + +xoutManip = pipeline.create(dai.node.XLinkOut) +nnOut = pipeline.create(dai.node.XLinkOut) +xoutDepth = pipeline.create(dai.node.XLinkOut) + +xoutManip.setStreamName("right") +nnOut.setStreamName("detections") +xoutDepth.setStreamName("depth") + +# Properties +imageManip.initialConfig.setResize(300, 300) +# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case) +imageManip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p) + +monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoLeft.setCamera("left") +monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoRight.setCamera("right") + +# StereoDepth +stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) +stereo.setSubpixel(True) + +# Define a neural network that will make predictions based on the source frames +spatialDetectionNetwork.setConfidenceThreshold(0.5) +spatialDetectionNetwork.setBlobPath(nnPath) +spatialDetectionNetwork.input.setBlocking(False) +spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5) +spatialDetectionNetwork.setDepthLowerThreshold(100) +spatialDetectionNetwork.setDepthUpperThreshold(5000) + +# Linking +monoLeft.out.link(stereo.left) +monoRight.out.link(stereo.right) + +imageManip.out.link(spatialDetectionNetwork.input) +if syncNN: + spatialDetectionNetwork.passthrough.link(xoutManip.input) +else: + imageManip.out.link(xoutManip.input) + +spatialDetectionNetwork.out.link(nnOut.input) + +stereo.rectifiedRight.link(imageManip.inputImage) +stereo.depth.link(spatialDetectionNetwork.inputDepth) +spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input) + +# Connect to device and start pipeline +with dai.Device(pipeline) as device: + + # Output queues will be used to get the rgb frames and nn data from the outputs defined above + previewQueue = device.getOutputQueue(name="right", maxSize=4, blocking=False) + detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False) + depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) + + rectifiedRight = None + detections = [] + + startTime = time.monotonic() + counter = 0 + fps = 0 + color = (255, 255, 255) + + while True: + inRectified = previewQueue.get() + inDet = detectionNNQueue.get() + inDepth = depthQueue.get() + + counter += 1 + currentTime = time.monotonic() + if (currentTime - startTime) > 1: + fps = counter / (currentTime - startTime) + counter = 0 + startTime = currentTime + + rectifiedRight = 
inRectified.getCvFrame() + + depthFrame = inDepth.getFrame() # depthFrame values are in millimeters + + depth_downscaled = depthFrame[::4] + if np.all(depth_downscaled == 0): + min_depth = 0 # Set a default minimum depth value when all elements are zero + else: + min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) + max_depth = np.percentile(depth_downscaled, 99) + depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) + depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) + + detections = inDet.detections + + # If the rectifiedRight is available, draw bounding boxes on it and show the rectifiedRight + height = rectifiedRight.shape[0] + width = rectifiedRight.shape[1] + for detection in detections: + roiData = detection.boundingBoxMapping + roi = roiData.roi + roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0]) + topLeft = roi.topLeft() + bottomRight = roi.bottomRight() + xmin = int(topLeft.x) + ymin = int(topLeft.y) + xmax = int(bottomRight.x) + ymax = int(bottomRight.y) + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, cv2.FONT_HERSHEY_SCRIPT_SIMPLEX) + + # Denormalize bounding box + x1 = int(detection.xmin * width) + x2 = int(detection.xmax * width) + y1 = int(detection.ymin * height) + y2 = int(detection.ymax * height) + + try: + label = labelMap[detection.label] + except: + label = detection.label + + cv2.putText(rectifiedRight, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(rectifiedRight, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(rectifiedRight, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(rectifiedRight, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(rectifiedRight, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + + cv2.rectangle(rectifiedRight, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX) + + cv2.putText(rectifiedRight, "NN fps: {:.2f}".format(fps), (2, rectifiedRight.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color) + cv2.imshow("depth", depthFrameColor) + cv2.imshow("rectified right", rectifiedRight) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/obstacle_avoidance/SpatialDetection/spatial_tiny_yolo.py b/obstacle_avoidance/SpatialDetection/spatial_tiny_yolo.py new file mode 100644 index 00000000..ba4faad9 --- /dev/null +++ b/obstacle_avoidance/SpatialDetection/spatial_tiny_yolo.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import sys +import cv2 +import depthai as dai +import numpy as np +import time + +''' +Spatial Tiny-yolo example + Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map. 
+ Can be used for tiny-yolo-v3 or tiny-yolo-v4 networks +''' + +# Get argument first +nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute()) +if 1 < len(sys.argv): + arg = sys.argv[1] + if arg == "yolo3": + nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute()) + elif arg == "yolo4": + nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute()) + else: + nnBlobPath = arg +else: + print("Using Tiny YoloV4 model. If you wish to use Tiny YOLOv3, call 'tiny_yolo.py yolo3'") + +if not Path(nnBlobPath).exists(): + import sys + raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"') + +# Tiny yolo v3/4 label texts +labelMap = [ + "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", + "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", + "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", + "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", + "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", + "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", + "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", + "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", + "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush" +] + +syncNN = True + +# Create pipeline +pipeline = dai.Pipeline() + +# Define sources and outputs +camRgb = pipeline.create(dai.node.ColorCamera) +spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork) +monoLeft = pipeline.create(dai.node.MonoCamera) +monoRight = pipeline.create(dai.node.MonoCamera) +stereo = pipeline.create(dai.node.StereoDepth) +nnNetworkOut = pipeline.create(dai.node.XLinkOut) + +xoutRgb = pipeline.create(dai.node.XLinkOut) +xoutNN = pipeline.create(dai.node.XLinkOut) +xoutDepth = pipeline.create(dai.node.XLinkOut) + +xoutRgb.setStreamName("rgb") +xoutNN.setStreamName("detections") +xoutDepth.setStreamName("depth") +nnNetworkOut.setStreamName("nnNetwork") + +# Properties +camRgb.setPreviewSize(416, 416) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) +camRgb.setInterleaved(False) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) + +monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoLeft.setCamera("left") +monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) +monoRight.setCamera("right") + +# setting node configs +stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) +# Align depth map to the perspective of RGB camera, on which inference is done +stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A) +stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight()) +stereo.setSubpixel(True) + +spatialDetectionNetwork.setBlobPath(nnBlobPath) +spatialDetectionNetwork.setConfidenceThreshold(0.5) +spatialDetectionNetwork.input.setBlocking(False) +spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5) +spatialDetectionNetwork.setDepthLowerThreshold(100) 
+spatialDetectionNetwork.setDepthUpperThreshold(5000) + +# Yolo specific parameters +spatialDetectionNetwork.setNumClasses(80) +spatialDetectionNetwork.setCoordinateSize(4) +spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319]) +spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] }) +spatialDetectionNetwork.setIouThreshold(0.5) + +# Linking +monoLeft.out.link(stereo.left) +monoRight.out.link(stereo.right) + +camRgb.preview.link(spatialDetectionNetwork.input) +if syncNN: + spatialDetectionNetwork.passthrough.link(xoutRgb.input) +else: + camRgb.preview.link(xoutRgb.input) + +spatialDetectionNetwork.out.link(xoutNN.input) + +stereo.depth.link(spatialDetectionNetwork.inputDepth) +spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input) +spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input) + +# Connect to device and start pipeline +with dai.Device(pipeline) as device: + + # Output queues will be used to get the rgb frames and nn data from the outputs defined above + previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) + detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False) + depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) + networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False) + + startTime = time.monotonic() + counter = 0 + fps = 0 + color = (255, 255, 255) + printOutputLayersOnce = True + + while True: + inPreview = previewQueue.get() + inDet = detectionNNQueue.get() + depth = depthQueue.get() + inNN = networkQueue.get() + + if printOutputLayersOnce: + toPrint = 'Output layer names:' + for ten in inNN.getAllLayerNames(): + toPrint = f'{toPrint} {ten},' + print(toPrint) + printOutputLayersOnce = False + + frame = inPreview.getCvFrame() + depthFrame = depth.getFrame() # depthFrame values are in millimeters + + depth_downscaled = depthFrame[::4] + if np.all(depth_downscaled == 0): + min_depth = 0 # Set a default minimum depth value when all elements are zero + else: + min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) + max_depth = np.percentile(depth_downscaled, 99) + depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) + depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) + + counter+=1 + current_time = time.monotonic() + if (current_time - startTime) > 1 : + fps = counter / (current_time - startTime) + counter = 0 + startTime = current_time + + detections = inDet.detections + + # If the frame is available, draw bounding boxes on it and show the frame + height = frame.shape[0] + width = frame.shape[1] + for detection in detections: + roiData = detection.boundingBoxMapping + roi = roiData.roi + roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0]) + topLeft = roi.topLeft() + bottomRight = roi.bottomRight() + xmin = int(topLeft.x) + ymin = int(topLeft.y) + xmax = int(bottomRight.x) + ymax = int(bottomRight.y) + cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1) + + # Denormalize bounding box + x1 = int(detection.xmin * width) + x2 = int(detection.xmax * width) + y1 = int(detection.ymin * height) + y2 = int(detection.ymax * height) + try: + label = labelMap[detection.label] + except: + label = detection.label + cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 
0.5, 255) + cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + + cv2.rectangle(frame, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX) + + cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color) + cv2.imshow("depth", depthFrameColor) + cv2.imshow("rgb", frame) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/obstacle_avoidance/path_adjustment.py b/obstacle_avoidance/path_adjustment.py new file mode 100644 index 00000000..c52414f9 --- /dev/null +++ b/obstacle_avoidance/path_adjustment.py @@ -0,0 +1,92 @@ +''' +take in 2d array of data (currently 9x15) +if obstacle detected in any of boxes that aren't detecting the ground (ex: top 2/3 of boxes), then: +compute x and y position of said object (x = lateral distance from top-down perspective, y = distance ahead) + note: (x^2 + y^2 = d^2), where d is value in a box + to compute x and y, account for FOV of camera, determine angle(x) and angle(y) at each box (must be precomputed), + then use d to determine exact distances +then, given the position of the obstacle, the turning radius of the robot and the size of the robot, +determine the best possible path that minimizes the time required to get to the endpoint while +maximizing the distance from the robot to the obstacle + +edge cases to consider in testing: +- very wide object; robot may try to take shortest path to avoid it repeatedly, resulting in going + over the same area repeatedly +- very long object; robot may avoid the head of it successfully, but then crash into its back if + unaware of how long the object truly is +- moving object; may require predicting the path of the object and avoiding it w.r.t. where it is in time +''' + +GRID_WIDTH = 15 +GRID_HEIGHT = 9 +GROUND_ROWS_TO_IGNORE = 1 # potentially important if camera is low to ground, adjust later with testing +CRITICAL_DISTANCE = 2000 # 2 meters + +# pretend this is the publisher node that the steering node will subscribe to +def get_obstacle_list(spatial_data: list) -> list: + """ + Filters the 135-point spatial data list to find true, non-ground obstacles. + note: the list is a 1d array because of the depthai function call q.get().getSpatialLocations() + + Returns: + list: A list of (x, z) tuples for each detected obstacle, + where x is lateral distance and z is forward distance in mm. + """ + obstacles = [] + + if not spatial_data or len(spatial_data) != (GRID_WIDTH * GRID_HEIGHT): + return [] + + for i, loc in enumerate(spatial_data): + coords = loc.spatialCoordinates + + # invalid data + if coords.z == 0: + continue + + row = i // GRID_WIDTH + + if row >= (GRID_HEIGHT - GROUND_ROWS_TO_IGNORE): + continue + + if coords.z < CRITICAL_DISTANCE: + obstacle_pos_xz = (coords.x, coords.z) + obstacles.append(obstacle_pos_xz) + + return obstacles + +''' +things to add when in ROS (next thing to do): + +update curvature in pure pursuit algo to adjust for and avoid obstacles +according to gemini this is correct code, yet to test: + +# 1. Calculate the base curvature to follow the path +path_curv = curvature(la, self.pos, angle, LOOKAHEAD) + +# 2. 
Calculate the avoidance curvature +avoid_curv = 0.0 +K_AVOID = 1.5 # can adjust +AVOID_DISTANCE_X = 10.0 # Max distance ahead to react +AVOID_DISTANCE_Y = 2.0 # Max lateral distance to react + +if self.obstacle_pos is not None: + obs_x, obs_y = self.obstacle_pos + + # Only react if obstacle is within our "activation zone" + if 0.1 < obs_x < AVOID_DISTANCE_X and abs(obs_y) < AVOID_DISTANCE_Y: + + # Use the same logic as pure pursuit: kappa = 2*eta / L^2 + # Here, lateral offset (eta) = obs_y + # Lookahead (L) = obs_x + # We add a negative sign to steer *away* from the offset + # We use 1/x^2, so it's very strong when close + avoid_curv = -K_AVOID * (2 * obs_y / (obs_x**2)) + + # Important: Consume the obstacle data so we don't + # react to stale information. + self.obstacle_pos = None + +# 3. Combine the curvatures +curv = path_curv + avoid_curv +''' \ No newline at end of file diff --git a/obstacle_avoidance/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob b/obstacle_avoidance/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob new file mode 100644 index 00000000..bb9a4d6b Binary files /dev/null and b/obstacle_avoidance/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob differ diff --git a/obstacle_avoidance/yolo.py b/obstacle_avoidance/yolo.py new file mode 100644 index 00000000..9f6f09bd --- /dev/null +++ b/obstacle_avoidance/yolo.py @@ -0,0 +1,64 @@ +from ultralytics import YOLO +import cv2 +import math +# start webcam +cap = cv2.VideoCapture(0) +cap.set(3, 640) +cap.set(4, 480) + +# model +model = YOLO("yolo-Weights/yolov8n.pt") + +# object classes +classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", + "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", + "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", + "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", + "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush" + ] + + +while True: + success, img = cap.read() + results = model(img, stream=True) + + # coordinates + for r in results: + boxes = r.boxes + + for box in boxes: + # bounding box + x1, y1, x2, y2 = box.xyxy[0] + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values + + # put box in cam + cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3) + + # confidence + confidence = math.ceil((box.conf[0]*100))/100 + print("Confidence --->",confidence) + + # class name + cls = int(box.cls[0]) + print("Class name -->", classNames[cls]) + + # object details + org = [x1, y1] + font = cv2.FONT_HERSHEY_SIMPLEX + fontScale = 1 + color = (255, 0, 0) + thickness = 2 + + cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness) + + cv2.imshow('Webcam', img) + if cv2.waitKey(1) == ord('q'): + break + +cap.release() +cv2.destroyAllWindows() \ No newline at end of file diff --git a/obstacle_avoidance/yolo_example.py b/obstacle_avoidance/yolo_example.py new file mode 100644 index 00000000..117bcd39 --- /dev/null +++ b/obstacle_avoidance/yolo_example.py @@ -0,0 
+1,64 @@ +from ultralytics import YOLO +import cv2 +import math +# start webcam +cap = cv2.VideoCapture(1) +cap.set(3, 640) +cap.set(4, 480) + +# model +model = YOLO("yolo-Weights/yolov8n.pt") + +# object classes +classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", + "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", + "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", + "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", + "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush" + ] + + +while True: + success, img = cap.read() + results = model(img, stream=True) + + # coordinates + for r in results: + boxes = r.boxes + + for box in boxes: + # bounding box + x1, y1, x2, y2 = box.xyxy[0] + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values + + # put box in cam + cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3) + + # confidence + confidence = math.ceil((box.conf[0]*100))/100 + print("Confidence --->",confidence) + + # class name + cls = int(box.cls[0]) + print("Class name -->", classNames[cls]) + + # object details + org = [x1, y1] + font = cv2.FONT_HERSHEY_SIMPLEX + fontScale = 1 + color = (255, 0, 0) + thickness = 2 + + cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness) + + cv2.imshow('Webcam', img) + if cv2.waitKey(1) == ord('q'): + break + +cap.release() +cv2.destroyAllWindows() \ No newline at end of file
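
Note on the angle precomputation described in the path_adjustment.py docstring: getSpatialLocations() already returns x/y/z per cell, so get_obstacle_list() does not need it, but if only a scalar distance d per grid box were available, the lateral/forward split could be recovered from precomputed per-cell ray angles. A minimal sketch, assuming the 15x9 grid used above and assuming a horizontal FOV of roughly 72 degrees for the depth frame (the FOV value and every name below are illustrative, not taken from the code in this PR):

import math

GRID_WIDTH, GRID_HEIGHT = 15, 9
HFOV_DEG = 72.0  # assumed horizontal FOV of the depth frame; measure/calibrate for the actual camera

def cell_angle(col: int) -> float:
    """Horizontal ray angle (radians) at the center of a grid column; 0 = straight ahead, positive = right."""
    return math.radians(HFOV_DEG) * ((col + 0.5) / GRID_WIDTH - 0.5)

def distance_to_xy(col: int, d_mm: float):
    """Split a per-box distance d into (x = lateral, y = forward) in mm, using the x^2 + y^2 = d^2 approximation."""
    theta = cell_angle(col)
    return d_mm * math.sin(theta), d_mm * math.cos(theta)

The table of cell_angle() values for the 15 columns (and, if needed, the corresponding vertical angles for the 9 rows) would be computed once at startup, matching the "must be precomputed" note in the docstring.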
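Similarly, a small, untested sketch of how the obstacle list could feed the avoidance-curvature term outlined in the comment block at the end of path_adjustment.py. It assumes path_adjustment.py is importable and that the steering node works in meters; the gain and activation-zone constants reuse the placeholder values from that comment, and path_curv stands for whatever the existing pure-pursuit code produces:

from path_adjustment import get_obstacle_list  # function defined earlier in this PR

K_AVOID = 1.5            # avoidance gain (placeholder, to be tuned)
AVOID_DISTANCE_X = 10.0  # only react to obstacles closer than this distance ahead (m)
AVOID_DISTANCE_Y = 2.0   # only react to obstacles within this lateral offset (m)

def avoidance_curvature(obstacles_mm) -> float:
    """Extra curvature steering away from the strongest nearby obstacle.

    obstacles_mm: (lateral x, forward z) tuples in millimeters, as returned by get_obstacle_list().
    Uses kappa = -K * 2*eta / L^2 with eta = lateral offset and L = forward distance,
    mirroring the pure-pursuit-style formula in the path_adjustment.py comment.
    """
    strongest = 0.0
    for x_mm, z_mm in obstacles_mm:
        lat, fwd = x_mm / 1000.0, z_mm / 1000.0
        if 0.1 < fwd < AVOID_DISTANCE_X and abs(lat) < AVOID_DISTANCE_Y:
            curv = -K_AVOID * (2 * lat / (fwd ** 2))
            if abs(curv) > abs(strongest):
                strongest = curv
    return strongest

# e.g. inside the polling loop of collision_avoidance.py, once path_curv is known:
#   curv = path_curv + avoidance_curvature(get_obstacle_list(slc_data))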