# -- .gitignore (new file): notes, prototypes, *.png, yolo, camera1photos,
# --                        camera2photos, trained_model, __pycache__
# -- camera_calibrator.py (new file) --
import cv2
import numpy as np
import os
import glob

CHESSBOARD_HEIGHT = 6  # inner corners per column; TODO: change if needed
CHESSBOARD_WIDTH = 7   # inner corners per row; TODO: change if needed
CHESSBOARD_DIM = (CHESSBOARD_WIDTH, CHESSBOARD_HEIGHT)
MIN_IMAGES = 20  # the minimum number of pattern detections needed to calibrate
# cornerSubPix termination criteria: stop after 30 iterations or eps < 0.001.
CRITERIA = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
PHOTO_DIRECTORY = "calibration_photos"

# World (object) points of the chessboard corners, expressed in the board's
# own frame: (0,0,0), (1,0,0), ... — z stays 0 because the board is planar.
objp = np.zeros(((CHESSBOARD_WIDTH * CHESSBOARD_HEIGHT), 3), np.float32)
objp[:, :2] = np.mgrid[0:CHESSBOARD_WIDTH, 0:CHESSBOARD_HEIGHT].T.reshape(-1, 2)


class CameraCalibrator:
    """Calibrates one camera from chessboard views (live capture or saved photos)."""

    # Shape of the last image the pattern was found in.  Class-level on
    # purpose: the stereo code reads one shared image size for both cameras.
    imgSize = None

    def __init__(self, camDescriptor):
        """camDescriptor: OpenCV device index; also names the photo directory."""
        self.descriptor = camDescriptor
        self.objpoints = []      # world points (one objp copy per accepted view)
        self.imgpoints = []      # refined 2d corner points per accepted view
        self.totalNumImages = 0  # number of views the pattern was found in
        self.camera = None       # cv2.VideoCapture, opened lazily

        # Outputs of cv2.calibrateCamera (filled in by calibrate()).
        self.retVal = None
        self.cameraMatrix = None
        self.distCoeff = None
        self.rvecs = None
        self.tvecs = None
        self.reprojectionError = None

        self.photoDirectory = f"camera{self.descriptor}photos"

    def initializePhotoDirectory(self):
        """Create the directory that stores photos the pattern was found in."""
        os.mkdir(self.photoDirectory)

    def checkExistingDirectory(self):
        """Return True when the photo directory already exists.

        Fix: this was an unimplemented stub (`pass`, i.e. it always returned
        None); it now answers the question its name asks so callers can
        branch on the result.
        """
        return os.path.isdir(self.photoDirectory)
+ # Initializes the camera with the specified descriptor + + def initializeCamera(self): + + if not self.camera: + self.camera = cv2.VideoCapture(self.descriptor, cv2.CAP_DSHOW) + + if not self.camera.isOpened(): + print(f'Unable to open camera {self.descriptor}') + exit() + # raise Exception("") + + + def findPatternCameras(self, img): + + ret, corners = cv2.findChessboardCorners(img, CHESSBOARD_DIM, None) + + # ret is True if the algorithm was able to find the corners in the frame, else try again. also if minimum number of images is not reached yet keep capturing pictures + if ret: + + self.totalNumImages += 1 + + print(f"{self.photoDirectory}/photo{self.totalNumImages}.png") + + # write image to directory + cv2.imwrite(f"{self.photoDirectory}/photo{self.totalNumImages}.png", img) + + print(f'Successfully captured, total: {self.totalNumImages}') + + CameraCalibrator.imgSize = img.shape + print(img.shape) + + # self.imgSize = img.shape # TODO: will this work? i mean it updates everytime + + self.objpoints.append(objp) + + # this basically improves the accuracy of the corners + corners2 = cv2.cornerSubPix(img, corners, (11, 11), (-1, -1), CRITERIA) + self.imgpoints.append(corners2) + + # just draws the corners found in the frame + cv2.drawChessboardCorners(img, CHESSBOARD_DIM, corners2, ret) + cv2.imshow("found pattern", img) + else: + print("Unsuccessful capture") + + def findPatternImages(self, img): + + ret, corners = cv2.findChessboardCorners(img, CHESSBOARD_DIM, None) + + # ret is True if the algorithm was able to find the corners in the frame, else try again. also if minimum number of images is not reached yet keep capturing pictures + if ret: + + self.totalNumImages += 1 + + print(f'Successfully captured, total: {self.totalNumImages}') + + CameraCalibrator.imgSize = img.shape + print(img.shape) + + # self.imgSize = img.shape # TODO: will this work? 
i mean it updates everytime + + self.objpoints.append(objp) + + # this basically improves the accuracy of the corners + corners2 = cv2.cornerSubPix(img, corners, (11, 11), (-1, -1), CRITERIA) + self.imgpoints.append(corners2) + + # just draws the corners found in the frame + cv2.drawChessboardCorners(img, CHESSBOARD_DIM, corners2, ret) + cv2.imshow("found pattern", img) + else: + print("Unsuccessful capture") + + def calculateReprojectionError(self): + mean_error = 0 + + # reproject each set of image points for each image + for i in range(len(self.objpoints)): + imgpoints2, _ = cv2.projectPoints(self.objpoints[i], self.rvecs[i], self.tvecs[i], self.cameraMatrix, self.distCoeff) + error = cv2.norm(imgpoints2, self.imgpoints[i], cv2.NORM_L2) + mean_error += error + + self.reprojectionError = (mean_error / len(self.objpoints)) + # print(f'Reprojection error for camera {self.descriptor} is {mean_error / len(self.objpoints)}') + + def printInternals(self): + print(f'Results from calibration for camera {self.descriptor}') + + print(f'ret: {self.retVal}') + print(f'Camera matrix: {self.cameraMatrix}') + print(f'Distortion coefficients: {self.distCoeff}') + print(f'Rotation vectors: {self.rvecs}') + print(f'Translation vectors: {self.tvecs}') + + print(f'Reprojection error: {self.reprojectionError}') + + def calibrateUsingCameras(self): + + while True: + + # read in the next frame + ret, frame = self.camera.read() + + if not ret: + print("Read unsuccessful") + + if self.totalNumImages >= MIN_IMAGES: + break + + # frame is inverted so flip TODO: is this right? should it be kept unflipped? 
+ # frame = cv2.flip(frame, 1) + cv2.imshow("frame", frame) + + key = cv2.waitKey(1) + + # capture image + if key == ord('c'): + + # turn the image grayscale for finding the corners + capturedImage = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + self.findPatternCameras(capturedImage) + + return capturedImage.shape[::-1] + + def calibrateUsingImages(self): + + # Note: this assumes the photo directory exists + + # walk through the directory and read the image + for img in glob.glob(f"{self.photoDirectory}/*.png"): + frame = cv2.imread(img) + + capturedImage = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + self.findPatternImages(capturedImage) + # pass image to self.findPattern(image) + + return capturedImage.shape[::-1] + + + def calibrate(self): + + self.initializeCamera() + + if not os.path.exists(self.photoDirectory): + self.initializePhotoDirectory() + capturedImageShape = self.calibrateUsingCameras() + else: + capturedImageShape = self.calibrateUsingImages() + + # Note: when we get here we should have the minumum amount of required images + print("Minimum requirement reached") + + # calibrate the camera + ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(self.objpoints, self.imgpoints, capturedImageShape, None, None) + + self.retVal = ret + self.cameraMatrix = mtx + self.distCoeff = dist + self.rvecs = rvecs + self.tvecs = tvecs + + self.calculateReprojectionError() + + self.printInternals() + + # TODO: probably shouldn't release? 
# (end of camera_calibrator.py)

# -- camera_test.py (new file): quick manual smoke test for the two USB cams --
import cv2

# laptopCam = cv2.VideoCapture(0)
camLeft = cv2.VideoCapture(1, cv2.CAP_DSHOW)
camRight = cv2.VideoCapture(2, cv2.CAP_DSHOW)

print("Opened cameras")


def _previewCamera(cam, windowName):
    """Show live frames from `cam` in `windowName` until 'q' is pressed.

    Fixes over the original inline loops:
    - tests cam.isOpened() instead of the object's truthiness (a
      cv2.VideoCapture is always truthy, even when the device failed to
      open, so the original check could never fire);
    - checks the per-frame return value so a failed read cannot hand a
      None frame to cv2.imshow.
    """
    while True:
        if not cam.isOpened():
            print("Camera is not available")
            exit()

        ret, frame = cam.read()
        if not ret:
            continue

        cv2.imshow(windowName, frame)

        if cv2.waitKey(1) == ord('q'):
            break


_previewCamera(camLeft, "laptop")
_previewCamera(camRight, "cam")

# Fix: release the devices so other programs can open them afterwards.
camLeft.release()
camRight.release()
cv2.destroyAllWindows()

# -- main.py (new file): stereo calibration + YOLO-assisted depth estimation --
import camera_calibrator
import cv2
import numpy as np
import yolo_model
import time

"""
Scenarios this pipeline has to cover:

1. we already have the images and cameras have not moved
- easier to load the parameters and distortion coefficients?
- then use those to go straight to stereoCalibration

2. components of stereo rig moved, have to perform camera calibration again
- delete the photo directories of each camera
- delete the parameters of each camera

3. we dont have the images, have to perform camera calibration
- initialize photo directories
- initialize parameter file
"""
# (module docstring of main.py ends above)

if __name__ == "__main__":

    # calibrate each camera individually first
    camLeft = camera_calibrator.CameraCalibrator(1)
    camLeft.calibrate()

    camRight = camera_calibrator.CameraCalibrator(2)  # TODO: either 1 or 2
    camRight.calibrate()

    # single shared image size recorded during the per-camera calibrations
    imgSize = camera_calibrator.CameraCalibrator.imgSize

    print(imgSize)

    print(camera_calibrator.objp.shape)
    print(len(camLeft.imgpoints))
    print(len(camRight.imgpoints))

    # Stereo calibration refines both intrinsics and yields R/T between the
    # cameras.  TODO: could pass cv2.CALIB_FIX_INTRINSIC to keep the
    # per-camera intrinsics fixed, and use the returned matrices everywhere.
    retval, stereoCam1, stereoDist1, stereoCam2, stereoDist2, stereoR, stereoT, E, F = cv2.stereoCalibrate(camLeft.objpoints, camLeft.imgpoints, camRight.imgpoints, camLeft.cameraMatrix, camLeft.distCoeff, camRight.cameraMatrix, camRight.distCoeff, imgSize)

    R1, R2, P1, P2, Q, validPixROI1, validPixROI2 = cv2.stereoRectify(stereoCam1, stereoDist1, stereoCam2, stereoDist2, imgSize, stereoR, stereoT)

    # per-pixel remap tables that undistort + rectify each camera's frames
    map1_L, map2_L = cv2.initUndistortRectifyMap(stereoCam1, stereoDist1, R1, P1, imgSize, cv2.CV_32FC1)  # TODO: adjust m1type
    map1_R, map2_R = cv2.initUndistortRectifyMap(stereoCam2, stereoDist2, R2, P2, imgSize, cv2.CV_32FC1)  # TODO: adjust m1type

    retval = None  # reused below as the YOLO detection success flag
    X = None
    Y = None

    # keep capturing stereo pairs until the YOLO model finds a cube
    while not retval:

        leftImage = None
        rightImage = None

        while True:
            # read the next frame from each camera
            ret1, frame1 = camLeft.camera.read()
            ret2, frame2 = camRight.camera.read()

            # Fix: skip the iteration when either read fails — the original
            # passed a None frame straight to cv2.imshow, which throws.
            if not (ret1 and ret2):
                print("Read unsuccessful")
                continue

            cv2.imshow("left camera", frame1)
            cv2.imshow("right camera", frame2)

            key = cv2.waitKey(1)

            # capture the stereo pair on 'c'
            if key == ord('c'):
                leftImage = frame1
                rightImage = frame2
                break

        print(frame1)
        print(frame2)

        print(f"Left image element type: {leftImage.dtype}\nLeft image shape: {leftImage.shape}")
        print(f"RIght image element type: {rightImage.dtype}\n RIght image shape: {rightImage.shape}")

        # undistort and rectify the captured pair
        rectified_L = cv2.remap(leftImage, map1_L, map2_L, cv2.INTER_NEAREST)  # TODO: adjust interpolation
        rectified_R = cv2.remap(rightImage, map1_R, map2_R, cv2.INTER_NEAREST)  # TODO: adjust interpolation

        print("Rectifying the images")
        print(f"Left image element type: {rectified_L.dtype}\nLeft image shape: {rectified_L.shape}")
        print(f"RIght image element type: {rectified_R.dtype}\n RIght image shape: {rectified_R.shape}")

        print("Saving rectified images")
        cv2.imwrite("rectified_L.png", rectified_L)
        cv2.imwrite("rectified_R.png", rectified_R)
        print(rectified_L)

        # feed the undistorted rectified LEFT image to the YOLO model;
        # X, Y are in the rectified left camera's pixel coordinates
        model = yolo_model.YoloModel()
        retval, X, Y = model.getXYCoordinates(rectified_L)
+ + rectified_L_8bit = cv2.cvtColor(rectified_L, cv2.COLOR_BGR2GRAY) + rectified_R_8bit = cv2.cvtColor(rectified_R, cv2.COLOR_BGR2GRAY) + + # rectified_L_8bit = (rectified_L/256).astype(np.uint8) + # rectified_R_8bit = (rectified_R/256).astype(np.uint8) + + print("Converting images to 8 bit single channel") + print(f"Left image element type: {rectified_L.dtype}\nLeft image shape: {rectified_L.shape}") + print(f"RIght image element type: {rectified_R.dtype}\n RIght image shape: {rectified_R.shape}") + + # pipe the undistorted rectified images to stereoBM/stereoSGBM + stereoBM = cv2.StereoBM.create(16, 15) # TODO change parameters if necessary + disparity_BM = stereoBM.compute(rectified_L_8bit, rectified_R_8bit, disparity=cv2.CV_32F) #.astype(np.float32) / 16.0 # TODO what type are the rectified images? compute() says it takes in 8-bit single channel images + + print("Showing disparity map") + cv2.imwrite("disparity.png", disparity_BM) + # cv2.imshow("disparity", disparity_BM) + print(disparity_BM) + + # stereoSGBM = cv2.StereoSGBM.create() # TODO + + # ?, note that disparity should be divided by 16 and scaled to float + threeD = cv2.reprojectImageTo3D(disparity_BM, Q) + + # use the disparity map to estimate the depth + + baseline = 10 # TODO: this was measured in real life, distance between the cameras on the stereo rig + + focal_length = 1 # TODO: unsure if this is the focal length from camera intrinsics or from camera manufacturer + diff --git a/mkdir.py b/mkdir.py new file mode 100755 index 0000000..7207505 --- /dev/null +++ b/mkdir.py @@ -0,0 +1,13 @@ +import os +import time + +os.mkdir("photos") + +for i in range(5): + with open(f'photos/copy{i}.txt', "w") as file: + file.write("Your text goes here") + +time.sleep(10) + +os.rmdir("photos") + diff --git a/stereo_camera.py b/stereo_camera.py new file mode 100755 index 0000000..c531b8c --- /dev/null +++ b/stereo_camera.py @@ -0,0 +1,13 @@ +class StereoCamera: + def __init__(self, left, right): + self.leftCam = 
# -- stereo_camera.py (new file) --
class StereoCamera:
    """Pairs a left and a right camera for stereo work."""

    def __init__(self, left, right):
        self.leftCam = left
        self.rightCam = right

    def calibrate(self):
        """Stereo-calibrate the pair.  Not implemented yet.

        Fix: the original was declared without `self`, so calling it as an
        instance method (stereo.calibrate()) raised TypeError.
        """
        pass


# -- test.py (new file): single-camera YOLO sanity check --
import yolo_model
import cv2
import os

IMG_PATH = 'yolo/img.png'

if os.path.exists(IMG_PATH):
    # an image from a previous run exists: reuse it
    img = cv2.imread(IMG_PATH)

    # perform object detection on the saved image
    model = yolo_model.YoloModel()
    model.getXYCoordinates(img)
else:
    videoCapture = cv2.VideoCapture(0)

    if not videoCapture.isOpened():
        print("Unable to open camera")
        exit()

    while True:
        # read in the next frame
        ret, frame = videoCapture.read()
        if not ret:
            print("Unsuccessful")
            break

        # frame = cv2.flip(frame, 1)
        cv2.imshow("noob", frame)

        key = cv2.waitKey(1)
        if key == ord('c'):
            # imwrite returns False when the 'yolo' directory doesn't exist
            print(cv2.imwrite("yolo/img.png", frame))
            break
        elif key == ord('q'):
            exit()

# -- yolo_model.py (new file) --
import cv2
import numpy as np
import ultralytics

from ultralytics import YOLO


class YoloModel:
    """Thin wrapper around a trained Ultralytics YOLO cube detector."""

    def __init__(self):
        self.model = YOLO('trained_model/my_model.pt')

    def getXYCoordinates(self, image):
        """Return (found, center_x, center_y) for the first detected cube.

        Returns (False, 0, 0) when no cube is detected.  Coordinates are
        pixel positions in `image`.
        """
        results = self.model(image)

        for result in results:

            result.show()

            boxes = result.boxes

            # TODO: for now just handle one cube; might add multi-cube support
            for box in boxes:

                # print(box)  # TODO: could return the box to redraw it later

                if self.model.names[int(box.cls)] == "cube":
                    # xywh: box center + size; take the center point
                    dim = box.xywh[0]

                    center_x = dim[0]
                    center_y = dim[1]

                    return True, int(center_x), int(center_y)

        return False, 0, 0