standard-cv/main.py at main · PennState-RoboX/standard-cv · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import enum
import math
import pathlib
from dataclasses import dataclass
import cv2
import numpy as np
import serial
from UART_UTIL import send_data, get_imu
from camera_source import CameraSource
from kinematic_prediction import poly_predict
import argparse
import logging
import time
from camera_params import camera_params, DepthSource
from Target import Target
import struct

# Camera configuration mapping read by get_3d_target_location (it looks up the
# keys 'camera_matrix', 'distort_coeffs', 'cx', 'cy', 'fx', 'fy' and
# 'depth_source'). Starts as None; presumably assigned during startup outside
# this section -- verify.
active_cam_config = None
# Not referenced by any function in this section; presumably initialised
# elsewhere -- verify before relying on it.
frame_aligner = None
# Counter incremented by find_contours once per candidate target, and only
# while `debug` is enabled.
num = 0


def nothing(x):
    """No-op callback; accepts one argument and ignores it.

    Passed to cv2.createTrackbar as the on-change handler because the slider
    positions are polled with cv2.getTrackbarPos instead.
    """
    return None


class TargetColor(enum.Enum):
    """Colour of the light bars to detect.

    CVParams compares against ``TargetColor.RED`` to pick its default HSV
    thresholds and morphology sizes; any other member gets the alternate set.
    """
    RED = 'red'
    BLUE = 'blue'


class CVParams:
    """Tunable thresholds for light-bar segmentation and bar pairing.

    Every numeric attribute may have a companion ``<name>_range`` attribute
    holding ``(slider_min, slider_max, scaling)``; the trackbar helpers in this
    file look that companion up by name. Attributes are assigned in a fixed
    order because those helpers iterate ``__dict__``.
    """

    def __init__(self, target_color: TargetColor):
        self.target_color = target_color

        # Defaults per colour: (hue_min, hue_max, saturation_min, value_min)
        # followed by (close_size, erode_size, dilate_size).
        if target_color == TargetColor.RED:
            hsv_defaults = (4, 38, 54, 111)
            morph_defaults = (1, 1, 5)
        else:
            hsv_defaults = (90, 120, 20, 128)
            morph_defaults = (3, 2, 2)

        hue_span = (0, 180, 1)
        byte_span = (0, 255, 1)
        hsv_layout = (
            ('hue_min', hue_span),
            ('hue_max', hue_span),
            ('saturation_min', byte_span),
            ('value_min', byte_span),
        )
        # Each threshold is stored immediately before its slider range.
        for (attr, span), default in zip(hsv_layout, hsv_defaults):
            setattr(self, attr, default)
            setattr(self, attr + '_range', span)

        self.close_size, self.erode_size, self.dilate_size = morph_defaults

        # The three morphology kernels share one slider range.
        morph_span = (1, 20, 1)
        self.close_size_range = morph_span
        self.erode_size_range = morph_span
        self.dilate_size_range = morph_span

        # Geometric limits used by find_contours; these have no explicit
        # slider range.
        self.bar_aspect_ratio_min = 1.1
        self.bar_aspect_ratio_max = 13.0
        self.bar_z_angle_max = 20.0
        self.relative_x_delta_max = 3.0
        self.relative_y_delta_max = 3.0
        self.relative_height_diff_max = 0.5
        self.z_delta_max = 10.0


def createTrackbarsForParams(window_name: str, params: CVParams):
    """Create one OpenCV trackbar per numeric attribute of ``params``.

    Attributes whose name ends in ``_range`` and attributes that are not plain
    ``int``/``float`` are skipped. When ``params`` has a matching
    ``<name>_range`` attribute it is unpacked as
    ``(slider_min, slider_max, scaling)``; otherwise the slider spans the
    decade containing the current value with a scaling of 0.01.

    Trackbars only hold integers, so every quantity is divided by ``scaling``
    before being handed to OpenCV.

    Args:
        window_name: Name of an existing OpenCV window to attach the sliders to.
        params: Parameter object whose attributes are exposed as sliders.

    Raises:
        ValueError: From ``math.log10`` if an attribute without a ``_range``
            companion is zero or negative.
    """
    for key, value in params.__dict__.items():
        if key.endswith('_range') or type(value) not in (int, float):
            continue

        if hasattr(params, key + '_range'):
            slider_min, slider_max, scaling = getattr(params, key + '_range')
        else:
            slider_min = 10 ** math.floor(math.log10(value))
            slider_max = 10 * slider_min
            scaling = 0.01

        # round() instead of bare int() so float noise such as 28.999999...
        # does not drop a slider step.
        min_pos = int(round(slider_min / scaling))
        max_pos = int(round(slider_max / scaling))
        start_pos = int(round(value / scaling))

        # createTrackbar's third argument is the initial position, not the
        # minimum, so the lower bound has to be applied separately.
        cv2.createTrackbar(key, window_name, min_pos, max_pos, nothing)
        cv2.setTrackbarMin(key, window_name, min_pos)
        cv2.setTrackbarPos(key, window_name, start_pos)


def updateParamsFromTrackbars(window_name: str, params: CVParams):
    """Copy the current trackbar positions back onto ``params``.

    Visits the same attributes as createTrackbarsForParams (plain int/float
    values whose name does not end in ``_range``) and stores
    ``position * scaling``, where ``scaling`` is the third entry of the
    attribute's ``_range`` tuple, or 0.01 when no such tuple exists.
    """
    for name, current in params.__dict__.items():
        if name.endswith('_range') or type(current) not in [int, float]:
            continue

        range_name = name + '_range'
        scaling = getattr(params, range_name)[2] if hasattr(params, range_name) else 0.01

        slider_pos = cv2.getTrackbarPos(name, window_name)
        setattr(params, name, slider_pos * scaling)


def open_binary(binary, x, y):
    """Return ``binary`` after a morphological opening.

    The structuring element is rectangular and built from the size ``(x, y)``.
    """
    return cv2.morphologyEx(
        binary,
        cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (x, y)),
    )


def close_binary(binary, x, y):
    """Return ``binary`` after a morphological closing.

    The structuring element is rectangular and built from the size ``(x, y)``.
    """
    return cv2.morphologyEx(
        binary,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (x, y)),
    )


def erode_binary(binary, x, y):
    """Return ``binary`` eroded with a rectangular element of size ``(x, y)``."""
    return cv2.erode(binary, cv2.getStructuringElement(cv2.MORPH_RECT, (x, y)))


def dilate_binary(binary, x, y):
    """Return ``binary`` dilated with a rectangular element of size ``(x, y)``."""
    return cv2.dilate(binary, cv2.getStructuringElement(cv2.MORPH_RECT, (x, y)))


# read cap and morphological operation to get led binary image.
def read_morphology(cap, config: CVParams):
    """Threshold a BGR frame in HSV space and clean the mask morphologically.

    Args:
        cap: The frame to process. Despite the name it is used directly as an
            image (it is handed straight to ``cv2.cvtColor``), not as a capture
            object. ``None`` is tolerated.
        config: Supplies the HSV thresholds (``hue_min``, ``hue_max``,
            ``saturation_min``, ``value_min``) and the square kernel sizes
            (``close_size``, ``erode_size``, ``dilate_size``).

    Returns:
        ``(binary, frame)``: a uint8 mask with 255 where a pixel passed every
        threshold (after close -> erode -> dilate) and the input frame.
        If ``cap`` is ``None`` a black 480x640 mask and a black 480x640x3 frame
        are returned. If processing raises, the error is logged and an
        all-black mask with the same height/width as the frame is returned
        together with a copy of the frame.
    """
    frame = cap
    if frame is None:
        logger.error("Invalid frame in read_morphology")
        return np.zeros((480, 640), dtype=np.uint8), np.zeros((480, 640, 3), dtype=np.uint8)

    try:
        hsv_image = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # Channel views via indexing instead of cv2.split.
        H = hsv_image[:, :, 0]
        S = hsv_image[:, :, 1]
        V = hsv_image[:, :, 2]

        # Hue must fall inside [hue_min, hue_max]; saturation and value only
        # have a lower bound.
        h_mask = (H >= config.hue_min) & (H <= config.hue_max)
        s_mask = (S >= config.saturation_min)
        v_mask = (V >= config.value_min)
        mask_processed = (h_mask & s_mask & v_mask).astype(np.uint8) * 255

        kernel_close = cv2.getStructuringElement(
            cv2.MORPH_RECT, (config.close_size, config.close_size))
        kernel_erode = cv2.getStructuringElement(
            cv2.MORPH_RECT, (config.erode_size, config.erode_size))
        kernel_dilate = cv2.getStructuringElement(
            cv2.MORPH_RECT, (config.dilate_size, config.dilate_size))

        dst_close = cv2.morphologyEx(mask_processed, cv2.MORPH_CLOSE, kernel_close)
        dst_erode = cv2.erode(dst_close, kernel_erode)
        dst_dilate = cv2.dilate(dst_erode, kernel_dilate)

        if debug:
            # Show the final mask after preprocessing.
            cv2.imshow("erode", dst_dilate)

        return dst_dilate, frame
    except Exception as e:
        logger.error(f"Error in read_morphology: {e}")
        # Size the fallback mask like the frame so the returned pair stays
        # consistent; only fall back to 480x640 when the frame exposes no
        # usable 2-D shape.
        shape = getattr(frame, 'shape', None)
        if shape is not None and len(shape) >= 2:
            mask_shape = (shape[0], shape[1])
        else:
            mask_shape = (480, 640)
        return np.zeros(mask_shape, dtype=np.uint8), frame.copy()


def spherical_to_cartesian(yaw: float, pitch: float, depth: float):
    """Convert (yaw, pitch, depth) to a Cartesian ``[x, y, z]`` array.

    ``yaw`` and ``pitch`` are in degrees. ``pitch`` is measured up from the
    x-y plane (the polar angle used internally is ``90 - pitch``) and ``yaw``
    is the angle within the x-y plane measured from the x axis.
    """
    polar = np.radians(90.0 - pitch)
    azimuth = np.radians(yaw)

    sin_polar = np.sin(polar)
    direction = np.array([
        sin_polar * np.cos(azimuth),
        sin_polar * np.sin(azimuth),
        np.cos(polar),
    ])
    return depth * direction


def cartesian_to_spherical(coords: np.ndarray):
    """Convert a Cartesian ``(x, y, z)`` triple to ``(yaw, pitch, depth)``.

    Angles are returned in degrees: ``yaw`` is ``atan2(y, x)`` and ``pitch`` is
    the elevation above the x-y plane. ``depth`` is the Euclidean norm.
    """
    x, y, z = coords

    # Squared length of the x-y projection, shared by pitch and depth.
    planar_sq = x * x + y * y

    yaw = np.rad2deg(np.arctan2(y, x))
    pitch = 90.0 - np.rad2deg(np.arctan2(np.sqrt(planar_sq), z))
    depth = np.sqrt(planar_sq + z * z)
    return yaw, pitch, depth

def rotationMatrixToEulerAngles(R):
    """Extract three Euler angles (radians) from a 3x3 rotation matrix.

    Returns ``np.array([x, y, z])``. When the first column's x/y magnitude
    ``sy`` drops below 1e-6 the matrix is treated as singular: ``z`` is fixed
    at 0 and ``x`` is derived from the second row instead.
    """
    sy = np.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])

    # The middle angle uses the same formula in both cases.
    y = np.arctan2(-R[2, 0], sy)

    if sy < 1e-6:
        x = np.arctan2(-R[1, 2], R[1, 1])
        z = 0
    else:
        x = np.arctan2(R[2, 1], R[2, 2])
        z = np.arctan2(R[1, 0], R[0, 0])

    return np.array([x, y, z])

def get_3d_target_location(imgPoints, frame, depth_frame):
    """Estimate depth, yaw and pitch of a target from its four image corners.

    Reads the module-level ``active_cam_config`` for the camera matrix,
    distortion coefficients, principal point, focal lengths and depth source.

    Args:
        imgPoints: Corner points of the target in image coordinates; they are
            fed to cv2.undistortPoints and, for PnP, matched against four
            object points.
        frame: Image the points belong to; only its shape is used here (to
            size the stereo mask).
        depth_frame: Depth image used when the depth source is STEREO; may be
            None, in which case the default depth is kept.

    Returns:
        dict with keys ``"depth"``, ``"Yaw"``, ``"Pitch"`` and ``"imgPoints"``
        (the undistorted points). If anything outside the inner try blocks
        raises, the error is logged and the dict
        ``{"depth": 1000.0, "Yaw": 0.0, "Pitch": 0.0, "imgPoints": imgPoints}``
        is returned, where ``imgPoints`` is whatever the name was bound to at
        the time of the failure.
    """
    try:
        # Pull everything needed out of active_cam_config up front
        cam_matrix = np.array(active_cam_config['camera_matrix'], dtype=np.float64)
        dist_coeffs = np.array(active_cam_config['distort_coeffs'], dtype=np.float64)
        cx, cy = active_cam_config['cx'], active_cam_config['cy']
        fx, fy = active_cam_config['fx'], active_cam_config['fy']
        depth_source = active_cam_config['depth_source']

        # Undistort the given image points; P=cam_matrix is passed so the
        # result is expressed with the same camera matrix, and [:, 0, :] drops
        # the middle axis of the returned array
        imgPoints = cv2.undistortPoints(
            imgPoints, cam_matrix, dist_coeffs, P=cam_matrix)[:, 0, :]

        # Centre of the target = mean of its corner points
        center_point = np.mean(imgPoints, axis=0)

        # Offset of that centre from the optical centre (cx, cy); the y
        # component is negated (presumably so that points above the optical
        # centre yield a positive pitch, image y growing downward -- confirm)
        center_offset = center_point - np.array([cx, cy])
        center_offset[1] = -center_offset[1]

        # Convert the pixel offset to angles in degrees using the focal lengths
        angles = np.rad2deg(np.arctan2(center_offset, np.array([fx, fy])))

        # These defaults are what gets returned when the depth source is
        # invalid or its calculation fails
        Yaw = angles[0]
        Pitch = angles[1]
        meanDVal = 1000.0  # Default depth value if calculation fails

        # Calculate depth based on the configured depth source
        if depth_source == DepthSource.PNP:
            try:
                # Planar model of the target, centred on the origin.
                # NOTE(review): units are not stated here (presumably mm) -- confirm.
                width_size_half = 70  # half width of the object
                height_size_half = 62.5  # half height of the object
                objPoints = np.array([[-width_size_half, -height_size_half, 0],
                                      [width_size_half, -height_size_half, 0],
                                      [width_size_half, height_size_half, 0],
                                      [-width_size_half, height_size_half, 0]], dtype=np.float64)

                # Solve for the target pose with the IPPE solver.
                # NOTE(review): imgPoints were already undistorted above, yet
                # dist_coeffs is passed again here -- confirm this is intended.
                retval, rvec, tvec = cv2.solvePnP(
                    objPoints, imgPoints, cam_matrix, dist_coeffs, flags=cv2.SOLVEPNP_IPPE)

                # Depth is the length of the translation vector
                meanDVal = np.linalg.norm(tvec[:, 0])

                # angle / (2*pi) * 360 converts radians to degrees
                pi_factor = 2 * np.pi

                # PnP overrides the pixel-offset angles computed above.
                # offsetY / offsetP are fixed corrections in degrees
                # (presumably hand-tuned calibration offsets -- confirm).
                offsetY = 1  # offset for Yaw
                Yaw = np.arctan(tvec[(0,0)]/ tvec[(2,0)]) / pi_factor * 360 - offsetY

                offsetP = -4  # offset for Pitch
                Pitch = -(np.arctan(tvec[(1, 0)] / tvec[(2, 0)]) / pi_factor * 360) - offsetP
            except Exception as e:
                logger.error(f"Error in PnP depth calculation: {e}")
                # Whatever was assigned before the failure is kept

        elif depth_source == DepthSource.STEREO:
            try:
                # Without a depth frame the default depth is kept
                if depth_frame is not None:
                    # Mask with 1 inside the target quadrilateral, 0 elsewhere
                    panel_mask = np.zeros(frame.shape[:2], dtype=np.uint8)
                    cv2.fillPoly(panel_mask, [imgPoints.astype(np.int64)], 1)

                    # Bring the mask to the depth frame's resolution if needed
                    if panel_mask.shape != depth_frame.shape:
                        panel_mask_scaled = cv2.resize(
                            panel_mask, (depth_frame.shape[1], depth_frame.shape[0]))
                    else:
                        panel_mask_scaled = panel_mask

                    # Mean depth inside the mask; the standard deviation is discarded.
                    # NOTE(review): cv2.meanStdDev returns arrays, so meanDVal is
                    # an array here rather than a plain float -- confirm consumers cope.
                    meanDVal, _ = cv2.meanStdDev(depth_frame, mask=panel_mask_scaled)
            except Exception as e:
                logger.error(f"Error in stereo depth calculation: {e}")
                # Keep default values if stereo depth fails
        else:
            # Unknown depth source: the pixel-offset angles and default depth are used
            logger.warning('Invalid depth source in camera config, using default depth')

        # Package depth, yaw, pitch and the undistorted image points
        target_Dict = {"depth": meanDVal,
                       "Yaw": Yaw, "Pitch": Pitch, "imgPoints": imgPoints}
        return target_Dict
    except Exception as e:
        logger.error(f"Error in get_3d_target_location: {e}")
        # Return default values if function fails
        return {"depth": 1000.0, "Yaw": 0.0, "Pitch": 0.0, "imgPoints": imgPoints}


@dataclass
class ImageRect:
    """Four-corner rectangle in image coordinates with derived geometry.

    ``points`` is indexed as a 2-D array with one row per corner. The edge
    descriptions below assume the row order [bl, tl, tr, br] that
    find_contours obtains from findVerticesOrder.
    """

    points: np.ndarray

    @property
    def center(self):
        """Mean of the corner points."""
        return np.mean(self.points, axis=0)

    @property
    def width_vec(self):
        """Vector from the midpoint of rows 0-1 to the midpoint of rows 2-3."""
        first_pair_mid = np.mean(self.points[:2], axis=0)
        last_pair_mid = np.mean(self.points[2:], axis=0)
        return last_pair_mid - first_pair_mid

    @property
    def width(self):
        """Length of ``width_vec``."""
        return np.linalg.norm(self.width_vec)

    @property
    def height_vec(self):
        """Vector from the midpoint of rows 1 and 2 to the midpoint of rows 0 and 3."""
        outer_mid = np.mean(self.points[[0, 3]], axis=0)
        inner_mid = np.mean(self.points[[1, 2]], axis=0)
        return outer_mid - inner_mid

    @property
    def height(self):
        """Length of ``height_vec``."""
        return np.linalg.norm(self.height_vec)

    @property
    def angle(self):
        """Degrees by which ``height_vec`` deviates from the +y image axis."""
        dx, dy = self.height_vec[0], self.height_vec[1]
        return 90.0 - np.rad2deg(np.arctan2(dy, dx))


# find contours and main screening section
def find_contours(config: CVParams, binary, frame, depth_frame, fps):
    """Find light-bar pairs in a binary mask and convert them to targets.

    The search runs in three stages:

    1. Every external contour with area >= 5 is fitted with a minimum-area
       rectangle; rectangles whose height/width ratio and tilt fall inside the
       ``config.bar_*`` limits are kept as light-bar candidates.
    2. Every pair of candidates is compared; pairs that are close enough in
       position, height and tilt (``config.relative_*`` / ``z_delta_max``) are
       kept.
    3. For each kept pair a four-corner quadrilateral is built by extending
       each bar by half its own length at both ends, and the quadrilateral is
       handed to get_3d_target_location to build a ``Target``.

    Args:
        config: Threshold container (see CVParams).
        binary: Single-channel mask to search for contours.
        frame: Image matching ``binary``; annotated in place when the
            module-level ``debug`` flag is set, and forwarded to
            get_3d_target_location.
        depth_frame: Forwarded to get_3d_target_location.
        fps: Unused; kept so existing callers keep working.

    Returns:
        A list of ``Target`` objects (possibly empty). Errors inside a single
        contour/pair are logged and that item is skipped; any other error is
        logged and ``[]`` is returned.
    """
    global num
    try:
        contours, _ = cv2.findContours(
            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        potential_Targets = []
        if len(contours) == 0:
            return potential_Targets

        # Drop specks before doing any geometry on them.
        contours = [contour for contour in contours
                    if cv2.contourArea(contour) >= 5]

        # ---- Stage 1: single contours -> light-bar candidates ----
        bar_aspect_ratio_min = config.bar_aspect_ratio_min
        bar_aspect_ratio_max = config.bar_aspect_ratio_max
        bar_z_angle_max = config.bar_z_angle_max

        first_data = []
        for contour in contours:
            try:
                # Corners of the minimum-area rectangle, as integer pixels.
                coor = cv2.boxPoints(cv2.minAreaRect(contour)).astype(np.int32)

                # findVerticesOrder returns the corners as [bl, tl, tr, br].
                rect = ImageRect(findVerticesOrder(coor))

                if debug:
                    cv2.circle(frame, rect.points[0], 9, (255, 255, 255), -1)  # bottom left
                    cv2.circle(frame, rect.points[1], 9, (0, 255, 0), -1)  # top left
                    cv2.circle(frame, rect.points[2], 9, (255, 255, 0), -1)  # top right
                    cv2.circle(frame, rect.points[3], 9, (0, 100, 250), -1)  # bottom right

                aspect_ratio = rect.height / rect.width
                if (bar_aspect_ratio_min <= aspect_ratio <= bar_aspect_ratio_max
                        and abs(rect.angle) <= bar_z_angle_max):
                    first_data.append(rect)

                    if debug:
                        # coor is already int32, so it can be drawn directly
                        # (np.int0 no longer exists in NumPy 2.x).
                        cv2.drawContours(frame, [coor], -1, (255, 0, 0), 3)
            except Exception as e:
                logger.error(f"Error processing contour: {e}")
                continue

        # ---- Stage 2: candidate pairs that could belong to one target ----
        relative_y_delta_max = config.relative_y_delta_max
        relative_height_diff_max = config.relative_height_diff_max
        relative_x_delta_max = config.relative_x_delta_max
        z_delta_max = config.z_delta_max

        second_data = []
        for i, c in enumerate(first_data):
            c_center = c.center
            c_height = c.height
            c_angle = c.angle

            for n in first_data[i + 1:]:
                try:
                    n_center = n.center
                    n_height = n.height

                    y_delta = abs(c_center[1] - n_center[1])
                    x_delta = abs(c_center[0] - n_center[0])
                    height_ratio = abs(c_height - n_height) / max(c_height, n_height)
                    angle_delta = abs(c_angle - n.angle)
                    # Position limits scale with the average bar height.
                    height_avg = (c_height + n_height) / 2

                    if (y_delta <= relative_y_delta_max * height_avg and
                            height_ratio <= relative_height_diff_max and
                            x_delta <= relative_x_delta_max * height_avg and
                            angle_delta < z_delta_max):
                        second_data.append((c, n))
                except Exception as e:
                    logger.error(f"Error processing rectangle pair: {e}")
                    continue

        # ---- Stage 3: pairs -> targets ----
        for r1, r2 in second_data:
            try:
                x_diff = abs(r1.points[0][0] - r2.points[2][0])
                y_diff = abs(r1.points[0][1] - r2.points[2][1])

                # Reject pairs that are stacked far more than side by side.
                if y_diff > 3 * x_diff:
                    continue

                # The bar whose bottom-right corner is further left is "left".
                if r1.points[3][0] <= r2.points[3][0]:
                    left_bar, right_bar = r1, r2
                else:
                    left_bar, right_bar = r2, r1

                # Half of each bar's own edge vector (bottom minus top); each
                # bar is extended by this amount at both ends. The right-hand
                # vector must come from the right bar, not the left one.
                left_side_vec = (left_bar.points[0] - left_bar.points[1]) / 2
                right_side_vec = (right_bar.points[3] - right_bar.points[2]) / 2

                # Quadrilateral corners in [bl, tl, tr, br] order.
                imgPoints = np.array(
                    [left_bar.points[0] + left_side_vec,
                     left_bar.points[1] - left_side_vec,
                     right_bar.points[2] - right_side_vec,
                     right_bar.points[3] + right_side_vec],
                    dtype=np.float64)

                target_Dict = get_3d_target_location(
                    imgPoints, frame, depth_frame)
                potential_Targets.append(Target(target_Dict))

                if debug:
                    num += 1
                    cv2.putText(frame, "Potentials:", (int(imgPoints[2][0]), int(imgPoints[2][1]) - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, [255, 255, 255])
                    center = np.average(imgPoints, axis=0).astype(np.int32)
                    cv2.circle(frame, center, 2, (0, 0, 255), -1)
            except Exception as e:
                logger.error(f"Error creating target: {e}")
                continue

        return potential_Targets

    except Exception as e:
        logger.error(f"Error in find_contours: {e}")
        return []


def targetsFilter(potential_Targetsets, frame, last_target_x):
    """Pick one target out of the candidates found in the current frame.

    Selection rules, in order:

    * no candidates -> ``None``;
    * exactly one candidate -> that candidate;
    * ``last_target_x`` given -> the candidate whose horizontal centre
      (midway between ``imgPoints[0]`` and ``imgPoints[2]``) is closest to it,
      so tracking stays on the same target;
    * otherwise -> the candidate with the highest credit, where credit is a
      depth score plus an angle score. A candidate scoring 0 is never chosen,
      so ``None`` is returned when every candidate scores 0.

    Args:
        potential_Targetsets: Sequence of objects exposing ``imgPoints``,
            ``depth``, ``yaw`` and ``pitch``.
        frame: Unused; kept so existing callers keep working.
        last_target_x: x coordinate of the previously selected target, or
            ``None`` when there was none.

    Returns:
        The selected target object, or ``None``.
    """
    # Nothing to choose from; without this guard np.argmin([]) below would
    # raise ValueError when last_target_x is set.
    if len(potential_Targetsets) == 0:
        return None

    # if only one target, return it directly
    if len(potential_Targetsets) == 1:
        return potential_Targetsets[0]

    # Keep tracking the previous target: choose the nearest candidate in x.
    if last_target_x is not None:
        all_distance_diff = []
        for target in potential_Targetsets:
            imgPoints = target.imgPoints
            curr_target_x = imgPoints[0][0] + (imgPoints[2][0] - imgPoints[0][0]) / 2
            all_distance_diff.append(abs(curr_target_x - last_target_x))
        return potential_Targetsets[int(np.argmin(all_distance_diff))]

    # No previous target: the candidate with the greatest credit wins.
    max_Credit = 0
    best_Target = None
    for target in potential_Targetsets:
        depth = float(target.depth)
        abs_yaw = abs(float(target.yaw))
        abs_pitch = abs(float(target.pitch))

        # Depth credit: nearer candidates score higher.
        if depth < 1800:
            depth_Credit = 5
        elif depth < 2500:
            depth_Credit = 3
        else:
            depth_Credit = 0

        # Angle credit: either angle being inside a band is enough for it.
        if abs_yaw < 5 or abs_pitch < 10:
            angle_Credit = 100
        elif abs_yaw < 10 or abs_pitch < 15:
            angle_Credit = 3
        elif abs_yaw < 20 or abs_pitch < 20:
            angle_Credit = 2
        elif abs_yaw < 30 or abs_pitch < 30:
            angle_Credit = 1
        else:
            angle_Credit = 0

        current_credit = depth_Credit + angle_Credit
        # Strict '>' keeps the first of equally scored candidates.
        if current_credit > max_Credit:
            max_Credit = current_credit
            best_Target = target

    return best_Target


def clipRect(rect_xywh, size):
    """Clamp an ``(x, y, w, h)`` rectangle to an area of ``size``.

    ``size[0]`` bounds the x direction and ``size[1]`` the y direction. The
    origin is clamped into ``[0, size]`` and the extent into
    ``[0, remaining space]``. Returns the clipped ``(x, y, w, h)`` tuple.
    """
    x, y, w, h = rect_xywh
    limit_x, limit_y = size[0], size[1]

    left = min(max(x, 0), limit_x)
    top = min(max(y, 0), limit_y)
    width = min(max(w, 0), limit_x - left)
    height = min(max(h, 0), limit_y - top)
    return left, top, width, height


def findVerticesOrder(pts):
    """Reorder four rectangle corners as [bl, tl, tr, br].

    The two rows with the smallest y are treated as the top pair and the other
    two as the bottom pair; each pair is then ordered left-to-right by x.
    ``pts`` is indexed as a 2-D array with x in column 0 and y in column 1.
    """
    by_y = pts[np.argsort(pts[:, 1])]
    upper, lower = by_y[:2], by_y[2:]

    # Left-to-right inside each pair.
    upper = upper[np.argsort(upper[:, 0])]
    lower = lower[np.argsort(lower[:, 0])]

    return np.vstack((lower[0], upper[0], upper[1], lower[1]))


def float_to_hex(f):
    """Return ``f`` packed as a big-endian 32-bit float, as 8 lowercase hex digits."""
    return struct.pack('>f', f).hex()

def decimalToHexSerial(Yaw, Pitch):
    """Encode yaw and pitch for serial transmission.

    Returns ``(hex_yaw, hex_pitch, hex_checksum)``: each angle as the hex
    string produced by float_to_hex, plus a two-digit hex checksum equal to the
    byte sum (mod 256) of both angles packed as big-endian 32-bit floats.
    """
    yaw_hex = float_to_hex(Yaw)
    pitch_hex = float_to_hex(Pitch)

    # The checksum covers only the eight payload bytes of yaw and pitch.
    payload = struct.pack('>ff', Yaw, Pitch)
    checksum_hex = format(sum(payload) % 256, '02x')

    return yaw_hex, pitch_hex, checksum_hex

def draw_crosshair(frame):
    """Draw a green crosshair at the centre of ``frame`` and return the frame.

    The frame is modified in place; each arm is 20 px long and 2 px thick.
    """
    rows, cols = frame.shape[:2]
    mid_x, mid_y = cols // 2, rows // 2
    arm = 20
    green = (0, 255, 0)

    # Vertical stroke first, then the horizontal one.
    strokes = (
        ((mid_x, mid_y - arm), (mid_x, mid_y + arm)),
        ((mid_x - arm, mid_y), (mid_x + arm, mid_y)),
    )
    for start, end in strokes:
        cv2.line(frame, start, end, green, 2)
    return frame

def main(camera: CameraSource, target_color: TargetColor, show_stream: str):
    """
    Run the capture -> detect -> predict -> transmit loop until interrupted.

    Every iteration grabs a frame from ``camera``, searches it for targets,
    picks the best candidate, combines its yaw/pitch with the IMU attitude
    read over the serial port, extrapolates the target position a short time
    into the future, and sends the target angles to the serial port.  When
    neither detection nor tracking succeeds, zero angles are sent instead.

    Args:
        camera: frame source. Its ``get_frames()`` must return
            ``(color_image, depth_image)`` and its ``active_cam_config``
            must provide the keys ``camera_offset``, ``fx``, ``fy``, ``cx``
            and ``cy``.
        target_color: colour selector handed to ``CVParams``.
        show_stream: ``'YES'`` (or ``'yes'``) to draw overlays and display
            the annotated frame; any other value runs headless.

    Notes:
        * Relies on the module-level names ``logger`` and ``debug``.
        * Yaw and Pitch are treated as degrees (they are converted with
          ``np.deg2rad`` before being sent).
        * Errors are logged and swallowed at every level; the function
          returns normally after a fatal error or a KeyboardInterrupt
          raised inside the loop body.
        * The serial port, if it was opened, is closed on exit.

    Original commit note: umature pred-imu; 50 deg limit; HSV red adj;
    get_imu; MVS arch rebuild ---- Shiao
    """
    # Bound before the outer try so the finally-clause can always inspect it.
    ser = None
    try:
        cv_config = CVParams(target_color)

        # Create a window for CV parameters if debug mode is active
        if debug:
            cv2.namedWindow("CV Parameters")
            createTrackbarsForParams("CV Parameters", cv_config)
            cv2.resizeWindow("CV Parameters", 800, 180)

        # Evaluate the display switch once instead of before every draw call
        display = show_stream in ('YES', 'yes')

        # ---- Tracking and prediction state ----

        # NOTE(review): fps is never updated below, so find_contours always
        # receives 0 — confirm whether last_fps was intended.
        fps = 0
        last_target_x = None
        # NOTE(review): no tracker is ever created in this function, so the
        # tracking fallback below is currently inactive.
        tracker = None
        tracking_frames = 0
        max_tracking_frames = 15        # Maximum number of frames to track

        max_history_length = 8          # Maximum number of samples for prediction
        # Time in seconds to predict the target's motion into the future
        prediction_future_time = 0.2
        # Maximum time in seconds between history frames.
        # Should be long enough for a dropped frame or two,
        # but not too long to group unrelated detections.
        max_history_frame_delta = 0.15
        target_angle_history = []

        # Try to open the serial port for data transmission to STM32;
        # if it is not available, continue without it
        try:
            ser = serial.Serial('/dev/ttyUSB0', 115200)
            logger.info("Successfully opened serial port")
        except Exception as e:
            ser = None
            logger.warning(f"Failed to open serial port: {str(e)}")
            print("Serial port not available. Running without serial communication.")

        detect_success = False
        track_success = False

        # FPS calculation variables
        fps_counter = 0
        fps_sum = 0
        last_fps_print_time = time.time()
        last_fps = 0

        # Cache camera parameters for faster access. All values are taken
        # from the camera argument rather than from a module-level global.
        cam_config = camera.active_cam_config
        cam_offset = np.array(cam_config['camera_offset'])
        cam_fx = cam_config['fx']
        cam_fy = cam_config['fy']
        cam_cx = cam_config['cx']
        cam_cy = cam_config['cy']

        while True:
            try:
                # Start of frame, used for the FPS calculation
                startTime = time.time()

                if debug:
                    updateParamsFromTrackbars("CV Parameters", cv_config)

                color_image, depth_image = camera.get_frames()

                # Skip processing if we didn't get a valid frame
                if color_image is None:
                    logger.warning("No valid frame received, skipping frame")
                    time.sleep(0.01)  # Short sleep to avoid busy-waiting
                    continue

                # Only modify color_image with crosshair if we're going to display it
                if display:
                    frame = color_image.copy()  # Only copy if we need to display
                    frame = draw_crosshair(frame)
                else:
                    frame = color_image  # Just use reference if we're not displaying

                # ---- Detection ----
                # NOTE(review): frame is rebound here, replacing the
                # crosshair copy made above — confirm this is intended.
                binary, frame = read_morphology(color_image, cv_config)

                # get the list with all potential targets' info
                potential_Targetsets = find_contours(cv_config, binary, frame, depth_image, fps)

                if potential_Targetsets:  # detection succeeded
                    detect_success = True

                    # filter out the best target
                    final_Target = targetsFilter(potential_Targetsets, frame, last_target_x)

                    # extract the target's position and angle
                    depth = float(final_Target.depth)
                    Yaw = float(final_Target.yaw)
                    Pitch = float(final_Target.pitch)
                    imgPoints = final_Target.imgPoints

                    # (placeholder) SORT tracking

                else:  # detection failed: fall back to the tracker
                    detect_success = False
                    try:
                        if tracker is not None and tracking_frames < max_tracking_frames:
                            tracking_frames += 1
                            # Update tracker
                            track_success, bbox = tracker.update(color_image)
                        else:
                            track_success = False

                        # If tracking succeeded, solve the angles and draw the bounding box
                        if track_success:
                            # NOTE(review): the bbox returned by the tracker is
                            # replaced below by one rebuilt from the last
                            # detected target's corners — confirm intended.
                            target_coor_width = abs(
                                int(final_Target.topRight[0]) - int(final_Target.topLeft[0]))
                            target_coor_height = abs(
                                int(final_Target.topLeft[1]) - int(final_Target.bottomLeft[1]))

                            # bbox format:  (init_x,init_y,w,h)
                            bbox = (final_Target.topLeft[0] - target_coor_width * 0.05, final_Target.topLeft[1], target_coor_width * 1.10,
                                    target_coor_height)  # enlarged to include the whole target, for better tracking by KCF or others

                            bbox = clipRect(bbox, (color_image.shape[1], color_image.shape[0]))  # clip the bbox to fit the frame

                            # Corner order: bottom-left, top-left, top-right, bottom-right
                            x, y, w, h = bbox
                            imgPoints = np.array(
                                [[x, y+h], [x, y], [x+w, y],
                                 [x+w, y+h]], dtype=np.float64)

                            target_Dict = get_3d_target_location(
                                imgPoints, color_image, depth_image)

                            final_Target.depth = target_Dict["depth"]
                            final_Target.yaw = target_Dict["Yaw"]
                            final_Target.pitch = target_Dict["Pitch"]
                            final_Target.imgPoints = target_Dict["imgPoints"]

                            # Refresh the locals consumed by the prediction and
                            # transmit stage below; otherwise it would keep
                            # using the values of the last successful detection.
                            depth = float(final_Target.depth)
                            Yaw = float(final_Target.yaw)
                            Pitch = float(final_Target.pitch)

                            # draw tracking bounding box - only if we're showing the stream
                            if display:
                                p1 = (int(bbox[0]), int(bbox[1]))
                                p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                                cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)
                    except Exception as e:
                        logger.error(f"Error in tracking: {e}")
                        track_success = False

                if detect_success or track_success:
                    try:
                        # store the current target's x-axis, used for detection in the next round
                        last_target_x = imgPoints[0][0] + (imgPoints[2][0] - imgPoints[0][0])/2

                        # ---- Prediction ----

                        if ser is not None:
                            try:
                                imu_yaw, imu_pitch, imu_roll = get_imu(ser)
                                # Don't print this every frame - too much overhead
                                if time.time() - last_fps_print_time >= 1.0:
                                    print(f"imu data receive: {imu_yaw}, {imu_pitch}, {imu_roll}")
                            except Exception as e:
                                logger.error(f"Error reading IMU data: {e}")
                                imu_yaw, imu_pitch, imu_roll = 0, 0, 0  # Safer defaults
                        else:
                            imu_yaw, imu_pitch, imu_roll = 0, 0, 0  # For testing or when serial is unavailable

                        # Apply correction factors once
                        imu_yaw_corrected = imu_yaw * -1.2
                        imu_pitch_corrected = imu_pitch * -1.2
                        global_yaw = imu_yaw_corrected + Yaw
                        global_pitch = imu_pitch_corrected + Pitch

                        # Calculate cartesian position once
                        cartesian_pos = spherical_to_cartesian(global_yaw, global_pitch, depth) - cam_offset

                        # Only predict / transmit when the angles are within range
                        if (-30 < Pitch < 30) and (-45 < Yaw < 45):
                            if display:
                                # Draw visualization for target (the two diagonals plus the readouts)
                                cv2.line(frame, (int(imgPoints[1][0]), int(imgPoints[1][1])),
                                         (int(imgPoints[3][0]), int(imgPoints[3][1])),
                                         (33, 255, 255), 2)
                                cv2.line(frame, (int(imgPoints[2][0]), int(imgPoints[2][1])),
                                         (int(imgPoints[0][0]), int(imgPoints[0][1])),
                                         (33, 255, 255), 2)
                                cv2.putText(frame, str(depth), (90, 20),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])
                                cv2.putText(frame, str(Yaw), (90, 50),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])
                                cv2.putText(frame, str(Pitch), (90, 80),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])

                            # Update target history; restart it when the gap to
                            # the previous sample exceeds max_history_frame_delta
                            current_time = time.time()
                            if len(target_angle_history) < 1 or current_time - target_angle_history[-1][0] > max_history_frame_delta:
                                target_angle_history = [(current_time, *cartesian_pos)]
                            else:
                                target_angle_history.append((current_time, *cartesian_pos))

                            if len(target_angle_history) > max_history_length:
                                target_angle_history = target_angle_history[-max_history_length:]

                            # Do prediction if we have enough history
                            if len(target_angle_history) >= 2:
                                # Columns: time, x, y, z; time is made relative to the oldest sample
                                target_history_array = np.array(target_angle_history)
                                time_hist_array = target_history_array[:, 0] - target_history_array[0, 0]
                                x_hist_array = target_history_array[:, 1]
                                y_hist_array = target_history_array[:, 2]
                                z_hist_array = target_history_array[:, 3]

                                degree = 1  # if len(target_angle_history) == 2 else 2

                                # Linearly increasing weights: newer samples weigh more
                                weights = np.linspace(float(max_history_length) - len(time_hist_array) + 1.0,
                                                     float(max_history_length) + 1.0,
                                                     len(time_hist_array))

                                predict_time = time_hist_array[-1] + prediction_future_time

                                # Do predictions
                                predicted_x = poly_predict(time_hist_array, x_hist_array, degree,
                                                           predict_time, weights=weights)
                                predicted_y = poly_predict(time_hist_array, y_hist_array, degree,
                                                           predict_time, weights=weights)
                                predicted_z = poly_predict(time_hist_array, z_hist_array, degree,
                                                           predict_time, weights=weights)

                                predicted_yaw, predicted_pitch, _ = cartesian_to_spherical(
                                    np.array([predicted_x, predicted_y, predicted_z]))
                            else:
                                predicted_yaw, predicted_pitch = global_yaw, global_pitch

                            # Visualization for prediction - only if showing stream
                            if display:
                                # Line from the current aim point to the predicted one
                                current_point_coords = (int(cam_fx * math.tan(math.radians(Yaw)) + cam_cx),
                                                        int(cam_fy * math.tan(math.radians(-Pitch)) + cam_cy))
                                predicted_point_coords = (int(cam_fx * math.tan(math.radians(predicted_yaw - imu_yaw_corrected)) + cam_cx),
                                                          int(cam_fy * math.tan(math.radians(-(predicted_pitch - imu_pitch_corrected))) + cam_cy))
                                cv2.line(frame, current_point_coords,
                                         predicted_point_coords, (255, 255, 255), 2)

                            # Calculate relative prediction values
                            relative_pred_yaw = predicted_yaw - imu_yaw_corrected
                            relative_pred_pitch = predicted_pitch - imu_pitch_corrected

                            # Clamp prediction values
                            # NOTE(review): the clamped predictions are not used
                            # afterwards; the unpredicted Yaw/Pitch are what get sent.
                            relative_pred_yaw = max(-50, min(50, relative_pred_yaw))
                            relative_pred_pitch = max(-50, min(50, relative_pred_pitch))

                            # Convert to radians for sending
                            Yaw_rad = np.deg2rad(Yaw)
                            Pitch_rad = np.deg2rad(Pitch)

                            # Only print this once per second, not every frame
                            if time.time() - last_fps_print_time >= 1.0:
                                print(f"imu data send: {Yaw_rad}, {Pitch_rad}, {detect_success}")

                            # Send data to serial port if available
                            if ser is not None:
                                try:
                                    hex_Yaw, hex_Pitch, hex_checksum = decimalToHexSerial(Yaw_rad, Pitch_rad)
                                    send_data(ser, hex_Yaw, hex_Pitch, hex_checksum, detect_success)
                                except Exception as e:
                                    logger.error(f"Error sending data to serial port: {e}")
                        else:
                            logger.warning(f"Angle(s) exceed limits: Pitch: {Pitch}, Yaw: {Yaw}")
                    except Exception as e:
                        logger.error(f"Error in processing detected target: {e}")

                else:
                    # Tracking failure - only show message if we're displaying the stream
                    if display:
                        cv2.putText(frame, "Tracking failure detected", (600, 80),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

                    # send failure data (send 0 degree to make gimbal stop)
                    if ser is not None:
                        try:
                            hex_Yaw, hex_Pitch, hex_checksum = decimalToHexSerial(0, 0)
                            send_data(ser, hex_Yaw, hex_Pitch, hex_checksum, detect_success)
                        except Exception as e:
                            logger.error(f"Error sending failure data to serial port: {e}")

                # Only draw UI elements if we're showing the stream
                if display:
                    cv2.circle(frame, (720, 540), 2, (255, 255, 255), -1)
                    cv2.putText(frame, 'Depth: ', (20, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])
                    cv2.putText(frame, 'Yaw: ', (20, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])
                    cv2.putText(frame, 'Pitch: ', (20, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])
                    cv2.putText(frame, 'FPS: ', (20, 110),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])
                    cv2.putText(frame, str(int(last_fps)), (90, 110),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0])

                    cv2.imshow("original", frame)
                    # Use waitKey(1) for maximum speed
                    cv2.waitKey(1)

                # Calculate FPS - but only print occasionally to reduce overhead
                endtime = time.time()
                frame_time = endtime - startTime
                current_fps = 1.0 / frame_time if frame_time > 0 else 0

                # Accumulate FPS
                fps_counter += 1
                fps_sum += current_fps

                # Print the average FPS once per second
                if endtime - last_fps_print_time >= 1.0:
                    last_fps = fps_sum / fps_counter if fps_counter > 0 else 0
                    print(f"FPS: {last_fps:.2f}")
                    fps_counter = 0
                    fps_sum = 0
                    last_fps_print_time = endtime
            except KeyboardInterrupt:
                logger.info("Keyboard interrupt detected, exiting")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}")
                # Continue running even if there's an error
                time.sleep(0.1)  # Short sleep to avoid busy-waiting
                continue
    except Exception as e:
        # The error is logged and swallowed: main() returns normally.
        logger.error(f"Fatal error in main function: {e}")
    finally:
        # Release the serial port so it can be reopened afterwards
        if ser is not None:
            try:
                ser.close()
            except Exception as e:
                logger.warning(f"Failed to close serial port: {e}")


if __name__ == "__main__":
    # set up argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--target-color', required=True, type=str, choices=[val.value for val in TargetColor],
                        help='The armor board light color to detect')
    parser.add_argument('--recording-source', type=pathlib.Path,
                        help='Path to input video recordings')
    parser.add_argument('--recording-dest', type=pathlib.Path,
                        help='Path to record camera video to (MP4 format)')
    parser.add_argument('--debug', action='store_true',
                        help='Show intermediate results and debug output')
    parser.add_argument('--show-stream', type=str, choices=['YES', 'NO'], default='NO',
                        help='Display the camera stream (YES or NO)')


    args = parser.parse_args()

    # set up logger
    logger = logging.getLogger(__name__)
    debug: bool = args.debug
    logger.setLevel('DEBUG' if debug else 'INFO')

    args.target_color = TargetColor(args.target_color)
    num = 0  # for collecting dataset, pictures' names

    # choose camera params - use Intel RealSense D435I config based on detected hardware
    try:
        camera = CameraSource(camera_params['Intel RealSense D435I'], args.target_color.value,
                            recording_source=args.recording_source, recording_dest=args.recording_dest)

        active_cam_config = camera.active_cam_config
        main(camera, args.target_color, args.show_stream)
    except Exception as e:
        print(f"Failed to initialize camera: {e}")
        print("Trying fallback to generic camera...")
        try:
            camera = CameraSource(camera_params['Generic Webcam'], args.target_color.value,
                                recording_source=args.recording_source, recording_dest=args.recording_dest)
            active_cam_config = camera.active_cam_config
            main(camera, args.target_color, args.show_stream)