-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathobject_detection.py
More file actions
83 lines (71 loc) · 3.08 KB
/
object_detection.py
File metadata and controls
83 lines (71 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""
this module includes detect_object function.
"""
import time
import numpy as np
import pyrealsense2 as rs
# use detectron2 to detect certain objects and their positions
def detect_object(cfg, pipe, category, predictor, object_cate, tf, cut_distance):
    """Grab one RealSense frame, run detectron2 on it, and return wanted objects.

    Parameters
    ----------
    cfg : RealSense stream configuration passed to ``pipe.start``.
    pipe : ``pyrealsense2.pipeline`` used to grab color/depth frames.
    category : sequence of dicts with a ``"name"`` key, indexed by predicted class id.
    predictor : detectron2 predictor callable; invoked on the color image.
    object_cate : collection of category names the caller wants to keep.
    tf : 4x4 transform matrix from camera frame to global frame.
    cut_distance : maximum camera-z distance; objects farther than this are skipped.

    Returns
    -------
    list of dict
        One entry per kept detection with keys ``"name"``, ``"score"`` and
        ``"coordi"`` (homogeneous global coordinates).
    """
    object_info = []
    start1 = time.time()
    profile = pipe.start(cfg)
    try:
        for _ in range(30):
            pipe.wait_for_frames()  # abandon first few frames (let exposure settle)
        frameset = pipe.wait_for_frames()
    finally:
        # Always release the camera, even if frame grabbing raises.
        pipe.stop()
    color_frame = frameset.get_color_frame()
    color = np.asanyarray(color_frame.get_data())
    # Align depth to the color stream so pixel coordinates match between the two.
    align = rs.align(rs.stream.color)
    frameset = align.process(frameset)
    # Update depth frame to the aligned one and read its intrinsics:
    aligned_depth_frame = frameset.get_depth_frame()
    depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics
    start2 = time.time()
    outputs = predictor(color)
    end = time.time()
    print("infer time :", end - start2)
    print("entire time :", end - start1)
    # pred_classes holds classified object numbers
    # you can see each object number at https://github.com/facebookresearch/detectron2/blob/989f52d67d05445ccd030d8f13d6cc53e297fb91/detectron2/data/datasets/builtin_meta.py
    out_class = outputs["instances"].pred_classes
    out_boxes = outputs["instances"].pred_boxes
    out_scores = outputs["instances"].scores  # score means probability
    print("detected object :", end = " ") #print all detected objects
    for class_num in out_class:
        print(category[class_num]["name"], end = " ")
    print()
    centers = out_boxes.get_centers()  # get center coordinates of boxes
    wanted = set(object_cate)  # O(1) membership test instead of an inner loop
    for i, class_num in enumerate(out_class):
        name_t = category[class_num]["name"]
        if name_t not in wanted:  # if not in category, drop
            print("wanted_cate doesn't have", name_t)
            continue
        score_t = out_scores.cpu().numpy()[i]
        # Box center is in pixel coordinates -> round to the nearest pixel.
        center = centers[i].cpu().numpy()
        x = int(round(float(center[0])))
        y = int(round(float(center[1])))
        # get depth (meters) using aligned_depth_frame
        depth = aligned_depth_frame.get_distance(x, y)
        # get camera coordinates with intrinsic
        depth_point = rs.rs2_deproject_pixel_to_point(
            depth_intrin, [x, y], depth)
        if depth_point[2] > cut_distance:  # if object is too far from camera, skip
            print("%s is too far" % name_t)
            continue
        # Homogeneous camera coordinate so it can be multiplied by the 4x4 tf.
        coordi = np.array([depth_point[0], depth_point[1], depth_point[2], 1]).T
        print("name :", name_t)
        print("camera coordi :", coordi)
        coordi_global = np.matmul(tf, coordi).T
        print("global coordi: ", coordi_global)
        object_info.append({"name" : name_t, "score" : score_t, "coordi" : coordi_global})
    return object_info