From df08ff116c7c4cb7a4db927fd5dcd4c491fe6fc3 Mon Sep 17 00:00:00 2001 From: OrestisVaggelis Date: Mon, 10 Feb 2025 17:46:22 +0200 Subject: [PATCH] Changes to run bop_toolkit on industrial --- bop_toolkit_lib/dataset_params.py | 385 +++++------------- bop_toolkit_lib/inout.py | 93 ++--- bop_toolkit_lib/misc.py | 2 +- bop_toolkit_lib/renderer_batch.py | 2 +- bop_toolkit_lib/tests/eval_bop22_coco_test.py | 35 +- bop_toolkit_lib/tests/test_misc.py | 48 --- bop_toolkit_lib/visualization.py | 2 - docs/bop_datasets_format.md | 1 - requirements.txt | 4 +- scripts/calc_gt_coco.py | 17 +- scripts/calc_gt_distribution.py | 68 +--- scripts/calc_gt_info.py | 31 +- scripts/calc_gt_masks.py | 44 +- scripts/calc_model_info.py | 18 +- scripts/create_coco_results_file_from_gt.py | 14 +- scripts/create_pose_results_file_from_gt.py | 63 +-- scripts/enumerate_test_targets.py | 18 +- scripts/eval_bop19_pose.py | 1 + scripts/eval_bop22_coco.py | 2 +- scripts/eval_bop24_pose.py | 25 -- scripts/eval_calc_errors.py | 9 +- scripts/eval_calc_errors_gpu.py | 12 +- scripts/eval_calc_scores.py | 10 +- scripts/vis_est_poses.py | 152 +++---- scripts/vis_gt_poses.py | 185 ++++----- scripts/vis_object_symmetries.py | 9 +- setup.py | 9 +- 27 files changed, 421 insertions(+), 838 deletions(-) diff --git a/bop_toolkit_lib/dataset_params.py b/bop_toolkit_lib/dataset_params.py index 99a44149..3949b609 100644 --- a/bop_toolkit_lib/dataset_params.py +++ b/bop_toolkit_lib/dataset_params.py @@ -7,8 +7,6 @@ import glob import os from os.path import join -from collections.abc import Callable -from typing import Union, Dict from bop_toolkit_lib import inout @@ -47,10 +45,6 @@ def get_camera_params(datasets_path, dataset_name, cam_type=None): cam_type = "uw" cam_filename = "camera_{}.json".format(cam_type) - # hot3d does not have a single camera file, raise an exception - elif dataset_name in ['hot3d']: - raise ValueError("BOP dataset {} does not have a global camera file.".format(dataset_name)) - else: cam_filename = "camera.json" @@ -95,8 +89,7 @@ def get_model_params(datasets_path, dataset_name, model_type=None): "hopev2": list(range(1, 29)), "hot3d": list(range(1, 34)), "handal": list(range(1, 41)), - "ipd": [0, 1, 4, 8, 10, 11, 14, 18, 19, 20], - "xyzibd": [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + "industrial": [1,2,3,4,5] }[dataset_name] # ID's of objects with ambiguous views evaluated using the ADI pose error @@ -118,8 +111,7 @@ def get_model_params(datasets_path, dataset_name, model_type=None): "hopev2": [], "hot3d": [1, 2, 3, 5, 22, 24, 25, 29, 30, 32], "handal": [26, 35, 36, 37, 38, 39, 40], - "ipd": [8, 14, 18, 19, 20], - "xyzibd": [1, 2, 5, 8, 9, 11, 12, 16, 17] + "industrial": [1,3,5] }[dataset_name] # T-LESS includes two types of object models, CAD and reconstructed. @@ -185,15 +177,12 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): depth_ext = ".tif" p["im_modalities"] = ["rgb", "depth"] - # for Classic datasets, sensor and modality used for the evaluation is implicit... - p["eval_sensor"] = None + # for Classic datasets, test modality is implicit... p["eval_modality"] = None - # ...and only one set of annotation is present in the dataset + # ...and only one set of annotation is present in the dataset # (e.g. scene_gt.json instead of scene_gt_rgb.json, scene_gt_gray1.json etc.) 
- sensor_modalities_have_separate_annotations = False - # file extensions for datasets with multiple sensor/modalities options - # has to be set if sensor_modalities_have_separate_annotations is True - exts = None + modalities_have_separate_annotations = False + exts = None # has to be set if modalities_have_separate_annotations is True supported_error_types = ["ad", "add", "adi", "vsd", "mssd", "mspd", "cus", "proj"] @@ -203,7 +192,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (600.90, 1102.35) # Range of camera-object distances. + p["depth_range"] = (600.90, 1102.35) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (0, 0.5 * math.pi) @@ -213,7 +202,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (346.31, 1499.84) # Range of camera-object distances. + p["depth_range"] = (346.31, 1499.84) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (0, 0.5 * math.pi) @@ -249,7 +238,16 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): # The following holds for Primesense, but is similar for the other sensors. if split == "test": - p["depth_range"] = (649.89, 940.04) # Range of camera-object distances. + p["depth_range"] = (649.89, 940.04) + p["azimuth_range"] = (0, 2 * math.pi) + p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) + + elif dataset_name == "industrial": + p["scene_ids"] = [1,2,3,4,5,6,7,8] + p["im_size"] = (640, 480) + + if split == "test": + p["depth_range"] = (300, 950) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -262,7 +260,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (569.88, 1995.27) # Range of camera-object distances. + p["depth_range"] = (851.29, 2016.14) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.4363, 0.5 * math.pi) # (-25, 90) [deg]. @@ -272,7 +270,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (499.57, 1246.07) # Range of camera-object distances. + p["depth_range"] = (499.57, 1246.07) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -282,7 +280,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (594.41, 739.12) # Range of camera-object distances. + p["depth_range"] = (594.41, 739.12) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -292,7 +290,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (509.12, 1120.41) # Range of camera-object distances. + p["depth_range"] = (509.12, 1120.41) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (0, 0.5 * math.pi) @@ -302,7 +300,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (454.56, 1076.29) # Range of camera-object distances. + p["depth_range"] = (454.56, 1076.29) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-1.0297, 0.5 * math.pi) # (-59, 90) [deg]. 
@@ -314,7 +312,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_modalities"] = ["gray", "depth"] if split == "test": - p["depth_range"] = (638.38, 775.97) # Range of camera-object distances. + p["depth_range"] = (638.38, 775.97) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -346,7 +344,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): # The following holds for Primesense, but is similar for Kinect. if split == "test": - p["depth_range"] = (438.24, 1416.97) # Range of camera-object distances. + p["depth_range"] = (438.24, 1416.97) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -367,7 +365,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (612.92, 1243.59) # Range of camera-object distances. + p["depth_range"] = (612.92, 1243.59) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-1.2788, 1.1291) # (-73.27, 64.69) [deg]. @@ -384,7 +382,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["depth_range"] = None # Not calculated yet. p["azimuth_range"] = None # Not calculated yet. p["elev_range"] = None # Not calculated yet. - + # HOPEV2. elif dataset_name == "hopev2": p["scene_ids"] = { @@ -415,8 +413,8 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): # HOT3D. elif dataset_name == "hot3d": - sensor_modalities_have_separate_annotations = {"aria": True, "quest3": True} - p["im_modalities"] = {"aria": ["rgb", "gray1", "gray2"], "quest3": ["gray1", "gray2"]} + modalities_have_separate_annotations = True + p["im_modalities"] = ["rgb","gray1","gray2"] p["test_quest3_scene_ids"] = list(range(1288, 1849)) p["test_aria_scene_ids"] = list(range(3365, 3832)) p["train_quest3_scene_ids"] = list(range(0, 1288)) @@ -425,11 +423,8 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): "test": p["test_quest3_scene_ids"] + p["test_aria_scene_ids"], # test_quest3 + test_aria "train": p["train_quest3_scene_ids"] + p["train_aria_scene_ids"], # train_quest3 + train_aria }[split] - - p["im_size"] = { - "aria" : {"rgb": (1408, 1408), "gray1": (640, 480), "gray2": (640, 480)}, - "quest3" : {"gray1": (1280, 1024), "gray2": (1280, 1024)} - } + p["quest3_im_size"] = {"gray1": (1280, 1024), "gray2": (1280, 1024)} + p["aria_im_size"] = {"rgb": (1408, 1408), "gray1": (640, 480), "gray2": (640, 480)} p["quest3_eval_modality"] = "gray1" p["aria_eval_modality"] = "rgb" @@ -441,20 +436,12 @@ def hot3d_eval_modality(scene_id): else: raise ValueError("scene_id {} not part of hot3d valid scenes".format(scene_id)) - def hot3d_eval_sensor(scene_id): - if scene_id in p["test_quest3_scene_ids"] or scene_id in p["train_quest3_scene_ids"]: - return "quest3" - elif scene_id in p["test_aria_scene_ids"] or scene_id in p["train_aria_scene_ids"]: - return "aria" - else: - raise ValueError("scene_id {} not part of hot3d valid scenes".format(scene_id)) - p["eval_modality"] = hot3d_eval_modality - p["eval_sensor"] = hot3d_eval_sensor exts = { - "aria" : {"rgb": ".jpg", "gray1": ".jpg", "gray2": ".jpg"}, - "quest3": {"gray1": ".jpg", "gray2": ".jpg"} + "rgb": ".jpg", + "gray1": ".jpg", + "gray2": "jpg", } if split == "test": @@ -463,115 +450,6 @@ def hot3d_eval_sensor(scene_id): p["elev_range"] = None # Not calculated yet. 
supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] - elif dataset_name == "ipd": - sensor_modalities_have_separate_annotations = {"photoneo": False, "cam1" : False, "cam2" : False, "cam3" : False} - p["im_modalities"] = {"photoneo": ["rgb", "depth"], "cam1" : ["rgb", "aolp", "dolp", "depth"], - "cam2" : ["rgb", "aolp", "dolp", "depth"], "cam3" : ["rgb", "aolp", "dolp", "depth"]} - p["scene_ids"] = { - "test": list(range(15)), - "train": list(range(10)), - "val": list(range(15)), - }[split] - - p["im_size"] = { - "photoneo" : (2064, 1544), - "cam1" : (3840, 2160), - "cam2": (3840, 2160), - "cam3": (3840, 2160), - "": (2400, 2400), - } - - p["eval_modality"] = "rgb" - p["eval_sensor"] = "photoneo" - - exts = { - "photoneo": {"rgb": ".png", "depth": ".png"}, - "cam1": {"rgb": ".png", "depth": ".png", "aolp": ".png", "dolp": ".png"}, - "cam2": {"rgb": ".png", "depth": ".png", "aolp": ".png", "dolp": ".png"}, - "cam3": {"rgb": ".png", "depth": ".png", "aolp": ".png", "dolp": ".png"}, - } - - if split == "test": - p["depth_range"] = None # Not calculated yet. - p["azimuth_range"] = None # Not calculated yet. - p["elev_range"] = None # Not calculated yet. - - supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] - - elif dataset_name == "xyzibd": - sensor_modalities_have_separate_annotations = {"photoneo": False, "xyz": False, "realsense": False} - p["im_modalities"] = {"photoneo": ["gray", "depth"], "xyz": ["gray", "depth"], "realsense": ["rgb", "depth"]} - val_scene_ids = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 54, 60, 65, 70] - p["scene_ids"] = { - "test": [i for i in range(1, 75) if i not in val_scene_ids], - "val": val_scene_ids, - "train": list(range(45)), - }[split] - - p["im_size"] = { - "xyz": (1440, 1080), - "realsense": (1280, 720), - "photoneo": (2064, 1544), - "": (1440, 1080), - } - - p["eval_modality"] = "gray" - p["eval_sensor"] = "xyz" - - if "pbr" == split_type: - # The PBR data is in classical BOP format without sensor names. - p["eval_modality"] = None - p["eval_sensor"] = None - sensor_modalities_have_separate_annotations = False - - exts = { - "photoneo": {"gray": ".png", "depth": ".png"}, - "xyz": {"gray": ".png", "depth": ".png"}, - "realsense": {"rgb": ".png", "depth": ".png"}, - } - - if split == "test": - p["depth_range"] = None # Not calculated yet. - p["azimuth_range"] = None # Not calculated yet. - p["elev_range"] = None # Not calculated yet. - - supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] - elif dataset_name == "itoddmv": - sensor_modalities_have_separate_annotations = {"3d1": False, "cam0": False, "cam1": False, "cam2": False} - p["im_modalities"] = {"3dlong": ["gray", "depth"], "cam0": ["gray"], "cam1": ["gray"], "cam2": ["gray"]} - p["scene_ids"] = { - "test": [1], - "train": list(range(50)), - }[split] - - p["im_size"] = { - "3dlong": (1280, 960), - "cam0": (4224, 2838), - "cam1": (4224, 2838), - "cam2": (4224, 2838), - "": (1280, 960), - } - - p["eval_modality"] = "gray" - p["eval_sensor"] = "3dlong" - - if "pbr" == split_type: - # The PBR data is in classical BOP format without sensor names. - p["eval_modality"] = None - p["eval_sensor"] = None - sensor_modalities_have_separate_annotations = False - - exts = { - "3dlong": {"gray": ".tif", "depth": ".tif"}, - "cam0": {"gray": ".tif"}, - } - - if split == "test": - p["depth_range"] = (638.38, 775.97) # Range of camera-object distances. 
- p["azimuth_range"] = (0, 2 * math.pi) - p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) - - supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] else: raise ValueError("Unknown BOP dataset ({}).".format(dataset_name)) @@ -579,17 +457,14 @@ def hot3d_eval_sensor(scene_id): base_path = join(datasets_path, dataset_name) split_path = join(base_path, split) if split_type is not None: - if split_type == "pbr" and dataset_name != "xyzibd": + if split_type == "pbr": p["scene_ids"] = list(range(50)) split_path += "_" + split_type # Path to the split directory. p["split_path"] = split_path p["supported_error_types"] = supported_error_types - - # For classic BOP format datasets with one gt file per folder - classic_bop_format = type(p["im_modalities"]) is list - if classic_bop_format: + if not modalities_have_separate_annotations: p.update( { # Path template to a gray image. @@ -636,154 +511,90 @@ def hot3d_eval_sensor(scene_id): else: assert exts is not None, "Need to set 'exts' for dataset {}".format() - # im_modalities is a dict from sensor to modalities - for sensor, modalities in p["im_modalities"].items(): - for modality in modalities: - # If modalities have aligned extrinsics/intrinsics they are combined in one file - gt_file_suffix = sensor - # If modalities have separate extrinsics/intrinsics they are accessed by unique modalities (compatible with hot3d) - if sensor_modalities_have_separate_annotations[sensor]: - gt_file_suffix = modality - - # Path template to modality image. - if dataset_name == "hot3d": - p[f"{modality}_{sensor}_tpath"] = join( - split_path, "{scene_id:06d}", f"{modality}", "{im_id:06d}" + exts[sensor][modality] - ) - else: - p[f"{modality}_{sensor}_tpath"] = join( - split_path, "{scene_id:06d}", f"{modality}_{sensor}", "{im_id:06d}" + exts[sensor][modality] - ) - p.update( - { - # Path template to a file with per-image camera parameters. - "scene_camera_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_camera_{}.json".format(gt_file_suffix) - ), - # Path template to a file with GT annotations. - "scene_gt_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_gt_{}.json".format(gt_file_suffix) - ), - # Path template to a file with meta information about the GT annotations. - "scene_gt_info_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(gt_file_suffix) - ), - # Path template to a file with the coco GT annotations. - "scene_gt_coco_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(gt_file_suffix) - ), - # Path template to a mask of the full object silhouette. - "mask_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "mask_{}".format(gt_file_suffix), "{im_id:06d}_{gt_id:06d}.png" - ), - # Path template to a mask of the visible part of an object silhouette. - "mask_visib_{}_{}_tpath".format(modality, sensor): join( - split_path, - "{scene_id:06d}", - "mask_visib_{}".format(gt_file_suffix), - "{im_id:06d}_{gt_id:06d}.png", - ), - } - ) + for moda in p["im_modalities"]: + p.update( + { + # Path template to modality image. + "{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", moda, "{im_id:06d}" + exts[moda] + ), + # Path template to a file with per-image camera parameters. + "scene_camera_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_camera_{}.json".format(moda) + ), + # Path template to a file with GT annotations. 
+ "scene_gt_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_gt_{}.json".format(moda) + ), + # Path template to a file with meta information about the GT annotations. + "scene_gt_info_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(moda) + ), + # Path template to a file with the coco GT annotations. + "scene_gt_coco_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(moda) + ), + # Path template to a mask of the full object silhouette. + "mask_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "mask_{}".format(moda), "{im_id:06d}_{gt_id:06d}.png" + ), + # Path template to a mask of the visible part of an object silhouette. + "mask_visib_{}_tpath".format(moda): join( + split_path, + "{scene_id:06d}", + "mask_visib_{}".format(moda), + "{im_id:06d}_{gt_id:06d}.png", + ), + } + ) return p -def get_scene_sensor_or_modality( - sm: Union[None, str, Callable], - scene_id: Union[None, int] - ) -> Union[None,str]: - """ - Get sensor|modality associated with a given scene. - - Some datasets (hot3d) have different sensor|modality available depending on the scene. - Same logic for sensor or modality. - """ - if sm is None or isinstance(sm, str): - return sm - elif callable(sm): - return sm(scene_id) - else: - raise TypeError(f"Sensor or modality {sm} should be either None, str or callable, not {type(sm)}") - - -def scene_tpaths_keys( - modality: Union[None, str, Callable], - sensor: Union[None, str, Callable], - scene_id: Union[None, int] = None - ) -> Dict[str,str]: +def scene_tpaths_keys(eval_modality, scene_id=None): """ Define keys corresponding template path defined in get_split_params output. - + Definition for scene gt, scene gt info and scene camera. - - Classic datasets (handal and hopev2 included): "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath", etc. - - hot3d and Industrial datasets: same tpath keys with modality and sensor, - e.g. "scene_gt_{modality}_{sensor}_tpath", "scene_gt_info_{modality}_{sensor}_tpath", - "scene_camera_{modality}_{sensor}_tpath", etc. - Modality|sensor may be the same for the whole dataset split (defined as a `str`), - or vary scene by scene (defined as function). - - :param modality: None, str or callable - :param sensor: None, str or callable - :param scene_id: None or int, should be specified if eval modality|sensor - changes from scene to scene + - Classic datasets: "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath" + - H3 datasets: with separate annotations for modalities, e.g. "scene_gt_{modality}_tpath", + "scene_gt_info_{modality}_tpath", "scene_camera_{modality}_tpath", etc. 
+      Modality may be the same for the whole dataset split (defined as a `str`),
+      or vary scene by scene (defined as a function or a dictionary).
+
+    :param eval_modality: None, str, callable or dict, defines the modality used for evaluation
+    :param scene_id: None or int, should be specified if eval modality
+      changes from scene to scene
     :return: scene tpath keys dictionary
     """
-    scene_sensor = get_scene_sensor_or_modality(sensor, scene_id)
-    scene_modality = get_scene_sensor_or_modality(modality, scene_id)
-
-    # 2 valid combinations:
-    # - modality and sensor are None -> BOP classic format
-    # - modality and sensor are not None -> hot3d + BOP industrial format
-    assert ((scene_modality is None and scene_sensor is None) or (scene_modality is not None and scene_sensor is not None)), f"scene_modality={scene_modality}, scene_sensor={scene_sensor}"
-
-    # "rgb_tpath" refers to the template path key of the given modality|sensor pair
     tpath_keys = [
-        "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath",
-        "scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath", "rgb_tpath"
+        "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath", 
+        "scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath"
     ]
     tpath_keys_multi = [
-        "scene_gt_{}_{}_tpath", "scene_gt_info_{}_{}_tpath", "scene_camera_{}_{}_tpath",
-        "scene_gt_coco_{}_{}_tpath", "mask_{}_{}_tpath", "mask_visib_{}_{}_tpath", "{}_{}_tpath"
+        "scene_gt_{}_tpath", "scene_gt_info_{}_tpath", "scene_camera_{}_tpath", 
+        "scene_gt_coco_{}_tpath", "mask_{}_tpath", "mask_visib_{}_tpath"
     ]
-    assert len(tpath_keys) == len(tpath_keys_multi)
+    assert len(tpath_keys) == len(tpath_keys_multi) 

     tpath_keys_dic = {}
     for key, key_multi in zip(tpath_keys, tpath_keys_multi):
-        if scene_sensor is None:
-            # BOP-Classic filenames
+        if eval_modality is None:
+            # Classic filenames
             tpath_keys_dic[key] = key
+        elif isinstance(eval_modality, str):
+            tpath_keys_dic[key] = key_multi.format(eval_modality)
+        elif callable(eval_modality) and scene_id is not None:
+            tpath_keys_dic[key] = key_multi.format(eval_modality(scene_id))
+        elif isinstance(eval_modality, dict) and scene_id is not None:
+            tpath_keys_dic[key] = key_multi.format(eval_modality[scene_id])
         else:
-            tpath_keys_dic[key] = key_multi.format(scene_modality, scene_sensor)
-
-    tpath_keys_dic["depth_tpath"] = tpath_keys_dic["rgb_tpath"].replace("rgb","depth").replace("gray","depth")
+            raise ValueError("eval_modality type not supported, either None, str, callable or dictionary")
+        
     return tpath_keys_dic


-def sensor_has_modality(dp_split: Dict, sensor: str, modality: str):
-    if isinstance(dp_split["im_modalities"], list):
-        return modality in dp_split["im_modalities"]
-    else:
-        return modality in dp_split["im_modalities"][sensor]
-
-
-def get_im_size(dp_split: Dict, modality: str, sensor: str):
-    """
-    Conveniance function to retrieve the image size of a modality|sensor pair.
-    """
-    if isinstance(dp_split["im_size"], dict):
-        if isinstance(dp_split["im_size"][sensor], dict):
-            # hot3d
-            return dp_split["im_size"][sensor][modality]
-        else:
-            # BOP Industrial
-            return dp_split["im_size"][sensor]
-    # BOP Classic: one image size for the whole dataset
-    else:
-        return dp_split["im_size"]
-
-
 def get_present_scene_ids(dp_split):
     """Returns ID's of scenes present in the specified dataset split.
diff --git a/bop_toolkit_lib/inout.py b/bop_toolkit_lib/inout.py index 7567624e..e65cb98a 100644 --- a/bop_toolkit_lib/inout.py +++ b/bop_toolkit_lib/inout.py @@ -4,7 +4,6 @@ """I/O functions.""" import os -import gzip import struct import numpy as np import imageio @@ -69,65 +68,51 @@ def save_depth(path, im): def load_json(path, keys_to_int=False): """Loads content of a JSON file. - :param path: Path to the JSON file. If ".json.gz" extension, opens with gzip. + :param path: Path to the JSON file. :return: Content of the loaded JSON file. """ # Keys to integers. def convert_keys_to_int(x): return {int(k) if k.lstrip("-").isdigit() else k: v for k, v in x.items()} - - # Open+decompress with gzip if ".json.gz" file extension - if path.endswith('.json.gz'): - f = gzip.open(path, "rt", encoding="utf8") - else: - f = open(path, "r") - if keys_to_int: - content = json.load(f, object_hook=lambda x: convert_keys_to_int(x)) - else: - content = json.load(f) - f.close() + with open(path, "r") as f: + if keys_to_int: + content = json.load(f, object_hook=lambda x: convert_keys_to_int(x)) + else: + content = json.load(f) return content -def save_json(path, content, compress=False): +def save_json(path, content): """Saves the provided content to a JSON file. :param path: Path to the output JSON file. :param content: Dictionary/list to save. - :param compress: Saves as a gzip archive, appends ".gz" extension to filepath. """ - if compress: - path += ".gz" - f = gzip.open(path, "wt", encoding="utf8") - else: - f = open(path, "w") - - if isinstance(content, dict): - f.write("{\n") - content_sorted = sorted(content.items(), key=lambda x: x[0]) - for elem_id, (k, v) in enumerate(content_sorted): - f.write(' "{}": {}'.format(k, json.dumps(v, sort_keys=True))) - if elem_id != len(content) - 1: - f.write(",") - f.write("\n") - f.write("}") - - elif isinstance(content, list): - f.write("[\n") - for elem_id, elem in enumerate(content): - f.write(" {}".format(json.dumps(elem, sort_keys=True))) - if elem_id != len(content) - 1: - f.write(",") - f.write("\n") - f.write("]") + with open(path, "w") as f: + if isinstance(content, dict): + f.write("{\n") + content_sorted = sorted(content.items(), key=lambda x: x[0]) + for elem_id, (k, v) in enumerate(content_sorted): + f.write(' "{}": {}'.format(k, json.dumps(v, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(",") + f.write("\n") + f.write("}") + + elif isinstance(content, list): + f.write("[\n") + for elem_id, elem in enumerate(content): + f.write(" {}".format(json.dumps(elem, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(",") + f.write("\n") + f.write("]") - else: - json.dump(content, f, sort_keys=True) - - f.close() + else: + json.dump(content, f, sort_keys=True) def load_cam_params(path): @@ -434,7 +419,7 @@ def check_bop_results(path, version="bop19"): def check_coco_results(path, version="bop22", ann_type="segm", enforce_no_segm_if_bbox=False): """Checks if the format of extended COCO results is correct. - :param path: Path to a file with coco estimates. If ".json.gz" extension, opens with gzip. + :param path: Path to a file with coco estimates. :param version: Version of the results. :param ann_type: type of annotation expected in the file. 
"bbox" -> bounding boxes @@ -485,7 +470,7 @@ def check_coco_results(path, version="bop22", ann_type="segm", enforce_no_segm_i return check_passed, check_msg -def save_coco_results(path, results, version="bop22", compress=False): +def save_coco_results(path, results, version="bop22"): """Saves detections/instance segmentations for each scene in coco format. "bbox" should be [x,y,w,h] in pixels @@ -496,6 +481,7 @@ def save_coco_results(path, results, version="bop22", compress=False): :param version: Version of the results. """ + # See docs/bop_challenge_2022.md for details. if version == "bop22": coco_results = [] for res in results: @@ -512,7 +498,7 @@ def save_coco_results(path, results, version="bop22", compress=False): "time": res["run_time"] if "run_time" in res else -1, } ) - save_json(path, coco_results, compress) + save_json(path, coco_results) else: raise ValueError("Unknown version of BOP detection results.") @@ -621,7 +607,6 @@ def load_ply(path): "float": ("f", 4), "double": ("d", 8), "int": ("i", 4), - "uint": ("I", 4), "uchar": ("B", 1), } @@ -858,19 +843,7 @@ def save_ply2( def get_im_targets(im_gt, im_gt_info, visib_gt_min, eval_mode="localization"): - """ - From an image gt and gt info, given a minimum visibility, get valid object evaluation targets. - - Output format: dict[obj_id] - { - : {'inst_count': }, - : {'inst_count': }, - ... - } - """ im_targets = {} - # Objects gt detection are have gt and gt_info have same order. - # object id is retrieved from gt and visibility from gt info. for gt_id, gt in enumerate(im_gt): gt_info = im_gt_info[gt_id] obj_id = gt["obj_id"] @@ -883,4 +856,4 @@ def get_im_targets(im_gt, im_gt_info, visib_gt_min, eval_mode="localization"): if obj_id not in im_targets: im_targets[obj_id] = {"inst_count": 0} im_targets[obj_id]["inst_count"] += 1 - return im_targets + return im_targets \ No newline at end of file diff --git a/bop_toolkit_lib/misc.py b/bop_toolkit_lib/misc.py index 97e1d54a..24b1f5f6 100644 --- a/bop_toolkit_lib/misc.py +++ b/bop_toolkit_lib/misc.py @@ -131,6 +131,7 @@ def precompute_lazy(depth_im, K): :return: hxw ndarray (Xs/depth_im, Ys/depth_im) """ if depth_im.shape != Precomputer.depth_im_shape: + Precomputer.depth_im_shape = depth_im.shape Precomputer.xs, Precomputer.ys = np.meshgrid( np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0]) ) @@ -142,7 +143,6 @@ def precompute_lazy(depth_im, K): Precomputer.pre_Xs = (Precomputer.xs - K[0, 2]) / np.float64(K[0, 0]) Precomputer.pre_Ys = (Precomputer.ys - K[1, 2]) / np.float64(K[1, 1]) - Precomputer.depth_im_shape = depth_im.shape return Precomputer.pre_Xs, Precomputer.pre_Ys diff --git a/bop_toolkit_lib/renderer_batch.py b/bop_toolkit_lib/renderer_batch.py index dab36e72..ed87e83b 100644 --- a/bop_toolkit_lib/renderer_batch.py +++ b/bop_toolkit_lib/renderer_batch.py @@ -96,7 +96,7 @@ def run_vsd(self, all_im_errs): for worker_id in range(num_workers_used): cmd = [ "python", - "bop_toolkit_lib/call_vsd_worker.py", + "external/bop_toolkit/bop_toolkit_lib/call_vsd_worker.py", f"--input_dir={self.tmp_dir}", f"--worker_id={worker_id}", ] diff --git a/bop_toolkit_lib/tests/eval_bop22_coco_test.py b/bop_toolkit_lib/tests/eval_bop22_coco_test.py index b552a31e..172c3b26 100644 --- a/bop_toolkit_lib/tests/eval_bop22_coco_test.py +++ b/bop_toolkit_lib/tests/eval_bop22_coco_test.py @@ -5,20 +5,20 @@ from tqdm import tqdm from bop_toolkit_lib import inout +# EPS_AP = 0.001 -# Define path to directories -RESULT_PATH = "./bop_toolkit_lib/tests/data/" -EVAL_PATH = 
"./bop_toolkit_lib/tests/eval/" -LOGS_PATH = "./bop_toolkit_lib/tests/logs" -os.makedirs(EVAL_PATH, exist_ok=True) -os.makedirs(LOGS_PATH, exist_ok=True) +# Define the input directory +INPUT_DIR = "./bop_toolkit_lib/tests/data/" + +# Define the output directory +OUTPUT_DIR = "./bop_toolkit_lib/tests/logs" +os.makedirs(OUTPUT_DIR, exist_ok=True) # Define the dataset dictionary -# tuples: (submission name, annotation type, compressed) FILE_DICTIONARY = { - "ycbv_zebra_segm": ("zebraposesat-effnetb4_ycbv-test_5ed0eecc-96f8-498b-9438-d586d4d92528", "segm", False), - "ycbv_gdrnppdet_bbox": ("gdrnppdet-pbrreal_ycbv-test_abe6c5f1-cb26-4bbd-addc-bb76dd722a96", "bbox", True), + "ycbv_zebra_segm": ("zebraposesat-effnetb4_ycbv-test_5ed0eecc-96f8-498b-9438-d586d4d92528", "segm"), + "ycbv_gdrnppdet_bbox": ("gdrnppdet-pbrreal_ycbv-test_abe6c5f1-cb26-4bbd-addc-bb76dd722a96", "bbox"), } # From BOP website @@ -57,24 +57,22 @@ } # Loop through each entry in the dictionary and execute the command -for dataset_method_name, (sub_name, ann_type, compressed) in tqdm( +for dataset_method_name, (sub_name, ann_type) in tqdm( FILE_DICTIONARY.items(), desc="Executing..." ): - ext = ".json.gz" if compressed else ".json" - result_filename = sub_name + ext command = [ "python", "scripts/eval_bop22_coco.py", - "--results_path", RESULT_PATH, - "--eval_path", EVAL_PATH, - "--result_filenames", result_filename, + "--results_path", INPUT_DIR, + "--eval_path", INPUT_DIR, + "--result_filenames", sub_name+".json", "--bbox_type", "amodal", "--ann_type", ann_type ] command_ = " ".join(command) print(f"Executing: {command_}") start_time = time.time() - log_file_path = f"{LOGS_PATH}/eval_bop22_coco_test_{dataset_method_name}.txt" + log_file_path = f"{OUTPUT_DIR}/eval_bop22_coco_test_{dataset_method_name}.txt" with open(log_file_path, "a") as output_file: subprocess.run(command, stdout=output_file, stderr=subprocess.STDOUT) end_time = time.time() @@ -84,10 +82,11 @@ # Check scores for each dataset -for sub_short_name, (sub_name, ann_type, compressed) in tqdm(FILE_DICTIONARY.items(), desc="Verifying..."): +for sub_short_name, (sub_name, ann_type) in tqdm(FILE_DICTIONARY.items(), desc="Verifying..."): if sub_short_name in EXPECTED_OUTPUT: + ann_type = FILE_DICTIONARY[sub_short_name][1] eval_filename = f"scores_bop22_coco_{ann_type}.json" - eval_file_path = os.path.join(RESULT_PATH, sub_name, eval_filename) + eval_file_path = os.path.join(INPUT_DIR, sub_name, eval_filename) eval_scores = inout.load_json(eval_file_path) for key, expected_score in EXPECTED_OUTPUT[sub_short_name].items(): eval_score = eval_scores.get(key) diff --git a/bop_toolkit_lib/tests/test_misc.py b/bop_toolkit_lib/tests/test_misc.py index fbaa2af1..da9cda11 100644 --- a/bop_toolkit_lib/tests/test_misc.py +++ b/bop_toolkit_lib/tests/test_misc.py @@ -91,54 +91,6 @@ def test_project_pts(self): proj_htt[i] = pose_error_htt.project_pts_htt(self.pts, camera, R_np[i], t_np[i]) self.assertTrue(np.allclose(proj_htt, proj_np, atol=1e-4)) - def test_precomputer(self): - - # precomputer static class start with None attributes - self.assertTrue(misc.Precomputer.xs is None) - self.assertTrue(misc.Precomputer.ys is None) - self.assertTrue(misc.Precomputer.pre_Xs is None) - self.assertTrue(misc.Precomputer.pre_Ys is None) - self.assertTrue(misc.Precomputer.K is None) - - Ka = np.eye(3) - depth_ima = np.ones((10,10)) - - pre_Xs1, pre_Ys1 = misc.Precomputer.precompute_lazy(depth_ima, Ka) - self.assertEqual(depth_ima.shape, pre_Xs1.shape) - self.assertEqual(depth_ima.shape, 
pre_Ys1.shape) - - # same inputs should return the same internal objects - pre_Xs1_bis, pre_Ys1_bis = misc.Precomputer.precompute_lazy(depth_ima, Ka) - self.assertEqual(id(pre_Xs1), id(pre_Xs1_bis)) - self.assertEqual(id(pre_Ys1), id(pre_Ys1_bis)) - self.assertTrue(np.allclose(pre_Xs1, pre_Xs1_bis, atol=1e-9)) - self.assertTrue(np.allclose(pre_Ys1, pre_Ys1_bis, atol=1e-9)) - - # different intrinsics should trigger recomputation - Kb = 2*np.eye(3) - pre_Xs2, pre_Ys2 = misc.Precomputer.precompute_lazy(depth_ima, Kb) - self.assertNotEqual(id(pre_Xs1), id(pre_Xs2)) - self.assertNotEqual(id(pre_Ys1), id(pre_Ys2)) - self.assertFalse(np.allclose(pre_Xs1, pre_Xs2, atol=1e-9)) - self.assertFalse(np.allclose(pre_Ys1, pre_Ys2, atol=1e-9)) - - # different depth image should trigger recomputation - depth_imb = np.ones((20,20)) - pre_Xs3, pre_Ys3 = misc.Precomputer.precompute_lazy(depth_imb, Kb) - self.assertNotEqual(id(pre_Xs2), id(pre_Xs3)) - self.assertNotEqual(id(pre_Ys2), id(pre_Ys3)) - self.assertNotEqual(pre_Xs2.shape, pre_Xs3.shape) - self.assertNotEqual(pre_Ys2.shape, pre_Ys3.shape) - - # different intrinsics and depth image should trigger recomputation - Kc = 3*np.eye(3) - depth_imc = np.ones((30,30)) - pre_Xs4, pre_Ys4 = misc.Precomputer.precompute_lazy(depth_imc, Kc) - self.assertNotEqual(id(pre_Xs3), id(pre_Xs4)) - self.assertNotEqual(id(pre_Ys3), id(pre_Ys4)) - self.assertNotEqual(pre_Xs3.shape, pre_Xs4.shape) - self.assertNotEqual(pre_Ys3.shape, pre_Ys4.shape) - if __name__ == "__main__": unittest.main() diff --git a/bop_toolkit_lib/visualization.py b/bop_toolkit_lib/visualization.py index 3ffbedbf..e322b843 100644 --- a/bop_toolkit_lib/visualization.py +++ b/bop_toolkit_lib/visualization.py @@ -268,8 +268,6 @@ def vis_object_poses( {"name": "min diff", "fmt": ":.3f", "val": np.min(depth_diff_valid)}, {"name": "max diff", "fmt": ":.3f", "val": np.max(depth_diff_valid)}, {"name": "mean diff", "fmt": ":.3f", "val": np.mean(depth_diff_valid)}, - {"name": "median diff", "fmt": ":.3f", "val": np.median(np.abs(depth_diff_valid))}, - {"name": "25 percentile", "fmt": ":.3f", "val": np.percentile(np.abs(depth_diff_valid), 25)}, ] depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info) inout.save_im(vis_depth_diff_path, depth_diff_vis) diff --git a/docs/bop_datasets_format.md b/docs/bop_datasets_format.md index 5af2fda1..7ffe56b3 100644 --- a/docs/bop_datasets_format.md +++ b/docs/bop_datasets_format.md @@ -22,7 +22,6 @@ DATASET_NAME │ │ ├─ scene_camera.json │ │ ├─ scene_gt.json │ │ ├─ scene_gt_info.json -│ │ ├─ scene_gt_coco.json │ │ ├─ depth │ │ ├─ mask │ │ ├─ mask_visib diff --git a/requirements.txt b/requirements.txt index 0f82ac91..f94fbf91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,12 +3,12 @@ kiwisolver==1.3.1 matplotlib==2.2.4 imageio==2.5.0 pypng==0.0.19 -Cython>=0.29.24 +Cython==0.29.24 PyOpenGL==3.1.0 triangle>=20190115.2 glumpy==1.1.0 opencv-python>=4.3.0.36 -Pillow>=8.2.0,<=9.5.0 +Pillow>=8.2.0 git+https://github.com/MartinSmeyer/cocoapi.git@v1.0#subdirectory=PythonAPI vispy>=0.6.5 webdataset>=0.1.62 diff --git a/scripts/calc_gt_coco.py b/scripts/calc_gt_coco.py index 5a1fbec8..1667a6f2 100644 --- a/scripts/calc_gt_coco.py +++ b/scripts/calc_gt_coco.py @@ -19,9 +19,9 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "tudl", # Dataset split. Options: 'train', 'test'. - "dataset_split": "test", + "dataset_split": "train", # Dataset split type. 
Options: 'synt', 'real', None = default. See dataset_params.py for options. "dataset_split_type": None, # bbox type. Options: 'modal', 'amodal'. @@ -64,13 +64,11 @@ "version": "0.1.0", "year": datetime.date.today().year, "contributor": "", - "date_created": datetime.datetime.now(datetime.timezone.utc).isoformat(" "), + "date_created": datetime.datetime.utcnow().isoformat(" "), } for scene_id in dp_split["scene_ids"]: - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - scene_modality = dataset_params.get_scene_sensor_or_modality(dp_split["eval_modality"], scene_id) - scene_sensor = dataset_params.get_scene_sensor_or_modality(dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) segmentation_id = 1 @@ -104,9 +102,12 @@ for scene_view, inst_list in scene_gt.items(): im_id = int(scene_view) - img_path = dp_split[tpath_keys["rgb_tpath"]].format(scene_id=scene_id, im_id=im_id) + img_path = dp_split["rgb_tpath"].format(scene_id=scene_id, im_id=im_id) relative_img_path = os.path.relpath(img_path, os.path.dirname(coco_gt_path)) - im_size = dataset_params.get_im_size(dp_split, scene_modality, scene_sensor) + if 'cam_model' in scene_camera[im_id]: + im_size = scene_camera[im_id]["cam_model"]["image_width"], scene_camera[im_id]["cam_model"]["image_height"] + else: + im_size = dp_split["im_size"] image_info = pycoco_utils.create_image_info( im_id, relative_img_path, im_size ) diff --git a/scripts/calc_gt_distribution.py b/scripts/calc_gt_distribution.py index 13ff9841..c4d53782 100644 --- a/scripts/calc_gt_distribution.py +++ b/scripts/calc_gt_distribution.py @@ -2,7 +2,6 @@ # Center for Machine Perception, Czech Technical University in Prague """Calculates distribution of GT poses.""" -import os import math import numpy as np import matplotlib.pyplot as plt @@ -17,23 +16,13 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "ycbv", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. "dataset_split_type": None, # Folder containing the BOP datasets. "datasets_path": config.datasets_path, - # Modality used to compute gt statistics, defaults to eval modality - "modality": None, - # Sensor used to compute gt statistics, defaults to eval sensor - "sensor": None, - # Folder for output visualisations. - "vis_path": os.path.join(config.output_path, "gt_distribution"), - # Save plots in "vis_path" - "save_plots": True, - # Show plots" - "show_plots": True, } ################################################################################ @@ -43,34 +32,30 @@ p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] - scene_ids = dp_split["scene_ids"] dists = [] azimuths = [] elevs = [] visib_fracts = [] ims_count = 0 - for scene_id in scene_ids: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - - misc.log(f"Processing - dataset: {p['dataset']} ({p['dataset_split']}, {p['dataset_split_type']}), scene: {scene_id}") + misc.log( + "Processing - dataset: {} ({}, {}), scene: {}".format( + p["dataset"], p["dataset_split"], p["dataset_split_type"], scene_id + ) + ) # Load GT poses. 
- scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) - scene_gt = inout.load_scene_gt(scene_gt_path) - + scene_gt = inout.load_scene_gt(dp_split["scene_gt_tpath"].format(scene_id=scene_id)) + # Load info about the GT poses. - scene_gt_info_path = dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) - scene_gt_info = inout.load_json(scene_gt_info_path, keys_to_int=True) + scene_gt_info = inout.load_json( + dp_split["scene_gt_info_tpath"].format(scene_id=scene_id), keys_to_int=True + ) ims_count += len(scene_gt) - for im_id in scene_gt: + for im_id in scene_gt.keys(): for gt_id, im_gt in enumerate(scene_gt[im_id]): # Object distance. dist = np.linalg.norm(im_gt["cam_t_m2c"]) @@ -102,10 +87,6 @@ ) misc.log("Number of images: " + str(ims_count)) -if ims_count == 0: - misc.log("No ground truth found.") - exit() - misc.log("Min dist: {}".format(np.min(dists))) misc.log("Max dist: {}".format(np.max(dists))) misc.log("Mean dist: {}".format(np.mean(dists))) @@ -122,44 +103,21 @@ misc.log("Max visib fract: {}".format(np.max(visib_fracts))) misc.log("Mean visib fract: {}".format(np.mean(visib_fracts))) -prefix = f"{p['modality']}_{p['sensor']}_" if isinstance(p["modality"], str) else "" # Visualize distributions. -if p["save_plots"]: - save_dir = os.path.join(p["vis_path"], p["dataset"]) - misc.log(f"Saving plots in {save_dir}") - misc.ensure_dir(save_dir) - plt.figure() plt.hist(dists, bins=100) plt.title("Object distance") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}object_distance.png") - misc.log(f"Saving {path}") - plt.savefig(path) plt.figure() plt.hist(azimuths, bins=100) plt.title("Azimuth") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}azimuth.png") - misc.log(f"Saving {path}") - plt.savefig(path) plt.figure() plt.hist(elevs, bins=100) plt.title("Elevation") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}elevation.png") - misc.log(f"Saving {path}") - plt.savefig(path) plt.figure() plt.hist(visib_fracts, bins=100) plt.title("Visibility fraction") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}visibility_fraction.png") - misc.log(f"Saving {path}") - plt.savefig(path) -if p["show_plots"]: - plt.show() +plt.show() diff --git a/scripts/calc_gt_info.py b/scripts/calc_gt_info.py index db3e0f2b..23b65db2 100644 --- a/scripts/calc_gt_info.py +++ b/scripts/calc_gt_info.py @@ -24,7 +24,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. @@ -37,10 +37,6 @@ "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # Folder containing the BOP datasets. "datasets_path": config.datasets_path, - # which modality to compute masks on, default to eval modality - "modality": "rgb", - # which sensor to compute masks on, default to eval sensor - "sensor": "realsense", # Path template for output images with object masks. 
"vis_mask_visib_tpath": os.path.join( config.output_path, @@ -62,10 +58,6 @@ dp_split = dataset_params.get_split_params( p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] model_type = None if p["dataset"] == "tless": @@ -76,10 +68,7 @@ misc.log("Initializing renderer...") # The renderer has a larger canvas for generation of masks of truncated objects. -if isinstance(dp_split["im_size"], dict): - im_width, im_height = dp_split["im_size"][p["sensor"]] -else: # classical BOP format - im_width, im_height = dp_split["im_size"] +im_width, im_height = dp_split["im_size"] ren_width, ren_height = 3 * im_width, 3 * im_height ren_cx_offset, ren_cy_offset = im_width, im_height ren = renderer.create_renderer(ren_width, ren_height, p["renderer_type"], mode="depth") @@ -90,13 +79,11 @@ scene_ids = dataset_params.get_present_scene_ids(dp_split) for scene_id in scene_ids: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - - # Load scene GT. - scene_camera_path = dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id) - scene_camera = inout.load_scene_camera(scene_camera_path) - scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) - scene_gt = inout.load_scene_gt(scene_gt_path) + # Load scene info and ground-truth poses. + scene_camera = inout.load_scene_camera( + dp_split["scene_camera_tpath"].format(scene_id=scene_id) + ) + scene_gt = inout.load_scene_gt(dp_split["scene_gt_tpath"].format(scene_id=scene_id)) scene_gt_info = {} im_ids = sorted(scene_gt.keys()) @@ -113,7 +100,7 @@ ) # Load depth image. - depth_fpath = dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) + depth_fpath = dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) if not os.path.exists(depth_fpath): depth_fpath = depth_fpath.replace(".tif", ".png") depth = inout.load_depth(depth_fpath) @@ -221,6 +208,6 @@ inout.save_im(vis_path, vis) # Save the info for the current scene. - scene_gt_info_path = dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) + scene_gt_info_path = dp_split["scene_gt_info_tpath"].format(scene_id=scene_id) misc.ensure_dir(os.path.dirname(scene_gt_info_path)) inout.save_json(scene_gt_info_path, scene_gt_info) diff --git a/scripts/calc_gt_masks.py b/scripts/calc_gt_masks.py index 64547be4..cb53b613 100644 --- a/scripts/calc_gt_masks.py +++ b/scripts/calc_gt_masks.py @@ -18,7 +18,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. @@ -29,10 +29,6 @@ "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # Folder containing the BOP datasets. 
"datasets_path": config.datasets_path, - # which modality to compute masks on, default to eval modality - "modality": None, - # which sensor to compute masks on, default to eval sensor - "sensor": None, } ################################################################################ @@ -41,12 +37,6 @@ dp_split = dataset_params.get_split_params( p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] - -classic_bop_format = isinstance(dp_split["im_modalities"], list) model_type = None if p["dataset"] == "tless": @@ -55,32 +45,28 @@ scene_ids = dataset_params.get_present_scene_ids(dp_split) for scene_id in scene_ids: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - # Load scene GT. - scene_camera_path = dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id) - scene_camera = inout.load_scene_camera(scene_camera_path) - scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) + scene_gt_path = dp_split["scene_gt_tpath"].format(scene_id=scene_id) scene_gt = inout.load_scene_gt(scene_gt_path) + # Load scene camera. + scene_camera_path = dp_split["scene_camera_tpath"].format(scene_id=scene_id) + scene_camera = inout.load_scene_camera(scene_camera_path) + # Create folders for the output masks (if they do not exist yet). mask_dir_path = os.path.dirname( - dp_split[tpath_keys["mask_tpath"]].format(scene_id=scene_id, im_id=0, gt_id=0) + dp_split["mask_tpath"].format(scene_id=scene_id, im_id=0, gt_id=0) ) - misc.log(f"Saving masks in {mask_dir_path}") misc.ensure_dir(mask_dir_path) + mask_visib_dir_path = os.path.dirname( - dp_split[tpath_keys["mask_visib_tpath"]].format(scene_id=scene_id, im_id=0, gt_id=0) + dp_split["mask_visib_tpath"].format(scene_id=scene_id, im_id=0, gt_id=0) ) - misc.log(f"Saving visible masks in {mask_visib_dir_path}") misc.ensure_dir(mask_visib_dir_path) # Initialize a renderer. misc.log("Initializing renderer...") - if isinstance(dp_split["im_size"], dict): - width, height = dp_split["im_size"][p["sensor"]] - else: # classical BOP format - width, height = dp_split["im_size"] + width, height = dp_split["im_size"] ren = renderer.create_renderer( width, height, renderer_type=p["renderer_type"], mode="depth" ) @@ -106,10 +92,8 @@ fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] # Load depth image. - depth_fpath = dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) - if not os.path.exists(depth_fpath): - depth_fpath = depth_fpath.replace(".tif", ".png") - depth_im = inout.load_depth(depth_fpath) + depth_path = dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) + depth_im = inout.load_depth(depth_path) depth_im *= scene_camera[im_id]["depth_scale"] # to [mm] dist_im = misc.depth_im_to_dist_im_fast(depth_im, K) @@ -131,12 +115,12 @@ ) # Save the calculated masks. 
- mask_path = dp_split[tpath_keys["mask_tpath"]].format( + mask_path = dp_split["mask_tpath"].format( scene_id=scene_id, im_id=im_id, gt_id=gt_id ) inout.save_im(mask_path, 255 * mask.astype(np.uint8)) - mask_visib_path = dp_split[tpath_keys["mask_visib_tpath"]].format( + mask_visib_path = dp_split["mask_visib_tpath"].format( scene_id=scene_id, im_id=im_id, gt_id=gt_id ) inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) diff --git a/scripts/calc_model_info.py b/scripts/calc_model_info.py index 76ea286b..c9a80b2f 100644 --- a/scripts/calc_model_info.py +++ b/scripts/calc_model_info.py @@ -12,7 +12,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "hot3d", + "dataset": "lm", # Type of input object models. "model_type": None, # Folder containing the BOP datasets. @@ -33,19 +33,19 @@ model = inout.load_ply(dp_model["model_tpath"].format(obj_id=obj_id)) # Calculate 3D bounding box. - xs, ys, zs = model["pts"][:,0], model["pts"][:,1], model["pts"][:,2] - bbox = misc.calc_3d_bbox(xs, ys, zs) + ref_pt = map(float, model["pts"].min(axis=0).flatten()) + size = map(float, (model["pts"].max(axis=0) - ref_pt).flatten()) # Calculated diameter. diameter = misc.calc_pts_diameter(model["pts"]) models_info[obj_id] = { - "min_x": bbox[0], - "min_y": bbox[1], - "min_z": bbox[2], - "size_x": bbox[3], - "size_y": bbox[4], - "size_z": bbox[5], + "min_x": ref_pt[0], + "min_y": ref_pt[1], + "min_z": ref_pt[2], + "size_x": size[0], + "size_y": size[1], + "size_z": size[2], "diameter": diameter, } diff --git a/scripts/create_coco_results_file_from_gt.py b/scripts/create_coco_results_file_from_gt.py index bb56bbf1..0014f0cf 100644 --- a/scripts/create_coco_results_file_from_gt.py +++ b/scripts/create_coco_results_file_from_gt.py @@ -57,8 +57,9 @@ ) # Load and organize the estimation targets. 
-target_file_path = os.path.join(dp_split["base_path"], p["targets_filename"]) -targets = inout.load_json(target_file_path) +targets = inout.load_json( + os.path.join(dp_split["base_path"], p["targets_filename"]) +) targets_org = {} for target in targets: targets_org.setdefault(target["scene_id"], {}).setdefault(target["im_id"], {}) @@ -66,7 +67,7 @@ # loop over coco annotation and select based on targets for scene_id in targets_org: - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) coco_gt_path = dp_split[tpath_keys["scene_gt_coco_tpath"]].format(scene_id=scene_id) if p["bbox_type"] == "modal": @@ -93,9 +94,10 @@ result_filename = "{}_{}-{}_coco.json".format(p["results_name"], p["dataset"], p["split"]) results_path = os.path.join(p["results_path"], result_filename) inout.save_json(results_path, results) -result_file_path = os.path.join(p["results_path"], result_filename) -check_passed, _ = inout.check_coco_results(result_file_path, ann_type="segm") +check_passed, _ = inout.check_coco_results( + os.path.join(p["results_path"], result_filename), ann_type="segm" +) if not check_passed: misc.log("Please correct the coco result format of {}".format(result_filename)) exit() -misc.log(f"Saved {results_path}") +print('Saved ', results_path) diff --git a/scripts/create_pose_results_file_from_gt.py b/scripts/create_pose_results_file_from_gt.py index 9e615103..6a25ae4a 100644 --- a/scripts/create_pose_results_file_from_gt.py +++ b/scripts/create_pose_results_file_from_gt.py @@ -3,8 +3,6 @@ """ Create POSE result files from ground truth annotation and targets file. -Simply generate estimates from using all object gt poses from the test target file, without caring about visibility. -Non visible estimates are discarded by eval pose scripts and do not impact AP/AR scores. """ import os @@ -13,7 +11,6 @@ from bop_toolkit_lib import config from bop_toolkit_lib import dataset_params from bop_toolkit_lib import inout -from bop_toolkit_lib import misc # PARAMETERS (can be overwritten by the command line arguments below). @@ -22,14 +19,16 @@ # Out perfect result file name "results_name": 'gt-results', # Predefined test targets - "targets_filename": "test_targets_bop24.json", + "targets_filename": "test_targets_bop24.json", # Folder with results to be evaluated. "results_path": config.results_path, # Folder containing the BOP datasets. "datasets_path": config.datasets_path, - "dataset": "xyzibd", + "dataset": "ycbv", "split": "test", "split_type": None, + # by default, we consider only objects that are at least 10% visible + "visib_gt_min": 0.1, "eval_mode": "localization", } ################################################################################ @@ -41,6 +40,7 @@ parser.add_argument("--dataset", default=p["dataset"]) parser.add_argument("--split", default=p["split"]) parser.add_argument("--split_type", default=p["split_type"]) +parser.add_argument("--visib_gt_min", default=p["visib_gt_min"]) parser.add_argument("--eval_mode", default=p["eval_mode"]) args = parser.parse_args() @@ -51,6 +51,7 @@ p["dataset"] = str(args.dataset) p["split"] = str(args.split) p["split_type"] = str(args.split_type) if args.split_type is not None else None +p["visib_gt_min"] = float(args.visib_gt_min) p["eval_mode"] = str(args.eval_mode) # Load dataset parameters. 
@@ -61,31 +62,33 @@ targets_path = os.path.join(p["datasets_path"], p["dataset"], p["targets_filename"]) targets = inout.load_json(targets_path) -# Load the estimation targets. -targets = inout.load_json( - os.path.join(dp_split["base_path"], p["targets_filename"]) -) - -# Organize the targets by scene and image. -misc.log("Organizing estimation targets...") -targets_org = {} -for target in targets: - targets_org.setdefault(target["scene_id"], {}).setdefault(target["im_id"], {}) +unique_scene_ids = set([t["scene_id"] for t in targets]) +scene_gts = {} +scene_gts_info = {} results = [] -for scene_id in targets_org: - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) - scene_gt_info_path = dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) - scene_gt = inout.load_scene_gt(scene_gt_path) - scene_gt_info = inout.load_scene_gt(scene_gt_info_path) - - for im_id in targets_org[scene_id]: - img_gt = scene_gt[im_id] - img_gt_info = scene_gt_info[im_id] - - for obj_gt in img_gt: +for target in targets: + scene_id, im_id = target["scene_id"], target["im_id"] + + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) + + if scene_id not in scene_gts: + scene_gts[scene_id] = inout.load_scene_gt( + dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) + ) + scene_gts_info[scene_id] = inout.load_scene_gt( + dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) + ) + + img_gt = scene_gts[scene_id][im_id] + img_gt_info = scene_gts_info[scene_id][im_id] + + if "obj_id" not in target: + target = inout.get_im_targets(img_gt, img_gt_info, p["visib_gt_min"], p["eval_mode"]) + + for obj_gt in img_gt: + if obj_gt["obj_id"] in target: result = { "scene_id": int(scene_id), "im_id": int(im_id), @@ -95,9 +98,9 @@ "t": obj_gt["cam_t_m2c"], "time": -1.0, } - results.append(result) + results.append(result) -result_filename = f"{p['results_name']}_{p['dataset']}-{p['split']}_pose.csv" +result_filename = "{}_{}-{}_pose.csv".format(p["results_name"], p["dataset"], p["split"]) results_path = os.path.join(p["results_path"], result_filename) inout.save_bop_results(results_path, results) -misc.log(f"Saved {results_path}") +print('Saved ', results_path) diff --git a/scripts/enumerate_test_targets.py b/scripts/enumerate_test_targets.py index 63a9dbb8..b5d6e574 100644 --- a/scripts/enumerate_test_targets.py +++ b/scripts/enumerate_test_targets.py @@ -55,20 +55,16 @@ # List of considered scenes. scene_ids_curr = dp_split["scene_ids"] - test_targets = [] for scene_id in scene_ids_curr: misc.log("Processing scene: {}".format(scene_id)) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - - # Load the ground-truth poses. - scene_gt = inout.load_scene_gt(dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id)) + scene_gt = inout.load_scene_gt(dp_split["scene_gt_tpath"].format(scene_id=scene_id)) # Load meta info about the ground-truth poses. scene_gt_info = inout.load_scene_gt( - dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) + dp_split["scene_gt_info_tpath"].format(scene_id=scene_id) ) # List of considered images. 
@@ -97,10 +93,18 @@ } ) +# test_targets_lines = [] +# for test_target in test_targets: +# test_targets_lines.append( +# '- {{scene_id: {}, im_id: {}, obj_id: {}, inst_count: {}}}'.format( +# test_target['scene_id'], test_target['im_id'], test_target['obj_id'], +# test_target['inst_count'])) + # Save the test targets, test_targets_path = os.path.join(dp_split["base_path"], p["test_targets_filename"]) +# with open(test_targets_path, 'w') as f: +# f.write('\n'.join(test_targets_lines)) -misc.log("Saving {}".format(test_targets_path)) inout.save_json(test_targets_path, test_targets) misc.log("Done.") diff --git a/scripts/eval_bop19_pose.py b/scripts/eval_bop19_pose.py index 396d6f6b..7fd1464b 100644 --- a/scripts/eval_bop19_pose.py +++ b/scripts/eval_bop19_pose.py @@ -39,6 +39,7 @@ "tyol": 15, "ycbv": 15, "hope": 15, + "industrial": 15 }, "vsd_taus": list(np.arange(0.05, 0.51, 0.05)), "vsd_normalized_by_diameter": True, diff --git a/scripts/eval_bop22_coco.py b/scripts/eval_bop22_coco.py index cd31634c..3e32e2ee 100644 --- a/scripts/eval_bop22_coco.py +++ b/scripts/eval_bop22_coco.py @@ -121,7 +121,7 @@ misc.log("Merging coco annotations and predictions...") # Merge coco scene annotations and results for i, scene_id in enumerate(targets_org): - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) scene_coco_ann_path = dp_split[tpath_keys["scene_gt_coco_tpath"]].format(scene_id=scene_id) if p["ann_type"] == "bbox" and p["bbox_type"] == "modal": diff --git a/scripts/eval_bop24_pose.py b/scripts/eval_bop24_pose.py index a5c3412d..a3cc3f03 100644 --- a/scripts/eval_bop24_pose.py +++ b/scripts/eval_bop24_pose.py @@ -28,12 +28,6 @@ "type": "mssd", "correct_th": [[th] for th in np.arange(0.05, 0.51, 0.05)], }, - { - "n_top": 0, - "type": "mssd", - "correct_th": [[th] for th in range(2,21,2)], - "threshold_unit": "mm" - }, { "n_top": 0, "type": "mspd", @@ -110,8 +104,6 @@ # Name of the result and the dataset. result_name = os.path.splitext(os.path.basename(result_filename))[0] dataset = str(result_name.split("_")[1].split("-")[0]) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 # Calculate the average estimation time per image. ests = inout.load_bop_results( @@ -193,10 +185,6 @@ "--visib_gt_min={}".format(p["visib_gt_min"]), "--eval_mode=detection", ] - if "threshold_unit" in error: - calc_scores_cmd += [ - "--normalized_by_diameter=[]" - ] if p["ignore_object_visible_less_than_visib_gt_min"]: calc_scores_cmd += [ "--ignore_object_visible_less_than_visib_gt_min" @@ -274,9 +262,6 @@ f"mAP, {error['type']}, {obj_id}: {mAP_over_correct_th:.3f}" ) mAP_over_correct_ths.append(mAP_over_correct_th) - if "threshold_unit" in error: - error["type"] = error["type"] + "_" + error["threshold_unit"] - mAP_per_error_type[error["type"]] = np.mean(mAP_over_correct_ths) logger.info( f"{error['type']}, Final mAP: {mAP_per_error_type[error['type']]:.3f}" @@ -297,16 +282,6 @@ [mAP_per_error_type["mssd"], mAP_per_error_type["mspd"]] ) - # Final score for the given dataset. - final_scores["bop25_mAP"] = np.mean( - [mAP_per_error_type["mssd"]] - ) - - # Final score for the given dataset. - final_scores["bop25_mAP_mm"] = np.mean( - [mAP_per_error_type["mssd_mm"]] - ) - # Average estimation time per image. 
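With the bop25 and millimetre-threshold variants removed above, the final pose score is again the plain BOP24 definition: average the mAP over each error type's correct_th values, then average MSSD and MSPD. A toy numeric sketch (all mAP numbers are invented):

import numpy as np

# mAP per correct_th for one error type (thresholds 0.05, 0.10, ... in the script).
mAP_over_correct_ths_mssd = [0.9, 0.8, 0.7]
mAP_over_correct_ths_mspd = [0.95, 0.85, 0.75]

mAP_per_error_type = {
    "mssd": np.mean(mAP_over_correct_ths_mssd),
    "mspd": np.mean(mAP_over_correct_ths_mspd),
}

# Final dataset score: mean over the two error types.
bop24_mAP = np.mean([mAP_per_error_type["mssd"], mAP_per_error_type["mspd"]])
print(round(float(bop24_mAP), 3))  # 0.825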
final_scores["bop24_average_time_per_image"] = average_time_per_image diff --git a/scripts/eval_calc_errors.py b/scripts/eval_calc_errors.py index da558c47..ba6f3bca 100644 --- a/scripts/eval_calc_errors.py +++ b/scripts/eval_calc_errors.py @@ -59,6 +59,7 @@ "tyol": 15, "ycbv": 15, "hope": 15, + "industrial": 15 }, "vsd_taus": list(np.arange(0.05, 0.51, 0.05)), "vsd_normalized_by_diameter": True, @@ -174,9 +175,6 @@ p["datasets_path"], dataset, split, split_type ) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 - if p["error_type"] not in dp_split["supported_error_types"]: raise ValueError("""{} error is not among {} """ """supported error types: {}""".format(p["error_type"], dataset, dp_split["supported_error_types"])) @@ -249,8 +247,7 @@ # Load pose estimates. logger.info("Loading pose estimates...") - max_num_estimates_per_image = p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None - ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=max_num_estimates_per_image) + ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None) # Organize the pose estimates by scene, image and object. logger.info("Organizing pose estimates...") @@ -262,7 +259,7 @@ for scene_id, scene_targets in targets_org.items(): logger.info("Processing scene {} of {}...".format(scene_id, dataset)) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) # Load GT poses for the current scene. scene_gt = inout.load_scene_gt( diff --git a/scripts/eval_calc_errors_gpu.py b/scripts/eval_calc_errors_gpu.py index 96594417..61c5f247 100644 --- a/scripts/eval_calc_errors_gpu.py +++ b/scripts/eval_calc_errors_gpu.py @@ -83,7 +83,6 @@ ), "num_workers": config.num_workers, # Number of parallel workers for the calculation of errors. "eval_mode": "localization", # Options: 'localization', 'detection'. - "max_num_estimates_per_image": 100, # Maximum number of estimates per image. Only used for detection tasks. } ################################################################################ @@ -176,9 +175,6 @@ p["datasets_path"], dataset, split, split_type ) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 - model_type = "eval" dp_model = dataset_params.get_model_params(p["datasets_path"], dataset, model_type) @@ -229,8 +225,7 @@ # Load pose estimates. logger.info("Loading pose estimates...") - max_num_estimates_per_image = p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None - ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=max_num_estimates_per_image) + ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None) # Organize the pose estimates by scene, image and object. 
logger.info("Organizing pose estimates...") @@ -261,7 +256,7 @@ # for each scene, organize the estimates per object as each object est_per_object = copy.deepcopy(estimate_templates) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) # Load camera and GT poses for the current scene. scene_camera = inout.load_scene_camera( @@ -403,11 +398,10 @@ "obj_id": obj_id, "est_id": est_id, "score": score, - "gt_visib_fracts": {}, + "gt_visib_fract": gt_visib_fract, "errors": {}, } scene_errs[key_name]["errors"][gt_id] = [errors[i]] - scene_errs[key_name]["gt_visib_fracts"][gt_id] = [gt_visib_fract] scene_errs = [v for k, v in scene_errs.items()] del est_per_object diff --git a/scripts/eval_calc_scores.py b/scripts/eval_calc_scores.py index 75269049..58e469dd 100644 --- a/scripts/eval_calc_scores.py +++ b/scripts/eval_calc_scores.py @@ -177,9 +177,6 @@ # Evaluation signature. score_sign = misc.get_score_signature(p["correct_th"][err_type], p["visib_gt_min"]) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 - logger.info( "Calculating score - error: {}, method: {}, dataset: {}.".format( err_type, method, dataset @@ -220,7 +217,7 @@ for scene_id, scene_targets in targets_org.items(): logger.info("Processing scene {} of {}...".format(scene_id, dataset)) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) # Load GT poses for the current scene. scene_gt = inout.load_scene_gt( @@ -234,7 +231,10 @@ scene_camera = inout.load_scene_camera(dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id)) # Handle change of image size location between BOP19 and BOP24 dataset formats - scene_im_widths[scene_id] = dataset_params.get_im_size(dp_split, dp_split['eval_modality'], dp_split['eval_sensor'])[0] + if "cam_model" in next(iter(scene_camera.items()))[1]: + scene_im_widths[scene_id] = scene_camera[0]["cam_model"]["image_width"] + else: + scene_im_widths[scene_id] = float(dp_split["im_size"][0]) # Keep GT poses only for the selected targets. scene_gt_curr = {} diff --git a/scripts/vis_est_poses.py b/scripts/vis_est_poses.py index 46fabade..e840eadc 100644 --- a/scripts/vis_est_poses.py +++ b/scripts/vis_est_poses.py @@ -14,27 +14,13 @@ from bop_toolkit_lib import renderer from bop_toolkit_lib import visualization -# Get the base name of the file without the .py extension -file_name = os.path.splitext(os.path.basename(__file__))[0] -logger = misc.get_logger(file_name) - -htt_available = False -try: - from bop_toolkit_lib import pose_error_htt - htt_available = True -except ImportError as e: - logger.warning("""Missing hand_tracking_toolkit dependency, - mandatory if you are running evaluation on HOT3d. - Refer to the README.md for installation instructions. - """) - # PARAMETERS. ################################################################################ p = { # Top N pose estimates (with the highest score) to be visualized for each # object in each image. - "n_top": 0, # 0 = all estimates, -1 = given by the number of GT poses. + "n_top": 1, # 0 = all estimates, -1 = given by the number of GT poses. # True = one visualization for each (im_id, obj_id), False = one per im_id. "vis_per_obj_id": True, # Indicates whether to render RGB image. @@ -45,7 +31,7 @@ # of individual objects are blended together. 
"vis_rgb_resolve_visib": True, # Indicates whether to render depth image. - "vis_depth_diff": True, + "vis_depth_diff": False, # If to use the original model color. "vis_orig_color": False, # Type of the renderer (used for the VSD pose error function). @@ -88,21 +74,6 @@ split = dataset_info[1] split_type = dataset_info[2] if len(dataset_info) > 2 else None - ####################### - # hot3d specific checks - if dataset == "hot3d" and not htt_available: - raise ImportError("Missing hand_tracking_toolkit dependency, mandatory for HOT3D dataset.") - - if dataset == "hot3d" and p["renderer_type"] != "htt": - raise ValueError("'htt' renderer_type is mandatory for HOT3D dataset.") - - # hot3d does not contain depth modality, some visualizations are not available - if dataset in ["hot3d"]: - p["vis_rgb"] = True - p["vis_rgb_resolve_visib"] = False - p["vis_depth_diff"] = False - ####################### - # Load dataset parameters. dp_split = dataset_params.get_split_params( p["datasets_path"], dataset, split, split_type @@ -111,6 +82,30 @@ model_type = "eval" dp_model = dataset_params.get_model_params(p["datasets_path"], dataset, model_type) + # Rendering mode. + renderer_modalities = [] + if p["vis_rgb"]: + renderer_modalities.append("rgb") + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): + renderer_modalities.append("depth") + renderer_mode = "+".join(renderer_modalities) + + # Create a renderer. + width, height = dp_split["im_size"] + ren = renderer.create_renderer( + width, height, p["renderer_type"], mode=renderer_mode + ) + + # Load object models. + models = {} + for obj_id in dp_model["obj_ids"]: + misc.log("Loading 3D model of object {}...".format(obj_id)) + model_path = dp_model["model_tpath"].format(obj_id=obj_id) + model_color = None + if not p["vis_orig_color"]: + model_color = tuple(colors[(obj_id - 1) % len(colors)]) + ren.add_object(obj_id, model_path, surf_color=model_color) + # Load pose estimates. misc.log("Loading pose estimates...") ests = inout.load_bop_results(os.path.join(config.results_path, result_fname)) @@ -123,54 +118,25 @@ est["im_id"], {} ).setdefault(est["obj_id"], []).append(est) - # Rendering mode. - renderer_modalities = [] - if p["vis_rgb"]: - renderer_modalities.append("rgb") - if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - renderer_modalities.append("depth") - renderer_mode = "+".join(renderer_modalities) - - width, height = None, None - ren = None - for scene_id, scene_ests in ests_org.items(): - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - scene_modality = dataset_params.get_scene_sensor_or_modality(dp_split["eval_modality"], scene_id) - scene_sensor = dataset_params.get_scene_sensor_or_modality(dp_split["eval_sensor"], scene_id) - - # Create a new renderer if image size has changed - scene_width, scene_height = dataset_params.get_im_size(dp_split, scene_modality, scene_sensor) - if (width, height) != (scene_width, scene_height): - width, height = scene_width, scene_height - misc.log(f"Creating renderer of type {p['renderer_type']}") - ren = renderer.create_renderer( - width, height, p["renderer_type"], mode=renderer_mode, shading="flat" - ) - # Load object models in the new renderer. 
- for obj_id in dp_model["obj_ids"]: - misc.log(f"Loading 3D model of object {obj_id}...") - model_path = dp_model["model_tpath"].format(obj_id=obj_id) - model_color = None - if not p["vis_orig_color"]: - model_color = tuple(colors[(obj_id - 1) % len(colors)]) - ren.add_object(obj_id, model_path, surf_color=model_color) - - # Load info and ground-truth poses for the current scene. - scene_camera = inout.load_scene_camera(dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id)) - scene_gt = inout.load_scene_gt(dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id)) + scene_camera = inout.load_scene_camera( + dp_split["scene_camera_tpath"].format(scene_id=scene_id) + ) + scene_gt = inout.load_scene_gt( + dp_split["scene_gt_tpath"].format(scene_id=scene_id) + ) for im_ind, (im_id, im_ests) in enumerate(scene_ests.items()): if im_ind % 10 == 0: split_type_str = " - " + split_type if split_type is not None else "" - misc.log(f"Visualizing pose estimates - method: {method}, dataset: {dataset}{split_type_str}, scene: {scene_id}, im: {im_id}") + misc.log( + "Visualizing pose estimates - method: {}, dataset: {}{}, scene: {}, " + "im: {}".format(method, dataset, split_type_str, scene_id, im_id) + ) - # Retrieve camera intrinsics. - if dataset == 'hot3d': - cam = pose_error_htt.create_camera_model(scene_camera[im_id]) - else: - cam = scene_camera[im_id]["cam_K"] + # Intrinsic camera matrix. + K = scene_camera[im_id]["cam_K"] im_ests_vis = [] im_ests_vis_obj_ids = [] @@ -214,32 +180,24 @@ # Load the color and depth images and prepare images for rendering. rgb = None if p["vis_rgb"]: - # rgb_tpath is an alias refering to the sensor|modality image paths on which the poses are rendered - im_tpath = tpath_keys["rgb_tpath"] - # check for BOP classic (itodd) - rgb_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, 'rgb') - if im_tpath == "rgb_tpath" and not rgb_available: - im_tpath = "gray_tpath" - - rgb = inout.load_im( - dp_split[im_tpath].format(scene_id=scene_id, im_id=im_id) - ) - # if image is grayscale (e.g. quest3), convert it to 3 channels - if rgb.ndim == 2: - rgb = np.dstack([rgb, rgb, rgb]) + if "rgb" in dp_split["im_modalities"]: + rgb = inout.load_im( + dp_split["rgb_tpath"].format(scene_id=scene_id, im_id=im_id) + )[:, :, :3] + elif "gray" in dp_split["im_modalities"]: + gray = inout.load_im( + dp_split["gray_tpath"].format( + scene_id=scene_id, im_id=im_id + ) + ) + rgb = np.dstack([gray, gray, gray]) else: - rgb = rgb[:,:,:3] # should we keep this? - - depth = None - if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - depth_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, "depth") - if not depth_available: - misc.log(f"{scene_sensor} has no depth data, skipping depth visualization") - p["vis_depth_diff"] = False - p["vis_rgb_resolve_visib"] = False - else: + raise ValueError("RGB nor gray images are available.") + + depth = None + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): depth = inout.load_depth( - dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) + dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) ) depth *= scene_camera[im_id]["depth_scale"] # Convert to [mm]. @@ -274,7 +232,7 @@ # Visualization. 
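The image-loading branch above falls back to the gray modality when no RGB images exist (gray-only sensors such as ITODD) and stacks the single channel three times so the renderer overlay still receives a 3-channel image; depth is converted to millimetres with the per-image depth_scale. A numpy sketch with synthetic arrays:

import numpy as np

gray = np.random.randint(0, 256, size=(480, 640), dtype=np.uint8)  # gray-only sensor
rgb = np.dstack([gray, gray, gray])                                 # 3-channel copy
assert rgb.shape == (480, 640, 3)

depth_raw = np.random.rand(480, 640).astype(np.float32)             # as stored on disk
depth_scale = 0.1                                                    # from scene_camera[im_id]
depth_mm = depth_raw * depth_scale                                   # convert to [mm]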
visualization.vis_object_poses( poses=ests_vis, - K=cam, + K=K, renderer=ren, rgb=rgb, depth=depth, diff --git a/scripts/vis_gt_poses.py b/scripts/vis_gt_poses.py index 02ecc05e..ac8aa600 100644 --- a/scripts/vis_gt_poses.py +++ b/scripts/vis_gt_poses.py @@ -13,6 +13,7 @@ from bop_toolkit_lib import dataset_params from bop_toolkit_lib import inout from bop_toolkit_lib import misc +from bop_toolkit_lib import pose_error_htt from bop_toolkit_lib import renderer from bop_toolkit_lib import visualization @@ -22,20 +23,19 @@ htt_available = False try: - from bop_toolkit_lib import pose_error_htt + from bop_toolkit_lib import renderer_htt htt_available = True except ImportError as e: - logger.warning("""Missing hand_tracking_toolkit dependency, - mandatory if you are running evaluation on HOT3d. - Refer to the README.md for installation instructions. - """) - + logger.warn("""Missing hand_tracking_toolkit dependency, + mandatory if you are running evaluation on HOT3d. + Refer to the README.md for installation instructions. + """) # PARAMETERS. ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. @@ -50,18 +50,9 @@ "scene_ids": [], "im_ids": [], "gt_ids": [], - ######### - # Which sensor to visualize, . By default it uses the evaluation modality set - # in dataset_params.py. Set to None for rendering PBR images or BOP core datasets. - # Set to sensor for new BOP core sets, e.g. "photoneo". - ######### - # Modality used to visualize ground truth, default to eval modality. Should not be "depth". - "modality": None, - # Sensor used to visualize ground truth, default to eval sensor. - "sensor": None, # --------------------------------------------------------------------------------- - # Next parameters apply only to dataset with aligned color and depth images. + # Next parameters apply only to classical BOP19 datasets (not the H3 BOP24 format) # --------------------- # Indicates whether to render RGB images. "vis_rgb": True, @@ -77,7 +68,7 @@ # Whether to use the original model color. "vis_orig_color": True, # Type of the renderer (used for the VSD pose error function). - "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. 'htt' is mandatory for "hot3d" dataset. + "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # Folder containing the BOP datasets. "datasets_path": config.datasets_path, # Folder for output visualisations. @@ -96,30 +87,19 @@ } ################################################################################ -####################### -# hot3d specific checks if p["dataset"] == "hot3d" and not htt_available: raise ImportError("Missing hand_tracking_toolkit dependency, mandatory for HOT3D dataset.") -if p["dataset"] == "hot3d" and p["renderer_type"] != "htt": - raise ValueError("'htt' renderer_type is mandatory for HOT3D dataset.") - -# hot3d does not contain depth modality, some visualizations are not available -if p["dataset"] in ["hot3d"]: +# if HOT3D dataset is used, next parameters are set +if p["dataset"] == "hot3d": p["vis_rgb"] = True p["vis_rgb_resolve_visib"] = False p["vis_depth_diff"] = False -####################### # Load dataset parameters. 
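vis_gt_poses.py keeps the optional-import pattern shown above: the hand_tracking_toolkit-backed renderer is imported inside a try/except, a flag records whether it is available, and the script only fails later if the HOT3D dataset actually needs it. A standalone sketch of that pattern; some_optional_module is a placeholder name, not a real package:

import logging

logger = logging.getLogger("vis_gt_poses")

htt_available = False
try:
    import some_optional_module  # in the real script: bop_toolkit_lib.renderer_htt
    htt_available = True
except ImportError:
    logger.warning("Optional dependency missing; HOT3D rendering unavailable.")

dataset = "lm"
if dataset == "hot3d" and not htt_available:
    # Fail fast only when the dependency is actually required.
    raise ImportError("hand_tracking_toolkit is required for the HOT3D dataset.")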
dp_split = dataset_params.get_split_params( p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -assert p["modality"] != "depth", "Modality should be a color modality (not 'depth')" -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] model_type = "eval" # None = default. dp_model = dataset_params.get_model_params(p["datasets_path"], p["dataset"], model_type) @@ -145,39 +125,52 @@ scene_ids_curr = set(scene_ids_curr).intersection(p["scene_ids"]) # Rendering mode. -renderer_modalities = [] -if p["vis_rgb"]: - renderer_modalities.append("rgb") -if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - renderer_modalities.append("depth") -renderer_mode = "+".join(renderer_modalities) - - -width, height = None, None -ren = None - -for scene_id in scene_ids_curr: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - scene_modality = dataset_params.get_scene_sensor_or_modality(p["modality"], scene_id) - scene_sensor = dataset_params.get_scene_sensor_or_modality(p["sensor"], scene_id) - - # Create a new renderer if image size has changed - scene_width, scene_height = dataset_params.get_im_size(dp_split, scene_modality, scene_sensor) - if (width, height) != (scene_width, scene_height): - width, height = scene_width, scene_height - misc.log(f"Creating renderer of type {p['renderer_type']}") - ren = renderer.create_renderer( - width, height, p["renderer_type"], mode=renderer_mode, shading="flat" - ) - # Load object models in the new renderer. - for obj_id in dp_model["obj_ids"]: - misc.log(f"Loading 3D model of object {obj_id}...") - model_path = dp_model["model_tpath"].format(obj_id=obj_id) - model_color = None - if not p["vis_orig_color"]: - model_color = tuple(colors[(obj_id - 1) % len(colors)]) - ren.add_object(obj_id, model_path, surf_color=model_color) +# if classical BOP19 format define render modalities +# The H3 BOP24 format for HOT3D does not include depth images, so this is irrelevant +if not p['dataset'] == "hot3d": + renderer_modalities = [] + if p["vis_rgb"]: + renderer_modalities.append("rgb") + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): + renderer_modalities.append("depth") + renderer_mode = "+".join(renderer_modalities) + +# Create a renderer. +# if HOT3D dataset, create separate renderers for Quest3 and Aria with different image sizes +if p["dataset"] == "hot3d": + quest3_im_size = dp_split["quest3_im_size"][dp_split["quest3_eval_modality"]] + aria_im_size = dp_split["aria_im_size"][dp_split["aria_eval_modality"]] + quest3_ren = renderer_htt.RendererHtt(quest3_im_size, p["renderer_type"], shading="flat") + aria_ren = renderer_htt.RendererHtt(aria_im_size, p["renderer_type"], shading="flat") +else: # classical BOP format + width, height = dp_split["im_size"] + ren = renderer.create_renderer( + width, height, p["renderer_type"], mode=renderer_mode, shading="flat" + ) +# Load object models. 
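Because HOT3D mixes two devices with different image sizes, the patched script keeps one renderer per device and later picks the right one from the scene id (see the next hunk). A standalone sketch of that selection, with placeholder scene-id sets and renderer objects:

quest3_scene_ids = {1, 2, 3}     # placeholder ids; the script reads them from dp_split
aria_scene_ids = {4, 5}

renderers = {
    "quest3": "renderer sized for Quest3 images",   # placeholder objects
    "aria": "renderer sized for Aria images",
}

def pick_renderer(scene_id):
    if scene_id in quest3_scene_ids:
        return renderers["quest3"]
    if scene_id in aria_scene_ids:
        return renderers["aria"]
    raise ValueError("scene {} is not assigned to a device".format(scene_id))

print(pick_renderer(4))  # renderer sized for Aria images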
+models = {} +for obj_id in dp_model["obj_ids"]: + misc.log("Loading 3D model of object {}...".format(obj_id)) + model_path = dp_model["model_tpath"].format(obj_id=obj_id) + model_color = None + if not p["vis_orig_color"]: + model_color = tuple(colors[(obj_id - 1) % len(colors)]) + if p["dataset"] == "hot3d": + quest3_ren.add_object(obj_id, model_path, surf_color=model_color) + aria_ren.add_object(obj_id, model_path, surf_color=model_color) + else: + ren.add_object(obj_id, model_path, surf_color=model_color) + +scene_ids = dataset_params.get_present_scene_ids(dp_split) +for scene_id in scene_ids: + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) + if p["dataset"] == "hot3d": # for other dataset the renderer does not change + # find which renderer to use (quest3 or aria) + if scene_id in dp_split["test_quest3_scene_ids"] or scene_id in dp_split["train_quest3_scene_ids"]: + ren = quest3_ren + elif scene_id in dp_split["test_aria_scene_ids"] or scene_id in dp_split["train_aria_scene_ids"]: + ren = aria_ren # Load scene info and ground-truth poses. scene_camera = inout.load_scene_camera(dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id)) scene_gt = inout.load_scene_gt(dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id)) @@ -198,9 +191,9 @@ ) ) - # Retrieve camera intrinsics. if p['dataset'] == 'hot3d': cam = pose_error_htt.create_camera_model(scene_camera[im_id]) + # TODO might delete if-else here else: cam = scene_camera[im_id]["cam_K"] @@ -231,60 +224,60 @@ } ) - # Load the color and depth images and prepare images for rendering. - rgb = None - if p["vis_rgb"]: - # rgb_tpath is an alias refering to the sensor|modality image paths on which the poses are rendered - im_tpath = tpath_keys["rgb_tpath"] - # check for BOP classic (itodd) - rgb_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, 'rgb') - if im_tpath == "rgb_tpath" and not rgb_available: - im_tpath = "gray_tpath" - + if p["dataset"] == "hot3d": + # load the image of the eval modality rgb = inout.load_im( - dp_split[im_tpath].format(scene_id=scene_id, im_id=im_id) + dp_split[dp_split["eval_modality"](scene_id) + "_tpath"].format(scene_id=scene_id, im_id=im_id) ) - # if image is grayscale (e.g. quest3), convert it to 3 channels + # if image is grayscale (quest3), convert it to 3 channels if rgb.ndim == 2: rgb = np.dstack([rgb, rgb, rgb]) - else: - rgb = rgb[:,:,:3] # should we keep this? + else: + # Load the color and depth images and prepare images for rendering. 
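For HOT3D the split's eval_modality is a callable that maps a scene id to a modality name, and the branch above turns that name into the image-template key. A standalone sketch of the lookup; the modality rule and the template strings are invented:

dp_split = {
    "eval_modality": lambda scene_id: "gray1" if scene_id < 100 else "rgb",
    "gray1_tpath": "{scene_id:06d}/gray1/{im_id:06d}.jpg",
    "rgb_tpath": "{scene_id:06d}/rgb/{im_id:06d}.jpg",
}

scene_id, im_id = 42, 7
modality = dp_split["eval_modality"](scene_id)          # "gray1" for this scene
im_path = dp_split[modality + "_tpath"].format(scene_id=scene_id, im_id=im_id)
print(im_path)  # 000042/gray1/000007.jpg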
+ rgb = None + if p["vis_rgb"]: + if "rgb" in dp_split["im_modalities"] or p["dataset_split_type"] == "pbr": + rgb = inout.load_im( + dp_split["rgb_tpath"].format(scene_id=scene_id, im_id=im_id) + )[:, :, :3] + elif "gray" in dp_split["im_modalities"]: + gray = inout.load_im( + dp_split["gray_tpath"].format(scene_id=scene_id, im_id=im_id) + ) + rgb = np.dstack([gray, gray, gray]) + else: + raise ValueError("RGB nor gray images are available.") depth = None - if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - depth_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, "depth") - if not depth_available: - misc.log(f"{scene_sensor} has no depth data, skipping depth visualization") - p["vis_depth_diff"] = False - p["vis_rgb_resolve_visib"] = False - else: + if p["dataset"] != "hot3d": + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): depth = inout.load_depth( - dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) + dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) ) depth *= scene_camera[im_id]["depth_scale"] # Convert to [mm]. # Path to the output RGB visualization. - split = "{}_{}".format(p["dataset_split"], scene_sensor) if scene_sensor else p["dataset_split"] vis_rgb_path = None if p["vis_rgb"]: vis_rgb_path = p["vis_rgb_tpath"].format( vis_path=p["vis_path"], dataset=p["dataset"], - split=split, + split=p["dataset_split"], scene_id=scene_id, im_id=im_id, ) # Path to the output depth difference visualization. vis_depth_diff_path = None - if p["vis_depth_diff"]: - vis_depth_diff_path = p["vis_depth_diff_tpath"].format( - vis_path=p["vis_path"], - dataset=p["dataset"], - split=split, - scene_id=scene_id, - im_id=im_id, - ) + if p["dataset"] != "hot3d": + if p["vis_depth_diff"]: + vis_depth_diff_path = p["vis_depth_diff_tpath"].format( + vis_path=p["vis_path"], + dataset=p["dataset"], + split=p["dataset_split"], + scene_id=scene_id, + im_id=im_id, + ) # Visualization. visualization.vis_object_poses( diff --git a/scripts/vis_object_symmetries.py b/scripts/vis_object_symmetries.py index 6065dd0d..617645e2 100644 --- a/scripts/vis_object_symmetries.py +++ b/scripts/vis_object_symmetries.py @@ -18,7 +18,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "itodd", # Type of the renderer (used for the VSD pose error function). "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # See misc.get_symmetry_transformations(). @@ -48,12 +48,13 @@ if p["dataset"] == "tless": model_type = "cad" dp_model = dataset_params.get_model_params(p["datasets_path"], p["dataset"], model_type) +dp_camera = dataset_params.get_camera_params(p["datasets_path"], p["dataset"]) -# Use reasonable camera intrinsics default for rendering (copied from T-LESS) -width, height = 1280, 1024 -fx, fy, cx, cy = 1075, 1073, 641, 507 +K = dp_camera["K"] +fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] # Create a renderer. 
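vis_object_symmetries.py now reads intrinsics from the dataset's camera.json via get_camera_params instead of the hard-coded values removed above; unpacking K is plain indexing. A numpy sketch using the former T-LESS defaults as the example matrix:

import numpy as np

K = np.array([
    [1075.0,    0.0, 641.0],
    [   0.0, 1073.0, 507.0],
    [   0.0,    0.0,   1.0],
])

fx, fy = K[0, 0], K[1, 1]   # focal lengths [px]
cx, cy = K[0, 2], K[1, 2]   # principal point [px]
print(fx, fy, cx, cy)       # 1075.0 1073.0 641.0 507.0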
+width, height = dp_camera["im_size"] ren = renderer.create_renderer( width, height, p["renderer_type"], mode="rgb", shading="flat" ) diff --git a/setup.py b/setup.py index 4a943484..f91558ce 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,7 @@ from setuptools import setup, find_packages -package_name = 'bop_toolkit_lib' - setup( - name=package_name, + name="bop_toolkit_lib", version="1.0", packages=find_packages(exclude=("docs")), install_requires=["pytz", "vispy>=0.6.5", "PyOpenGL==3.1.0", "pypng", "cython"], @@ -11,9 +9,4 @@ author_email="tom.hodan@gmail.com, Martin.Sundermeyer@dlr.de", license="MIT license", package_data={"bop_toolkit_lib": ["*"]}, - data_files=[ - ('share/ament_index/resource_index/packages', - ['resource/' + package_name]), - ('share/' + package_name, ['package.xml']), - ], )