From df08ff116c7c4cb7a4db927fd5dcd4c491fe6fc3 Mon Sep 17 00:00:00 2001 From: OrestisVaggelis Date: Mon, 10 Feb 2025 17:46:22 +0200 Subject: [PATCH] Changes to run bop_toolkit on industrial --- bop_toolkit_lib/dataset_params.py | 385 +++++------------- bop_toolkit_lib/inout.py | 93 ++--- bop_toolkit_lib/misc.py | 2 +- bop_toolkit_lib/renderer_batch.py | 2 +- bop_toolkit_lib/tests/eval_bop22_coco_test.py | 35 +- bop_toolkit_lib/tests/test_misc.py | 48 --- bop_toolkit_lib/visualization.py | 2 - docs/bop_datasets_format.md | 1 - requirements.txt | 4 +- scripts/calc_gt_coco.py | 17 +- scripts/calc_gt_distribution.py | 68 +--- scripts/calc_gt_info.py | 31 +- scripts/calc_gt_masks.py | 44 +- scripts/calc_model_info.py | 18 +- scripts/create_coco_results_file_from_gt.py | 14 +- scripts/create_pose_results_file_from_gt.py | 63 +-- scripts/enumerate_test_targets.py | 18 +- scripts/eval_bop19_pose.py | 1 + scripts/eval_bop22_coco.py | 2 +- scripts/eval_bop24_pose.py | 25 -- scripts/eval_calc_errors.py | 9 +- scripts/eval_calc_errors_gpu.py | 12 +- scripts/eval_calc_scores.py | 10 +- scripts/vis_est_poses.py | 152 +++---- scripts/vis_gt_poses.py | 185 ++++----- scripts/vis_object_symmetries.py | 9 +- setup.py | 9 +- 27 files changed, 421 insertions(+), 838 deletions(-) diff --git a/bop_toolkit_lib/dataset_params.py b/bop_toolkit_lib/dataset_params.py index 99a44149..3949b609 100644 --- a/bop_toolkit_lib/dataset_params.py +++ b/bop_toolkit_lib/dataset_params.py @@ -7,8 +7,6 @@ import glob import os from os.path import join -from collections.abc import Callable -from typing import Union, Dict from bop_toolkit_lib import inout @@ -47,10 +45,6 @@ def get_camera_params(datasets_path, dataset_name, cam_type=None): cam_type = "uw" cam_filename = "camera_{}.json".format(cam_type) - # hot3d does not have a single camera file, raise an exception - elif dataset_name in ['hot3d']: - raise ValueError("BOP dataset {} does not have a global camera file.".format(dataset_name)) - else: cam_filename = "camera.json" @@ -95,8 +89,7 @@ def get_model_params(datasets_path, dataset_name, model_type=None): "hopev2": list(range(1, 29)), "hot3d": list(range(1, 34)), "handal": list(range(1, 41)), - "ipd": [0, 1, 4, 8, 10, 11, 14, 18, 19, 20], - "xyzibd": [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + "industrial": [1,2,3,4,5] }[dataset_name] # ID's of objects with ambiguous views evaluated using the ADI pose error @@ -118,8 +111,7 @@ def get_model_params(datasets_path, dataset_name, model_type=None): "hopev2": [], "hot3d": [1, 2, 3, 5, 22, 24, 25, 29, 30, 32], "handal": [26, 35, 36, 37, 38, 39, 40], - "ipd": [8, 14, 18, 19, 20], - "xyzibd": [1, 2, 5, 8, 9, 11, 12, 16, 17] + "industrial": [1,3,5] }[dataset_name] # T-LESS includes two types of object models, CAD and reconstructed. @@ -185,15 +177,12 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): depth_ext = ".tif" p["im_modalities"] = ["rgb", "depth"] - # for Classic datasets, sensor and modality used for the evaluation is implicit... - p["eval_sensor"] = None + # for Classic datasets, test modality is implicit... p["eval_modality"] = None - # ...and only one set of annotation is present in the dataset + # ...and only one set of annotation is present in the dataset # (e.g. scene_gt.json instead of scene_gt_rgb.json, scene_gt_gray1.json etc.) 
- sensor_modalities_have_separate_annotations = False - # file extensions for datasets with multiple sensor/modalities options - # has to be set if sensor_modalities_have_separate_annotations is True - exts = None + modalities_have_separate_annotations = False + exts = None # has to be set if modalities_have_separate_annotations is True supported_error_types = ["ad", "add", "adi", "vsd", "mssd", "mspd", "cus", "proj"] @@ -203,7 +192,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (600.90, 1102.35) # Range of camera-object distances. + p["depth_range"] = (600.90, 1102.35) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (0, 0.5 * math.pi) @@ -213,7 +202,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (346.31, 1499.84) # Range of camera-object distances. + p["depth_range"] = (346.31, 1499.84) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (0, 0.5 * math.pi) @@ -249,7 +238,16 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): # The following holds for Primesense, but is similar for the other sensors. if split == "test": - p["depth_range"] = (649.89, 940.04) # Range of camera-object distances. + p["depth_range"] = (649.89, 940.04) + p["azimuth_range"] = (0, 2 * math.pi) + p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) + + elif dataset_name == "industrial": + p["scene_ids"] = [1,2,3,4,5,6,7,8] + p["im_size"] = (640, 480) + + if split == "test": + p["depth_range"] = (300, 950) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -262,7 +260,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (569.88, 1995.27) # Range of camera-object distances. + p["depth_range"] = (851.29, 2016.14) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.4363, 0.5 * math.pi) # (-25, 90) [deg]. @@ -272,7 +270,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (499.57, 1246.07) # Range of camera-object distances. + p["depth_range"] = (499.57, 1246.07) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -282,7 +280,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (594.41, 739.12) # Range of camera-object distances. + p["depth_range"] = (594.41, 739.12) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -292,7 +290,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (509.12, 1120.41) # Range of camera-object distances. + p["depth_range"] = (509.12, 1120.41) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (0, 0.5 * math.pi) @@ -302,7 +300,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (454.56, 1076.29) # Range of camera-object distances. + p["depth_range"] = (454.56, 1076.29) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-1.0297, 0.5 * math.pi) # (-59, 90) [deg]. 
@@ -314,7 +312,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_modalities"] = ["gray", "depth"] if split == "test": - p["depth_range"] = (638.38, 775.97) # Range of camera-object distances. + p["depth_range"] = (638.38, 775.97) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -346,7 +344,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): # The following holds for Primesense, but is similar for Kinect. if split == "test": - p["depth_range"] = (438.24, 1416.97) # Range of camera-object distances. + p["depth_range"] = (438.24, 1416.97) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) @@ -367,7 +365,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["im_size"] = (640, 480) if split == "test": - p["depth_range"] = (612.92, 1243.59) # Range of camera-object distances. + p["depth_range"] = (612.92, 1243.59) p["azimuth_range"] = (0, 2 * math.pi) p["elev_range"] = (-1.2788, 1.1291) # (-73.27, 64.69) [deg]. @@ -384,7 +382,7 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): p["depth_range"] = None # Not calculated yet. p["azimuth_range"] = None # Not calculated yet. p["elev_range"] = None # Not calculated yet. - + # HOPEV2. elif dataset_name == "hopev2": p["scene_ids"] = { @@ -415,8 +413,8 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): # HOT3D. elif dataset_name == "hot3d": - sensor_modalities_have_separate_annotations = {"aria": True, "quest3": True} - p["im_modalities"] = {"aria": ["rgb", "gray1", "gray2"], "quest3": ["gray1", "gray2"]} + modalities_have_separate_annotations = True + p["im_modalities"] = ["rgb","gray1","gray2"] p["test_quest3_scene_ids"] = list(range(1288, 1849)) p["test_aria_scene_ids"] = list(range(3365, 3832)) p["train_quest3_scene_ids"] = list(range(0, 1288)) @@ -425,11 +423,8 @@ def get_split_params(datasets_path, dataset_name, split, split_type=None): "test": p["test_quest3_scene_ids"] + p["test_aria_scene_ids"], # test_quest3 + test_aria "train": p["train_quest3_scene_ids"] + p["train_aria_scene_ids"], # train_quest3 + train_aria }[split] - - p["im_size"] = { - "aria" : {"rgb": (1408, 1408), "gray1": (640, 480), "gray2": (640, 480)}, - "quest3" : {"gray1": (1280, 1024), "gray2": (1280, 1024)} - } + p["quest3_im_size"] = {"gray1": (1280, 1024), "gray2": (1280, 1024)} + p["aria_im_size"] = {"rgb": (1408, 1408), "gray1": (640, 480), "gray2": (640, 480)} p["quest3_eval_modality"] = "gray1" p["aria_eval_modality"] = "rgb" @@ -441,20 +436,12 @@ def hot3d_eval_modality(scene_id): else: raise ValueError("scene_id {} not part of hot3d valid scenes".format(scene_id)) - def hot3d_eval_sensor(scene_id): - if scene_id in p["test_quest3_scene_ids"] or scene_id in p["train_quest3_scene_ids"]: - return "quest3" - elif scene_id in p["test_aria_scene_ids"] or scene_id in p["train_aria_scene_ids"]: - return "aria" - else: - raise ValueError("scene_id {} not part of hot3d valid scenes".format(scene_id)) - p["eval_modality"] = hot3d_eval_modality - p["eval_sensor"] = hot3d_eval_sensor exts = { - "aria" : {"rgb": ".jpg", "gray1": ".jpg", "gray2": ".jpg"}, - "quest3": {"gray1": ".jpg", "gray2": ".jpg"} + "rgb": ".jpg", + "gray1": ".jpg", + "gray2": "jpg", } if split == "test": @@ -463,115 +450,6 @@ def hot3d_eval_sensor(scene_id): p["elev_range"] = None # Not calculated yet. 
supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] - elif dataset_name == "ipd": - sensor_modalities_have_separate_annotations = {"photoneo": False, "cam1" : False, "cam2" : False, "cam3" : False} - p["im_modalities"] = {"photoneo": ["rgb", "depth"], "cam1" : ["rgb", "aolp", "dolp", "depth"], - "cam2" : ["rgb", "aolp", "dolp", "depth"], "cam3" : ["rgb", "aolp", "dolp", "depth"]} - p["scene_ids"] = { - "test": list(range(15)), - "train": list(range(10)), - "val": list(range(15)), - }[split] - - p["im_size"] = { - "photoneo" : (2064, 1544), - "cam1" : (3840, 2160), - "cam2": (3840, 2160), - "cam3": (3840, 2160), - "": (2400, 2400), - } - - p["eval_modality"] = "rgb" - p["eval_sensor"] = "photoneo" - - exts = { - "photoneo": {"rgb": ".png", "depth": ".png"}, - "cam1": {"rgb": ".png", "depth": ".png", "aolp": ".png", "dolp": ".png"}, - "cam2": {"rgb": ".png", "depth": ".png", "aolp": ".png", "dolp": ".png"}, - "cam3": {"rgb": ".png", "depth": ".png", "aolp": ".png", "dolp": ".png"}, - } - - if split == "test": - p["depth_range"] = None # Not calculated yet. - p["azimuth_range"] = None # Not calculated yet. - p["elev_range"] = None # Not calculated yet. - - supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] - - elif dataset_name == "xyzibd": - sensor_modalities_have_separate_annotations = {"photoneo": False, "xyz": False, "realsense": False} - p["im_modalities"] = {"photoneo": ["gray", "depth"], "xyz": ["gray", "depth"], "realsense": ["rgb", "depth"]} - val_scene_ids = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 54, 60, 65, 70] - p["scene_ids"] = { - "test": [i for i in range(1, 75) if i not in val_scene_ids], - "val": val_scene_ids, - "train": list(range(45)), - }[split] - - p["im_size"] = { - "xyz": (1440, 1080), - "realsense": (1280, 720), - "photoneo": (2064, 1544), - "": (1440, 1080), - } - - p["eval_modality"] = "gray" - p["eval_sensor"] = "xyz" - - if "pbr" == split_type: - # The PBR data is in classical BOP format without sensor names. - p["eval_modality"] = None - p["eval_sensor"] = None - sensor_modalities_have_separate_annotations = False - - exts = { - "photoneo": {"gray": ".png", "depth": ".png"}, - "xyz": {"gray": ".png", "depth": ".png"}, - "realsense": {"rgb": ".png", "depth": ".png"}, - } - - if split == "test": - p["depth_range"] = None # Not calculated yet. - p["azimuth_range"] = None # Not calculated yet. - p["elev_range"] = None # Not calculated yet. - - supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] - elif dataset_name == "itoddmv": - sensor_modalities_have_separate_annotations = {"3d1": False, "cam0": False, "cam1": False, "cam2": False} - p["im_modalities"] = {"3dlong": ["gray", "depth"], "cam0": ["gray"], "cam1": ["gray"], "cam2": ["gray"]} - p["scene_ids"] = { - "test": [1], - "train": list(range(50)), - }[split] - - p["im_size"] = { - "3dlong": (1280, 960), - "cam0": (4224, 2838), - "cam1": (4224, 2838), - "cam2": (4224, 2838), - "": (1280, 960), - } - - p["eval_modality"] = "gray" - p["eval_sensor"] = "3dlong" - - if "pbr" == split_type: - # The PBR data is in classical BOP format without sensor names. - p["eval_modality"] = None - p["eval_sensor"] = None - sensor_modalities_have_separate_annotations = False - - exts = { - "3dlong": {"gray": ".tif", "depth": ".tif"}, - "cam0": {"gray": ".tif"}, - } - - if split == "test": - p["depth_range"] = (638.38, 775.97) # Range of camera-object distances. 
- p["azimuth_range"] = (0, 2 * math.pi) - p["elev_range"] = (-0.5 * math.pi, 0.5 * math.pi) - - supported_error_types = ["ad", "add", "adi", "mssd", "mspd"] else: raise ValueError("Unknown BOP dataset ({}).".format(dataset_name)) @@ -579,17 +457,14 @@ def hot3d_eval_sensor(scene_id): base_path = join(datasets_path, dataset_name) split_path = join(base_path, split) if split_type is not None: - if split_type == "pbr" and dataset_name != "xyzibd": + if split_type == "pbr": p["scene_ids"] = list(range(50)) split_path += "_" + split_type # Path to the split directory. p["split_path"] = split_path p["supported_error_types"] = supported_error_types - - # For classic BOP format datasets with one gt file per folder - classic_bop_format = type(p["im_modalities"]) is list - if classic_bop_format: + if not modalities_have_separate_annotations: p.update( { # Path template to a gray image. @@ -636,154 +511,90 @@ def hot3d_eval_sensor(scene_id): else: assert exts is not None, "Need to set 'exts' for dataset {}".format() - # im_modalities is a dict from sensor to modalities - for sensor, modalities in p["im_modalities"].items(): - for modality in modalities: - # If modalities have aligned extrinsics/intrinsics they are combined in one file - gt_file_suffix = sensor - # If modalities have separate extrinsics/intrinsics they are accessed by unique modalities (compatible with hot3d) - if sensor_modalities_have_separate_annotations[sensor]: - gt_file_suffix = modality - - # Path template to modality image. - if dataset_name == "hot3d": - p[f"{modality}_{sensor}_tpath"] = join( - split_path, "{scene_id:06d}", f"{modality}", "{im_id:06d}" + exts[sensor][modality] - ) - else: - p[f"{modality}_{sensor}_tpath"] = join( - split_path, "{scene_id:06d}", f"{modality}_{sensor}", "{im_id:06d}" + exts[sensor][modality] - ) - p.update( - { - # Path template to a file with per-image camera parameters. - "scene_camera_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_camera_{}.json".format(gt_file_suffix) - ), - # Path template to a file with GT annotations. - "scene_gt_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_gt_{}.json".format(gt_file_suffix) - ), - # Path template to a file with meta information about the GT annotations. - "scene_gt_info_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(gt_file_suffix) - ), - # Path template to a file with the coco GT annotations. - "scene_gt_coco_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(gt_file_suffix) - ), - # Path template to a mask of the full object silhouette. - "mask_{}_{}_tpath".format(modality, sensor): join( - split_path, "{scene_id:06d}", "mask_{}".format(gt_file_suffix), "{im_id:06d}_{gt_id:06d}.png" - ), - # Path template to a mask of the visible part of an object silhouette. - "mask_visib_{}_{}_tpath".format(modality, sensor): join( - split_path, - "{scene_id:06d}", - "mask_visib_{}".format(gt_file_suffix), - "{im_id:06d}_{gt_id:06d}.png", - ), - } - ) + for moda in p["im_modalities"]: + p.update( + { + # Path template to modality image. + "{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", moda, "{im_id:06d}" + exts[moda] + ), + # Path template to a file with per-image camera parameters. + "scene_camera_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_camera_{}.json".format(moda) + ), + # Path template to a file with GT annotations. 
+ "scene_gt_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_gt_{}.json".format(moda) + ), + # Path template to a file with meta information about the GT annotations. + "scene_gt_info_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(moda) + ), + # Path template to a file with the coco GT annotations. + "scene_gt_coco_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(moda) + ), + # Path template to a mask of the full object silhouette. + "mask_{}_tpath".format(moda): join( + split_path, "{scene_id:06d}", "mask_{}".format(moda), "{im_id:06d}_{gt_id:06d}.png" + ), + # Path template to a mask of the visible part of an object silhouette. + "mask_visib_{}_tpath".format(moda): join( + split_path, + "{scene_id:06d}", + "mask_visib_{}".format(moda), + "{im_id:06d}_{gt_id:06d}.png", + ), + } + ) return p -def get_scene_sensor_or_modality( - sm: Union[None, str, Callable], - scene_id: Union[None, int] - ) -> Union[None,str]: - """ - Get sensor|modality associated with a given scene. - - Some datasets (hot3d) have different sensor|modality available depending on the scene. - Same logic for sensor or modality. - """ - if sm is None or isinstance(sm, str): - return sm - elif callable(sm): - return sm(scene_id) - else: - raise TypeError(f"Sensor or modality {sm} should be either None, str or callable, not {type(sm)}") - - -def scene_tpaths_keys( - modality: Union[None, str, Callable], - sensor: Union[None, str, Callable], - scene_id: Union[None, int] = None - ) -> Dict[str,str]: +def scene_tpaths_keys(eval_modality, scene_id=None): """ Define keys corresponding template path defined in get_split_params output. - + Definition for scene gt, scene gt info and scene camera. - - Classic datasets (handal and hopev2 included): "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath", etc. - - hot3d and Industrial datasets: same tpath keys with modality and sensor, - e.g. "scene_gt_{modality}_{sensor}_tpath", "scene_gt_info_{modality}_{sensor}_tpath", - "scene_camera_{modality}_{sensor}_tpath", etc. - Modality|sensor may be the same for the whole dataset split (defined as a `str`), - or vary scene by scene (defined as function). - - :param modality: None, str or callable - :param sensor: None, str or callable - :param scene_id: None or int, should be specified if eval modality|sensor - changes from scene to scene + - Classic datasets: "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath" + - H3 datasets: with separate annotations for modalities, e.g. "scene_gt_{modality}_tpath", + "scene_gt_info_{modality}_tpath", "scene_camera_{modality}_tpath", etc. 
+      Modality may be the same for the whole dataset split (defined as a `str`),
+      or vary scene by scene (defined as a function or a dictionary).
+
+    :param eval_modality: None, str, callable or dict, defines the modality used for evaluation
+    :param scene_id: None or int, should be specified if eval modality
+      changes from scene to scene
     :return: scene tpath keys dictionary
     """
-    scene_sensor = get_scene_sensor_or_modality(sensor, scene_id)
-    scene_modality = get_scene_sensor_or_modality(modality, scene_id)
-
-    # 2 valid combinations:
-    # - modality and sensor are None -> BOP classic format
-    # - modality and sensor are not None -> hot3d + BOP industrial format
-    assert ((scene_modality is None and scene_sensor is None) or (scene_modality is not None and scene_sensor is not None)), f"scene_modality={scene_modality}, scene_sensor={scene_sensor}"
-
-    # "rgb_tpath" refers to the template path key of the given modality|sensor pair
     tpath_keys = [
-        "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath",
-        "scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath", "rgb_tpath"
+        "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath", 
+        "scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath"
     ]
     tpath_keys_multi = [
-        "scene_gt_{}_{}_tpath", "scene_gt_info_{}_{}_tpath", "scene_camera_{}_{}_tpath",
-        "scene_gt_coco_{}_{}_tpath", "mask_{}_{}_tpath", "mask_visib_{}_{}_tpath", "{}_{}_tpath"
+        "scene_gt_{}_tpath", "scene_gt_info_{}_tpath", "scene_camera_{}_tpath", 
+        "scene_gt_coco_{}_tpath", "mask_{}_tpath", "mask_visib_{}_tpath"
     ]
-    assert len(tpath_keys) == len(tpath_keys_multi)
+    assert len(tpath_keys) == len(tpath_keys_multi) 

     tpath_keys_dic = {}
     for key, key_multi in zip(tpath_keys, tpath_keys_multi):
-        if scene_sensor is None:
-            # BOP-Classic filenames
+        if eval_modality is None:
+            # Classic filenames
             tpath_keys_dic[key] = key
+        elif isinstance(eval_modality, str):
+            tpath_keys_dic[key] = key_multi.format(eval_modality)
+        elif callable(eval_modality) and scene_id is not None:
+            tpath_keys_dic[key] = key_multi.format(eval_modality(scene_id))
+        elif isinstance(eval_modality, dict) and scene_id is not None:
+            tpath_keys_dic[key] = key_multi.format(eval_modality[scene_id])
         else:
-            tpath_keys_dic[key] = key_multi.format(scene_modality, scene_sensor)
-
-    tpath_keys_dic["depth_tpath"] = tpath_keys_dic["rgb_tpath"].replace("rgb","depth").replace("gray","depth")
+            raise ValueError("eval_modality type not supported, either None, str, callable or dictionary")
+        
     return tpath_keys_dic


-def sensor_has_modality(dp_split: Dict, sensor: str, modality: str):
-    if isinstance(dp_split["im_modalities"], list):
-        return modality in dp_split["im_modalities"]
-    else:
-        return modality in dp_split["im_modalities"][sensor]
-
-
-def get_im_size(dp_split: Dict, modality: str, sensor: str):
-    """
-    Conveniance function to retrieve the image size of a modality|sensor pair.
-    """
-    if isinstance(dp_split["im_size"], dict):
-        if isinstance(dp_split["im_size"][sensor], dict):
-            # hot3d
-            return dp_split["im_size"][sensor][modality]
-        else:
-            # BOP Industrial
-            return dp_split["im_size"][sensor]
-    # BOP Classic: one image size for the whole dataset
-    else:
-        return dp_split["im_size"]
-
-
 def get_present_scene_ids(dp_split):
     """Returns ID's of scenes present in the specified dataset split.
diff --git a/bop_toolkit_lib/inout.py b/bop_toolkit_lib/inout.py index 7567624e..e65cb98a 100644 --- a/bop_toolkit_lib/inout.py +++ b/bop_toolkit_lib/inout.py @@ -4,7 +4,6 @@ """I/O functions.""" import os -import gzip import struct import numpy as np import imageio @@ -69,65 +68,51 @@ def save_depth(path, im): def load_json(path, keys_to_int=False): """Loads content of a JSON file. - :param path: Path to the JSON file. If ".json.gz" extension, opens with gzip. + :param path: Path to the JSON file. :return: Content of the loaded JSON file. """ # Keys to integers. def convert_keys_to_int(x): return {int(k) if k.lstrip("-").isdigit() else k: v for k, v in x.items()} - - # Open+decompress with gzip if ".json.gz" file extension - if path.endswith('.json.gz'): - f = gzip.open(path, "rt", encoding="utf8") - else: - f = open(path, "r") - if keys_to_int: - content = json.load(f, object_hook=lambda x: convert_keys_to_int(x)) - else: - content = json.load(f) - f.close() + with open(path, "r") as f: + if keys_to_int: + content = json.load(f, object_hook=lambda x: convert_keys_to_int(x)) + else: + content = json.load(f) return content -def save_json(path, content, compress=False): +def save_json(path, content): """Saves the provided content to a JSON file. :param path: Path to the output JSON file. :param content: Dictionary/list to save. - :param compress: Saves as a gzip archive, appends ".gz" extension to filepath. """ - if compress: - path += ".gz" - f = gzip.open(path, "wt", encoding="utf8") - else: - f = open(path, "w") - - if isinstance(content, dict): - f.write("{\n") - content_sorted = sorted(content.items(), key=lambda x: x[0]) - for elem_id, (k, v) in enumerate(content_sorted): - f.write(' "{}": {}'.format(k, json.dumps(v, sort_keys=True))) - if elem_id != len(content) - 1: - f.write(",") - f.write("\n") - f.write("}") - - elif isinstance(content, list): - f.write("[\n") - for elem_id, elem in enumerate(content): - f.write(" {}".format(json.dumps(elem, sort_keys=True))) - if elem_id != len(content) - 1: - f.write(",") - f.write("\n") - f.write("]") + with open(path, "w") as f: + if isinstance(content, dict): + f.write("{\n") + content_sorted = sorted(content.items(), key=lambda x: x[0]) + for elem_id, (k, v) in enumerate(content_sorted): + f.write(' "{}": {}'.format(k, json.dumps(v, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(",") + f.write("\n") + f.write("}") + + elif isinstance(content, list): + f.write("[\n") + for elem_id, elem in enumerate(content): + f.write(" {}".format(json.dumps(elem, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(",") + f.write("\n") + f.write("]") - else: - json.dump(content, f, sort_keys=True) - - f.close() + else: + json.dump(content, f, sort_keys=True) def load_cam_params(path): @@ -434,7 +419,7 @@ def check_bop_results(path, version="bop19"): def check_coco_results(path, version="bop22", ann_type="segm", enforce_no_segm_if_bbox=False): """Checks if the format of extended COCO results is correct. - :param path: Path to a file with coco estimates. If ".json.gz" extension, opens with gzip. + :param path: Path to a file with coco estimates. :param version: Version of the results. :param ann_type: type of annotation expected in the file. 
"bbox" -> bounding boxes @@ -485,7 +470,7 @@ def check_coco_results(path, version="bop22", ann_type="segm", enforce_no_segm_i return check_passed, check_msg -def save_coco_results(path, results, version="bop22", compress=False): +def save_coco_results(path, results, version="bop22"): """Saves detections/instance segmentations for each scene in coco format. "bbox" should be [x,y,w,h] in pixels @@ -496,6 +481,7 @@ def save_coco_results(path, results, version="bop22", compress=False): :param version: Version of the results. """ + # See docs/bop_challenge_2022.md for details. if version == "bop22": coco_results = [] for res in results: @@ -512,7 +498,7 @@ def save_coco_results(path, results, version="bop22", compress=False): "time": res["run_time"] if "run_time" in res else -1, } ) - save_json(path, coco_results, compress) + save_json(path, coco_results) else: raise ValueError("Unknown version of BOP detection results.") @@ -621,7 +607,6 @@ def load_ply(path): "float": ("f", 4), "double": ("d", 8), "int": ("i", 4), - "uint": ("I", 4), "uchar": ("B", 1), } @@ -858,19 +843,7 @@ def save_ply2( def get_im_targets(im_gt, im_gt_info, visib_gt_min, eval_mode="localization"): - """ - From an image gt and gt info, given a minimum visibility, get valid object evaluation targets. - - Output format: dict[obj_id] - { - : {'inst_count': }, - : {'inst_count': }, - ... - } - """ im_targets = {} - # Objects gt detection are have gt and gt_info have same order. - # object id is retrieved from gt and visibility from gt info. for gt_id, gt in enumerate(im_gt): gt_info = im_gt_info[gt_id] obj_id = gt["obj_id"] @@ -883,4 +856,4 @@ def get_im_targets(im_gt, im_gt_info, visib_gt_min, eval_mode="localization"): if obj_id not in im_targets: im_targets[obj_id] = {"inst_count": 0} im_targets[obj_id]["inst_count"] += 1 - return im_targets + return im_targets \ No newline at end of file diff --git a/bop_toolkit_lib/misc.py b/bop_toolkit_lib/misc.py index 97e1d54a..24b1f5f6 100644 --- a/bop_toolkit_lib/misc.py +++ b/bop_toolkit_lib/misc.py @@ -131,6 +131,7 @@ def precompute_lazy(depth_im, K): :return: hxw ndarray (Xs/depth_im, Ys/depth_im) """ if depth_im.shape != Precomputer.depth_im_shape: + Precomputer.depth_im_shape = depth_im.shape Precomputer.xs, Precomputer.ys = np.meshgrid( np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0]) ) @@ -142,7 +143,6 @@ def precompute_lazy(depth_im, K): Precomputer.pre_Xs = (Precomputer.xs - K[0, 2]) / np.float64(K[0, 0]) Precomputer.pre_Ys = (Precomputer.ys - K[1, 2]) / np.float64(K[1, 1]) - Precomputer.depth_im_shape = depth_im.shape return Precomputer.pre_Xs, Precomputer.pre_Ys diff --git a/bop_toolkit_lib/renderer_batch.py b/bop_toolkit_lib/renderer_batch.py index dab36e72..ed87e83b 100644 --- a/bop_toolkit_lib/renderer_batch.py +++ b/bop_toolkit_lib/renderer_batch.py @@ -96,7 +96,7 @@ def run_vsd(self, all_im_errs): for worker_id in range(num_workers_used): cmd = [ "python", - "bop_toolkit_lib/call_vsd_worker.py", + "external/bop_toolkit/bop_toolkit_lib/call_vsd_worker.py", f"--input_dir={self.tmp_dir}", f"--worker_id={worker_id}", ] diff --git a/bop_toolkit_lib/tests/eval_bop22_coco_test.py b/bop_toolkit_lib/tests/eval_bop22_coco_test.py index b552a31e..172c3b26 100644 --- a/bop_toolkit_lib/tests/eval_bop22_coco_test.py +++ b/bop_toolkit_lib/tests/eval_bop22_coco_test.py @@ -5,20 +5,20 @@ from tqdm import tqdm from bop_toolkit_lib import inout +# EPS_AP = 0.001 -# Define path to directories -RESULT_PATH = "./bop_toolkit_lib/tests/data/" -EVAL_PATH = 
"./bop_toolkit_lib/tests/eval/" -LOGS_PATH = "./bop_toolkit_lib/tests/logs" -os.makedirs(EVAL_PATH, exist_ok=True) -os.makedirs(LOGS_PATH, exist_ok=True) +# Define the input directory +INPUT_DIR = "./bop_toolkit_lib/tests/data/" + +# Define the output directory +OUTPUT_DIR = "./bop_toolkit_lib/tests/logs" +os.makedirs(OUTPUT_DIR, exist_ok=True) # Define the dataset dictionary -# tuples: (submission name, annotation type, compressed) FILE_DICTIONARY = { - "ycbv_zebra_segm": ("zebraposesat-effnetb4_ycbv-test_5ed0eecc-96f8-498b-9438-d586d4d92528", "segm", False), - "ycbv_gdrnppdet_bbox": ("gdrnppdet-pbrreal_ycbv-test_abe6c5f1-cb26-4bbd-addc-bb76dd722a96", "bbox", True), + "ycbv_zebra_segm": ("zebraposesat-effnetb4_ycbv-test_5ed0eecc-96f8-498b-9438-d586d4d92528", "segm"), + "ycbv_gdrnppdet_bbox": ("gdrnppdet-pbrreal_ycbv-test_abe6c5f1-cb26-4bbd-addc-bb76dd722a96", "bbox"), } # From BOP website @@ -57,24 +57,22 @@ } # Loop through each entry in the dictionary and execute the command -for dataset_method_name, (sub_name, ann_type, compressed) in tqdm( +for dataset_method_name, (sub_name, ann_type) in tqdm( FILE_DICTIONARY.items(), desc="Executing..." ): - ext = ".json.gz" if compressed else ".json" - result_filename = sub_name + ext command = [ "python", "scripts/eval_bop22_coco.py", - "--results_path", RESULT_PATH, - "--eval_path", EVAL_PATH, - "--result_filenames", result_filename, + "--results_path", INPUT_DIR, + "--eval_path", INPUT_DIR, + "--result_filenames", sub_name+".json", "--bbox_type", "amodal", "--ann_type", ann_type ] command_ = " ".join(command) print(f"Executing: {command_}") start_time = time.time() - log_file_path = f"{LOGS_PATH}/eval_bop22_coco_test_{dataset_method_name}.txt" + log_file_path = f"{OUTPUT_DIR}/eval_bop22_coco_test_{dataset_method_name}.txt" with open(log_file_path, "a") as output_file: subprocess.run(command, stdout=output_file, stderr=subprocess.STDOUT) end_time = time.time() @@ -84,10 +82,11 @@ # Check scores for each dataset -for sub_short_name, (sub_name, ann_type, compressed) in tqdm(FILE_DICTIONARY.items(), desc="Verifying..."): +for sub_short_name, (sub_name, ann_type) in tqdm(FILE_DICTIONARY.items(), desc="Verifying..."): if sub_short_name in EXPECTED_OUTPUT: + ann_type = FILE_DICTIONARY[sub_short_name][1] eval_filename = f"scores_bop22_coco_{ann_type}.json" - eval_file_path = os.path.join(RESULT_PATH, sub_name, eval_filename) + eval_file_path = os.path.join(INPUT_DIR, sub_name, eval_filename) eval_scores = inout.load_json(eval_file_path) for key, expected_score in EXPECTED_OUTPUT[sub_short_name].items(): eval_score = eval_scores.get(key) diff --git a/bop_toolkit_lib/tests/test_misc.py b/bop_toolkit_lib/tests/test_misc.py index fbaa2af1..da9cda11 100644 --- a/bop_toolkit_lib/tests/test_misc.py +++ b/bop_toolkit_lib/tests/test_misc.py @@ -91,54 +91,6 @@ def test_project_pts(self): proj_htt[i] = pose_error_htt.project_pts_htt(self.pts, camera, R_np[i], t_np[i]) self.assertTrue(np.allclose(proj_htt, proj_np, atol=1e-4)) - def test_precomputer(self): - - # precomputer static class start with None attributes - self.assertTrue(misc.Precomputer.xs is None) - self.assertTrue(misc.Precomputer.ys is None) - self.assertTrue(misc.Precomputer.pre_Xs is None) - self.assertTrue(misc.Precomputer.pre_Ys is None) - self.assertTrue(misc.Precomputer.K is None) - - Ka = np.eye(3) - depth_ima = np.ones((10,10)) - - pre_Xs1, pre_Ys1 = misc.Precomputer.precompute_lazy(depth_ima, Ka) - self.assertEqual(depth_ima.shape, pre_Xs1.shape) - self.assertEqual(depth_ima.shape, 
pre_Ys1.shape) - - # same inputs should return the same internal objects - pre_Xs1_bis, pre_Ys1_bis = misc.Precomputer.precompute_lazy(depth_ima, Ka) - self.assertEqual(id(pre_Xs1), id(pre_Xs1_bis)) - self.assertEqual(id(pre_Ys1), id(pre_Ys1_bis)) - self.assertTrue(np.allclose(pre_Xs1, pre_Xs1_bis, atol=1e-9)) - self.assertTrue(np.allclose(pre_Ys1, pre_Ys1_bis, atol=1e-9)) - - # different intrinsics should trigger recomputation - Kb = 2*np.eye(3) - pre_Xs2, pre_Ys2 = misc.Precomputer.precompute_lazy(depth_ima, Kb) - self.assertNotEqual(id(pre_Xs1), id(pre_Xs2)) - self.assertNotEqual(id(pre_Ys1), id(pre_Ys2)) - self.assertFalse(np.allclose(pre_Xs1, pre_Xs2, atol=1e-9)) - self.assertFalse(np.allclose(pre_Ys1, pre_Ys2, atol=1e-9)) - - # different depth image should trigger recomputation - depth_imb = np.ones((20,20)) - pre_Xs3, pre_Ys3 = misc.Precomputer.precompute_lazy(depth_imb, Kb) - self.assertNotEqual(id(pre_Xs2), id(pre_Xs3)) - self.assertNotEqual(id(pre_Ys2), id(pre_Ys3)) - self.assertNotEqual(pre_Xs2.shape, pre_Xs3.shape) - self.assertNotEqual(pre_Ys2.shape, pre_Ys3.shape) - - # different intrinsics and depth image should trigger recomputation - Kc = 3*np.eye(3) - depth_imc = np.ones((30,30)) - pre_Xs4, pre_Ys4 = misc.Precomputer.precompute_lazy(depth_imc, Kc) - self.assertNotEqual(id(pre_Xs3), id(pre_Xs4)) - self.assertNotEqual(id(pre_Ys3), id(pre_Ys4)) - self.assertNotEqual(pre_Xs3.shape, pre_Xs4.shape) - self.assertNotEqual(pre_Ys3.shape, pre_Ys4.shape) - if __name__ == "__main__": unittest.main() diff --git a/bop_toolkit_lib/visualization.py b/bop_toolkit_lib/visualization.py index 3ffbedbf..e322b843 100644 --- a/bop_toolkit_lib/visualization.py +++ b/bop_toolkit_lib/visualization.py @@ -268,8 +268,6 @@ def vis_object_poses( {"name": "min diff", "fmt": ":.3f", "val": np.min(depth_diff_valid)}, {"name": "max diff", "fmt": ":.3f", "val": np.max(depth_diff_valid)}, {"name": "mean diff", "fmt": ":.3f", "val": np.mean(depth_diff_valid)}, - {"name": "median diff", "fmt": ":.3f", "val": np.median(np.abs(depth_diff_valid))}, - {"name": "25 percentile", "fmt": ":.3f", "val": np.percentile(np.abs(depth_diff_valid), 25)}, ] depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info) inout.save_im(vis_depth_diff_path, depth_diff_vis) diff --git a/docs/bop_datasets_format.md b/docs/bop_datasets_format.md index 5af2fda1..7ffe56b3 100644 --- a/docs/bop_datasets_format.md +++ b/docs/bop_datasets_format.md @@ -22,7 +22,6 @@ DATASET_NAME │ │ ├─ scene_camera.json │ │ ├─ scene_gt.json │ │ ├─ scene_gt_info.json -│ │ ├─ scene_gt_coco.json │ │ ├─ depth │ │ ├─ mask │ │ ├─ mask_visib diff --git a/requirements.txt b/requirements.txt index 0f82ac91..f94fbf91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,12 +3,12 @@ kiwisolver==1.3.1 matplotlib==2.2.4 imageio==2.5.0 pypng==0.0.19 -Cython>=0.29.24 +Cython==0.29.24 PyOpenGL==3.1.0 triangle>=20190115.2 glumpy==1.1.0 opencv-python>=4.3.0.36 -Pillow>=8.2.0,<=9.5.0 +Pillow>=8.2.0 git+https://github.com/MartinSmeyer/cocoapi.git@v1.0#subdirectory=PythonAPI vispy>=0.6.5 webdataset>=0.1.62 diff --git a/scripts/calc_gt_coco.py b/scripts/calc_gt_coco.py index 5a1fbec8..1667a6f2 100644 --- a/scripts/calc_gt_coco.py +++ b/scripts/calc_gt_coco.py @@ -19,9 +19,9 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "tudl", # Dataset split. Options: 'train', 'test'. - "dataset_split": "test", + "dataset_split": "train", # Dataset split type. 
Options: 'synt', 'real', None = default. See dataset_params.py for options. "dataset_split_type": None, # bbox type. Options: 'modal', 'amodal'. @@ -64,13 +64,11 @@ "version": "0.1.0", "year": datetime.date.today().year, "contributor": "", - "date_created": datetime.datetime.now(datetime.timezone.utc).isoformat(" "), + "date_created": datetime.datetime.utcnow().isoformat(" "), } for scene_id in dp_split["scene_ids"]: - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - scene_modality = dataset_params.get_scene_sensor_or_modality(dp_split["eval_modality"], scene_id) - scene_sensor = dataset_params.get_scene_sensor_or_modality(dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) segmentation_id = 1 @@ -104,9 +102,12 @@ for scene_view, inst_list in scene_gt.items(): im_id = int(scene_view) - img_path = dp_split[tpath_keys["rgb_tpath"]].format(scene_id=scene_id, im_id=im_id) + img_path = dp_split["rgb_tpath"].format(scene_id=scene_id, im_id=im_id) relative_img_path = os.path.relpath(img_path, os.path.dirname(coco_gt_path)) - im_size = dataset_params.get_im_size(dp_split, scene_modality, scene_sensor) + if 'cam_model' in scene_camera[im_id]: + im_size = scene_camera[im_id]["cam_model"]["image_width"], scene_camera[im_id]["cam_model"]["image_height"] + else: + im_size = dp_split["im_size"] image_info = pycoco_utils.create_image_info( im_id, relative_img_path, im_size ) diff --git a/scripts/calc_gt_distribution.py b/scripts/calc_gt_distribution.py index 13ff9841..c4d53782 100644 --- a/scripts/calc_gt_distribution.py +++ b/scripts/calc_gt_distribution.py @@ -2,7 +2,6 @@ # Center for Machine Perception, Czech Technical University in Prague """Calculates distribution of GT poses.""" -import os import math import numpy as np import matplotlib.pyplot as plt @@ -17,23 +16,13 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "ycbv", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. "dataset_split_type": None, # Folder containing the BOP datasets. "datasets_path": config.datasets_path, - # Modality used to compute gt statistics, defaults to eval modality - "modality": None, - # Sensor used to compute gt statistics, defaults to eval sensor - "sensor": None, - # Folder for output visualisations. - "vis_path": os.path.join(config.output_path, "gt_distribution"), - # Save plots in "vis_path" - "save_plots": True, - # Show plots" - "show_plots": True, } ################################################################################ @@ -43,34 +32,30 @@ p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] - scene_ids = dp_split["scene_ids"] dists = [] azimuths = [] elevs = [] visib_fracts = [] ims_count = 0 - for scene_id in scene_ids: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - - misc.log(f"Processing - dataset: {p['dataset']} ({p['dataset_split']}, {p['dataset_split_type']}), scene: {scene_id}") + misc.log( + "Processing - dataset: {} ({}, {}), scene: {}".format( + p["dataset"], p["dataset_split"], p["dataset_split_type"], scene_id + ) + ) # Load GT poses. 
- scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) - scene_gt = inout.load_scene_gt(scene_gt_path) - + scene_gt = inout.load_scene_gt(dp_split["scene_gt_tpath"].format(scene_id=scene_id)) + # Load info about the GT poses. - scene_gt_info_path = dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) - scene_gt_info = inout.load_json(scene_gt_info_path, keys_to_int=True) + scene_gt_info = inout.load_json( + dp_split["scene_gt_info_tpath"].format(scene_id=scene_id), keys_to_int=True + ) ims_count += len(scene_gt) - for im_id in scene_gt: + for im_id in scene_gt.keys(): for gt_id, im_gt in enumerate(scene_gt[im_id]): # Object distance. dist = np.linalg.norm(im_gt["cam_t_m2c"]) @@ -102,10 +87,6 @@ ) misc.log("Number of images: " + str(ims_count)) -if ims_count == 0: - misc.log("No ground truth found.") - exit() - misc.log("Min dist: {}".format(np.min(dists))) misc.log("Max dist: {}".format(np.max(dists))) misc.log("Mean dist: {}".format(np.mean(dists))) @@ -122,44 +103,21 @@ misc.log("Max visib fract: {}".format(np.max(visib_fracts))) misc.log("Mean visib fract: {}".format(np.mean(visib_fracts))) -prefix = f"{p['modality']}_{p['sensor']}_" if isinstance(p["modality"], str) else "" # Visualize distributions. -if p["save_plots"]: - save_dir = os.path.join(p["vis_path"], p["dataset"]) - misc.log(f"Saving plots in {save_dir}") - misc.ensure_dir(save_dir) - plt.figure() plt.hist(dists, bins=100) plt.title("Object distance") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}object_distance.png") - misc.log(f"Saving {path}") - plt.savefig(path) plt.figure() plt.hist(azimuths, bins=100) plt.title("Azimuth") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}azimuth.png") - misc.log(f"Saving {path}") - plt.savefig(path) plt.figure() plt.hist(elevs, bins=100) plt.title("Elevation") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}elevation.png") - misc.log(f"Saving {path}") - plt.savefig(path) plt.figure() plt.hist(visib_fracts, bins=100) plt.title("Visibility fraction") -if p["save_plots"]: - path = os.path.join(save_dir, f"{prefix}visibility_fraction.png") - misc.log(f"Saving {path}") - plt.savefig(path) -if p["show_plots"]: - plt.show() +plt.show() diff --git a/scripts/calc_gt_info.py b/scripts/calc_gt_info.py index db3e0f2b..23b65db2 100644 --- a/scripts/calc_gt_info.py +++ b/scripts/calc_gt_info.py @@ -24,7 +24,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. @@ -37,10 +37,6 @@ "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # Folder containing the BOP datasets. "datasets_path": config.datasets_path, - # which modality to compute masks on, default to eval modality - "modality": "rgb", - # which sensor to compute masks on, default to eval sensor - "sensor": "realsense", # Path template for output images with object masks. 
"vis_mask_visib_tpath": os.path.join( config.output_path, @@ -62,10 +58,6 @@ dp_split = dataset_params.get_split_params( p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] model_type = None if p["dataset"] == "tless": @@ -76,10 +68,7 @@ misc.log("Initializing renderer...") # The renderer has a larger canvas for generation of masks of truncated objects. -if isinstance(dp_split["im_size"], dict): - im_width, im_height = dp_split["im_size"][p["sensor"]] -else: # classical BOP format - im_width, im_height = dp_split["im_size"] +im_width, im_height = dp_split["im_size"] ren_width, ren_height = 3 * im_width, 3 * im_height ren_cx_offset, ren_cy_offset = im_width, im_height ren = renderer.create_renderer(ren_width, ren_height, p["renderer_type"], mode="depth") @@ -90,13 +79,11 @@ scene_ids = dataset_params.get_present_scene_ids(dp_split) for scene_id in scene_ids: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - - # Load scene GT. - scene_camera_path = dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id) - scene_camera = inout.load_scene_camera(scene_camera_path) - scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) - scene_gt = inout.load_scene_gt(scene_gt_path) + # Load scene info and ground-truth poses. + scene_camera = inout.load_scene_camera( + dp_split["scene_camera_tpath"].format(scene_id=scene_id) + ) + scene_gt = inout.load_scene_gt(dp_split["scene_gt_tpath"].format(scene_id=scene_id)) scene_gt_info = {} im_ids = sorted(scene_gt.keys()) @@ -113,7 +100,7 @@ ) # Load depth image. - depth_fpath = dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) + depth_fpath = dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) if not os.path.exists(depth_fpath): depth_fpath = depth_fpath.replace(".tif", ".png") depth = inout.load_depth(depth_fpath) @@ -221,6 +208,6 @@ inout.save_im(vis_path, vis) # Save the info for the current scene. - scene_gt_info_path = dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) + scene_gt_info_path = dp_split["scene_gt_info_tpath"].format(scene_id=scene_id) misc.ensure_dir(os.path.dirname(scene_gt_info_path)) inout.save_json(scene_gt_info_path, scene_gt_info) diff --git a/scripts/calc_gt_masks.py b/scripts/calc_gt_masks.py index 64547be4..cb53b613 100644 --- a/scripts/calc_gt_masks.py +++ b/scripts/calc_gt_masks.py @@ -18,7 +18,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. @@ -29,10 +29,6 @@ "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # Folder containing the BOP datasets. 
"datasets_path": config.datasets_path, - # which modality to compute masks on, default to eval modality - "modality": None, - # which sensor to compute masks on, default to eval sensor - "sensor": None, } ################################################################################ @@ -41,12 +37,6 @@ dp_split = dataset_params.get_split_params( p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] - -classic_bop_format = isinstance(dp_split["im_modalities"], list) model_type = None if p["dataset"] == "tless": @@ -55,32 +45,28 @@ scene_ids = dataset_params.get_present_scene_ids(dp_split) for scene_id in scene_ids: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - # Load scene GT. - scene_camera_path = dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id) - scene_camera = inout.load_scene_camera(scene_camera_path) - scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) + scene_gt_path = dp_split["scene_gt_tpath"].format(scene_id=scene_id) scene_gt = inout.load_scene_gt(scene_gt_path) + # Load scene camera. + scene_camera_path = dp_split["scene_camera_tpath"].format(scene_id=scene_id) + scene_camera = inout.load_scene_camera(scene_camera_path) + # Create folders for the output masks (if they do not exist yet). mask_dir_path = os.path.dirname( - dp_split[tpath_keys["mask_tpath"]].format(scene_id=scene_id, im_id=0, gt_id=0) + dp_split["mask_tpath"].format(scene_id=scene_id, im_id=0, gt_id=0) ) - misc.log(f"Saving masks in {mask_dir_path}") misc.ensure_dir(mask_dir_path) + mask_visib_dir_path = os.path.dirname( - dp_split[tpath_keys["mask_visib_tpath"]].format(scene_id=scene_id, im_id=0, gt_id=0) + dp_split["mask_visib_tpath"].format(scene_id=scene_id, im_id=0, gt_id=0) ) - misc.log(f"Saving visible masks in {mask_visib_dir_path}") misc.ensure_dir(mask_visib_dir_path) # Initialize a renderer. misc.log("Initializing renderer...") - if isinstance(dp_split["im_size"], dict): - width, height = dp_split["im_size"][p["sensor"]] - else: # classical BOP format - width, height = dp_split["im_size"] + width, height = dp_split["im_size"] ren = renderer.create_renderer( width, height, renderer_type=p["renderer_type"], mode="depth" ) @@ -106,10 +92,8 @@ fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] # Load depth image. - depth_fpath = dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) - if not os.path.exists(depth_fpath): - depth_fpath = depth_fpath.replace(".tif", ".png") - depth_im = inout.load_depth(depth_fpath) + depth_path = dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) + depth_im = inout.load_depth(depth_path) depth_im *= scene_camera[im_id]["depth_scale"] # to [mm] dist_im = misc.depth_im_to_dist_im_fast(depth_im, K) @@ -131,12 +115,12 @@ ) # Save the calculated masks. 
- mask_path = dp_split[tpath_keys["mask_tpath"]].format( + mask_path = dp_split["mask_tpath"].format( scene_id=scene_id, im_id=im_id, gt_id=gt_id ) inout.save_im(mask_path, 255 * mask.astype(np.uint8)) - mask_visib_path = dp_split[tpath_keys["mask_visib_tpath"]].format( + mask_visib_path = dp_split["mask_visib_tpath"].format( scene_id=scene_id, im_id=im_id, gt_id=gt_id ) inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) diff --git a/scripts/calc_model_info.py b/scripts/calc_model_info.py index 76ea286b..c9a80b2f 100644 --- a/scripts/calc_model_info.py +++ b/scripts/calc_model_info.py @@ -12,7 +12,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "hot3d", + "dataset": "lm", # Type of input object models. "model_type": None, # Folder containing the BOP datasets. @@ -33,19 +33,19 @@ model = inout.load_ply(dp_model["model_tpath"].format(obj_id=obj_id)) # Calculate 3D bounding box. - xs, ys, zs = model["pts"][:,0], model["pts"][:,1], model["pts"][:,2] - bbox = misc.calc_3d_bbox(xs, ys, zs) + ref_pt = map(float, model["pts"].min(axis=0).flatten()) + size = map(float, (model["pts"].max(axis=0) - ref_pt).flatten()) # Calculated diameter. diameter = misc.calc_pts_diameter(model["pts"]) models_info[obj_id] = { - "min_x": bbox[0], - "min_y": bbox[1], - "min_z": bbox[2], - "size_x": bbox[3], - "size_y": bbox[4], - "size_z": bbox[5], + "min_x": ref_pt[0], + "min_y": ref_pt[1], + "min_z": ref_pt[2], + "size_x": size[0], + "size_y": size[1], + "size_z": size[2], "diameter": diameter, } diff --git a/scripts/create_coco_results_file_from_gt.py b/scripts/create_coco_results_file_from_gt.py index bb56bbf1..0014f0cf 100644 --- a/scripts/create_coco_results_file_from_gt.py +++ b/scripts/create_coco_results_file_from_gt.py @@ -57,8 +57,9 @@ ) # Load and organize the estimation targets. 
-target_file_path = os.path.join(dp_split["base_path"], p["targets_filename"]) -targets = inout.load_json(target_file_path) +targets = inout.load_json( + os.path.join(dp_split["base_path"], p["targets_filename"]) +) targets_org = {} for target in targets: targets_org.setdefault(target["scene_id"], {}).setdefault(target["im_id"], {}) @@ -66,7 +67,7 @@ # loop over coco annotation and select based on targets for scene_id in targets_org: - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) coco_gt_path = dp_split[tpath_keys["scene_gt_coco_tpath"]].format(scene_id=scene_id) if p["bbox_type"] == "modal": @@ -93,9 +94,10 @@ result_filename = "{}_{}-{}_coco.json".format(p["results_name"], p["dataset"], p["split"]) results_path = os.path.join(p["results_path"], result_filename) inout.save_json(results_path, results) -result_file_path = os.path.join(p["results_path"], result_filename) -check_passed, _ = inout.check_coco_results(result_file_path, ann_type="segm") +check_passed, _ = inout.check_coco_results( + os.path.join(p["results_path"], result_filename), ann_type="segm" +) if not check_passed: misc.log("Please correct the coco result format of {}".format(result_filename)) exit() -misc.log(f"Saved {results_path}") +print('Saved ', results_path) diff --git a/scripts/create_pose_results_file_from_gt.py b/scripts/create_pose_results_file_from_gt.py index 9e615103..6a25ae4a 100644 --- a/scripts/create_pose_results_file_from_gt.py +++ b/scripts/create_pose_results_file_from_gt.py @@ -3,8 +3,6 @@ """ Create POSE result files from ground truth annotation and targets file. -Simply generate estimates from using all object gt poses from the test target file, without caring about visibility. -Non visible estimates are discarded by eval pose scripts and do not impact AP/AR scores. """ import os @@ -13,7 +11,6 @@ from bop_toolkit_lib import config from bop_toolkit_lib import dataset_params from bop_toolkit_lib import inout -from bop_toolkit_lib import misc # PARAMETERS (can be overwritten by the command line arguments below). @@ -22,14 +19,16 @@ # Out perfect result file name "results_name": 'gt-results', # Predefined test targets - "targets_filename": "test_targets_bop24.json", + "targets_filename": "test_targets_bop24.json", # Folder with results to be evaluated. "results_path": config.results_path, # Folder containing the BOP datasets. "datasets_path": config.datasets_path, - "dataset": "xyzibd", + "dataset": "ycbv", "split": "test", "split_type": None, + # by default, we consider only objects that are at least 10% visible + "visib_gt_min": 0.1, "eval_mode": "localization", } ################################################################################ @@ -41,6 +40,7 @@ parser.add_argument("--dataset", default=p["dataset"]) parser.add_argument("--split", default=p["split"]) parser.add_argument("--split_type", default=p["split_type"]) +parser.add_argument("--visib_gt_min", default=p["visib_gt_min"]) parser.add_argument("--eval_mode", default=p["eval_mode"]) args = parser.parse_args() @@ -51,6 +51,7 @@ p["dataset"] = str(args.dataset) p["split"] = str(args.split) p["split_type"] = str(args.split_type) if args.split_type is not None else None +p["visib_gt_min"] = float(args.visib_gt_min) p["eval_mode"] = str(args.eval_mode) # Load dataset parameters. 
@@ -61,31 +62,33 @@ targets_path = os.path.join(p["datasets_path"], p["dataset"], p["targets_filename"]) targets = inout.load_json(targets_path) -# Load the estimation targets. -targets = inout.load_json( - os.path.join(dp_split["base_path"], p["targets_filename"]) -) - -# Organize the targets by scene and image. -misc.log("Organizing estimation targets...") -targets_org = {} -for target in targets: - targets_org.setdefault(target["scene_id"], {}).setdefault(target["im_id"], {}) +unique_scene_ids = set([t["scene_id"] for t in targets]) +scene_gts = {} +scene_gts_info = {} results = [] -for scene_id in targets_org: - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - scene_gt_path = dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) - scene_gt_info_path = dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) - scene_gt = inout.load_scene_gt(scene_gt_path) - scene_gt_info = inout.load_scene_gt(scene_gt_info_path) - - for im_id in targets_org[scene_id]: - img_gt = scene_gt[im_id] - img_gt_info = scene_gt_info[im_id] - - for obj_gt in img_gt: +for target in targets: + scene_id, im_id = target["scene_id"], target["im_id"] + + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) + + if scene_id not in scene_gts: + scene_gts[scene_id] = inout.load_scene_gt( + dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id) + ) + scene_gts_info[scene_id] = inout.load_scene_gt( + dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) + ) + + img_gt = scene_gts[scene_id][im_id] + img_gt_info = scene_gts_info[scene_id][im_id] + + if "obj_id" not in target: + target = inout.get_im_targets(img_gt, img_gt_info, p["visib_gt_min"], p["eval_mode"]) + + for obj_gt in img_gt: + if obj_gt["obj_id"] in target: result = { "scene_id": int(scene_id), "im_id": int(im_id), @@ -95,9 +98,9 @@ "t": obj_gt["cam_t_m2c"], "time": -1.0, } - results.append(result) + results.append(result) -result_filename = f"{p['results_name']}_{p['dataset']}-{p['split']}_pose.csv" +result_filename = "{}_{}-{}_pose.csv".format(p["results_name"], p["dataset"], p["split"]) results_path = os.path.join(p["results_path"], result_filename) inout.save_bop_results(results_path, results) -misc.log(f"Saved {results_path}") +print('Saved ', results_path) diff --git a/scripts/enumerate_test_targets.py b/scripts/enumerate_test_targets.py index 63a9dbb8..b5d6e574 100644 --- a/scripts/enumerate_test_targets.py +++ b/scripts/enumerate_test_targets.py @@ -55,20 +55,16 @@ # List of considered scenes. scene_ids_curr = dp_split["scene_ids"] - test_targets = [] for scene_id in scene_ids_curr: misc.log("Processing scene: {}".format(scene_id)) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - - # Load the ground-truth poses. - scene_gt = inout.load_scene_gt(dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id)) + scene_gt = inout.load_scene_gt(dp_split["scene_gt_tpath"].format(scene_id=scene_id)) # Load meta info about the ground-truth poses. scene_gt_info = inout.load_scene_gt( - dp_split[tpath_keys["scene_gt_info_tpath"]].format(scene_id=scene_id) + dp_split["scene_gt_info_tpath"].format(scene_id=scene_id) ) # List of considered images. 
@@ -97,10 +93,18 @@ } ) +# test_targets_lines = [] +# for test_target in test_targets: +# test_targets_lines.append( +# '- {{scene_id: {}, im_id: {}, obj_id: {}, inst_count: {}}}'.format( +# test_target['scene_id'], test_target['im_id'], test_target['obj_id'], +# test_target['inst_count'])) + # Save the test targets, test_targets_path = os.path.join(dp_split["base_path"], p["test_targets_filename"]) +# with open(test_targets_path, 'w') as f: +# f.write('\n'.join(test_targets_lines)) -misc.log("Saving {}".format(test_targets_path)) inout.save_json(test_targets_path, test_targets) misc.log("Done.") diff --git a/scripts/eval_bop19_pose.py b/scripts/eval_bop19_pose.py index 396d6f6b..7fd1464b 100644 --- a/scripts/eval_bop19_pose.py +++ b/scripts/eval_bop19_pose.py @@ -39,6 +39,7 @@ "tyol": 15, "ycbv": 15, "hope": 15, + "industrial": 15 }, "vsd_taus": list(np.arange(0.05, 0.51, 0.05)), "vsd_normalized_by_diameter": True, diff --git a/scripts/eval_bop22_coco.py b/scripts/eval_bop22_coco.py index cd31634c..3e32e2ee 100644 --- a/scripts/eval_bop22_coco.py +++ b/scripts/eval_bop22_coco.py @@ -121,7 +121,7 @@ misc.log("Merging coco annotations and predictions...") # Merge coco scene annotations and results for i, scene_id in enumerate(targets_org): - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) scene_coco_ann_path = dp_split[tpath_keys["scene_gt_coco_tpath"]].format(scene_id=scene_id) if p["ann_type"] == "bbox" and p["bbox_type"] == "modal": diff --git a/scripts/eval_bop24_pose.py b/scripts/eval_bop24_pose.py index a5c3412d..a3cc3f03 100644 --- a/scripts/eval_bop24_pose.py +++ b/scripts/eval_bop24_pose.py @@ -28,12 +28,6 @@ "type": "mssd", "correct_th": [[th] for th in np.arange(0.05, 0.51, 0.05)], }, - { - "n_top": 0, - "type": "mssd", - "correct_th": [[th] for th in range(2,21,2)], - "threshold_unit": "mm" - }, { "n_top": 0, "type": "mspd", @@ -110,8 +104,6 @@ # Name of the result and the dataset. result_name = os.path.splitext(os.path.basename(result_filename))[0] dataset = str(result_name.split("_")[1].split("-")[0]) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 # Calculate the average estimation time per image. ests = inout.load_bop_results( @@ -193,10 +185,6 @@ "--visib_gt_min={}".format(p["visib_gt_min"]), "--eval_mode=detection", ] - if "threshold_unit" in error: - calc_scores_cmd += [ - "--normalized_by_diameter=[]" - ] if p["ignore_object_visible_less_than_visib_gt_min"]: calc_scores_cmd += [ "--ignore_object_visible_less_than_visib_gt_min" @@ -274,9 +262,6 @@ f"mAP, {error['type']}, {obj_id}: {mAP_over_correct_th:.3f}" ) mAP_over_correct_ths.append(mAP_over_correct_th) - if "threshold_unit" in error: - error["type"] = error["type"] + "_" + error["threshold_unit"] - mAP_per_error_type[error["type"]] = np.mean(mAP_over_correct_ths) logger.info( f"{error['type']}, Final mAP: {mAP_per_error_type[error['type']]:.3f}" @@ -297,16 +282,6 @@ [mAP_per_error_type["mssd"], mAP_per_error_type["mspd"]] ) - # Final score for the given dataset. - final_scores["bop25_mAP"] = np.mean( - [mAP_per_error_type["mssd"]] - ) - - # Final score for the given dataset. - final_scores["bop25_mAP_mm"] = np.mean( - [mAP_per_error_type["mssd_mm"]] - ) - # Average estimation time per image. 
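With the bop25 and millimetre-threshold variants removed above, the final pose score is again the plain BOP24 definition: average the mAP over each error type's correct_th values, then average MSSD and MSPD. A toy numeric sketch (all mAP numbers are invented):

import numpy as np

# mAP per correct_th for one error type (thresholds 0.05, 0.10, ... in the script).
mAP_over_correct_ths_mssd = [0.9, 0.8, 0.7]
mAP_over_correct_ths_mspd = [0.95, 0.85, 0.75]

mAP_per_error_type = {
    "mssd": np.mean(mAP_over_correct_ths_mssd),
    "mspd": np.mean(mAP_over_correct_ths_mspd),
}

# Final dataset score: mean over the two error types.
bop24_mAP = np.mean([mAP_per_error_type["mssd"], mAP_per_error_type["mspd"]])
print(round(float(bop24_mAP), 3))  # 0.825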
final_scores["bop24_average_time_per_image"] = average_time_per_image diff --git a/scripts/eval_calc_errors.py b/scripts/eval_calc_errors.py index da558c47..ba6f3bca 100644 --- a/scripts/eval_calc_errors.py +++ b/scripts/eval_calc_errors.py @@ -59,6 +59,7 @@ "tyol": 15, "ycbv": 15, "hope": 15, + "industrial": 15 }, "vsd_taus": list(np.arange(0.05, 0.51, 0.05)), "vsd_normalized_by_diameter": True, @@ -174,9 +175,6 @@ p["datasets_path"], dataset, split, split_type ) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 - if p["error_type"] not in dp_split["supported_error_types"]: raise ValueError("""{} error is not among {} """ """supported error types: {}""".format(p["error_type"], dataset, dp_split["supported_error_types"])) @@ -249,8 +247,7 @@ # Load pose estimates. logger.info("Loading pose estimates...") - max_num_estimates_per_image = p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None - ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=max_num_estimates_per_image) + ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None) # Organize the pose estimates by scene, image and object. logger.info("Organizing pose estimates...") @@ -262,7 +259,7 @@ for scene_id, scene_targets in targets_org.items(): logger.info("Processing scene {} of {}...".format(scene_id, dataset)) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) # Load GT poses for the current scene. scene_gt = inout.load_scene_gt( diff --git a/scripts/eval_calc_errors_gpu.py b/scripts/eval_calc_errors_gpu.py index 96594417..61c5f247 100644 --- a/scripts/eval_calc_errors_gpu.py +++ b/scripts/eval_calc_errors_gpu.py @@ -83,7 +83,6 @@ ), "num_workers": config.num_workers, # Number of parallel workers for the calculation of errors. "eval_mode": "localization", # Options: 'localization', 'detection'. - "max_num_estimates_per_image": 100, # Maximum number of estimates per image. Only used for detection tasks. } ################################################################################ @@ -176,9 +175,6 @@ p["datasets_path"], dataset, split, split_type ) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 - model_type = "eval" dp_model = dataset_params.get_model_params(p["datasets_path"], dataset, model_type) @@ -229,8 +225,7 @@ # Load pose estimates. logger.info("Loading pose estimates...") - max_num_estimates_per_image = p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None - ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=max_num_estimates_per_image) + ests = inout.load_bop_results(os.path.join(p["results_path"], result_filename), max_num_estimates_per_image=p["max_num_estimates_per_image"] if p["eval_mode"] == "detection" else None) # Organize the pose estimates by scene, image and object. 
logger.info("Organizing pose estimates...") @@ -261,7 +256,7 @@ # for each scene, organize the estimates per object as each object est_per_object = copy.deepcopy(estimate_templates) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) # Load camera and GT poses for the current scene. scene_camera = inout.load_scene_camera( @@ -403,11 +398,10 @@ "obj_id": obj_id, "est_id": est_id, "score": score, - "gt_visib_fracts": {}, + "gt_visib_fract": gt_visib_fract, "errors": {}, } scene_errs[key_name]["errors"][gt_id] = [errors[i]] - scene_errs[key_name]["gt_visib_fracts"][gt_id] = [gt_visib_fract] scene_errs = [v for k, v in scene_errs.items()] del est_per_object diff --git a/scripts/eval_calc_scores.py b/scripts/eval_calc_scores.py index 75269049..58e469dd 100644 --- a/scripts/eval_calc_scores.py +++ b/scripts/eval_calc_scores.py @@ -177,9 +177,6 @@ # Evaluation signature. score_sign = misc.get_score_signature(p["correct_th"][err_type], p["visib_gt_min"]) - if dataset == "xyzibd": - p["max_num_estimates_per_image"] = 200 - logger.info( "Calculating score - error: {}, method: {}, dataset: {}.".format( err_type, method, dataset @@ -220,7 +217,7 @@ for scene_id, scene_targets in targets_org.items(): logger.info("Processing scene {} of {}...".format(scene_id, dataset)) - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) # Load GT poses for the current scene. scene_gt = inout.load_scene_gt( @@ -234,7 +231,10 @@ scene_camera = inout.load_scene_camera(dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id)) # Handle change of image size location between BOP19 and BOP24 dataset formats - scene_im_widths[scene_id] = dataset_params.get_im_size(dp_split, dp_split['eval_modality'], dp_split['eval_sensor'])[0] + if "cam_model" in next(iter(scene_camera.items()))[1]: + scene_im_widths[scene_id] = scene_camera[0]["cam_model"]["image_width"] + else: + scene_im_widths[scene_id] = float(dp_split["im_size"][0]) # Keep GT poses only for the selected targets. scene_gt_curr = {} diff --git a/scripts/vis_est_poses.py b/scripts/vis_est_poses.py index 46fabade..e840eadc 100644 --- a/scripts/vis_est_poses.py +++ b/scripts/vis_est_poses.py @@ -14,27 +14,13 @@ from bop_toolkit_lib import renderer from bop_toolkit_lib import visualization -# Get the base name of the file without the .py extension -file_name = os.path.splitext(os.path.basename(__file__))[0] -logger = misc.get_logger(file_name) - -htt_available = False -try: - from bop_toolkit_lib import pose_error_htt - htt_available = True -except ImportError as e: - logger.warning("""Missing hand_tracking_toolkit dependency, - mandatory if you are running evaluation on HOT3d. - Refer to the README.md for installation instructions. - """) - # PARAMETERS. ################################################################################ p = { # Top N pose estimates (with the highest score) to be visualized for each # object in each image. - "n_top": 0, # 0 = all estimates, -1 = given by the number of GT poses. + "n_top": 1, # 0 = all estimates, -1 = given by the number of GT poses. # True = one visualization for each (im_id, obj_id), False = one per im_id. "vis_per_obj_id": True, # Indicates whether to render RGB image. @@ -45,7 +31,7 @@ # of individual objects are blended together. 
"vis_rgb_resolve_visib": True, # Indicates whether to render depth image. - "vis_depth_diff": True, + "vis_depth_diff": False, # If to use the original model color. "vis_orig_color": False, # Type of the renderer (used for the VSD pose error function). @@ -88,21 +74,6 @@ split = dataset_info[1] split_type = dataset_info[2] if len(dataset_info) > 2 else None - ####################### - # hot3d specific checks - if dataset == "hot3d" and not htt_available: - raise ImportError("Missing hand_tracking_toolkit dependency, mandatory for HOT3D dataset.") - - if dataset == "hot3d" and p["renderer_type"] != "htt": - raise ValueError("'htt' renderer_type is mandatory for HOT3D dataset.") - - # hot3d does not contain depth modality, some visualizations are not available - if dataset in ["hot3d"]: - p["vis_rgb"] = True - p["vis_rgb_resolve_visib"] = False - p["vis_depth_diff"] = False - ####################### - # Load dataset parameters. dp_split = dataset_params.get_split_params( p["datasets_path"], dataset, split, split_type @@ -111,6 +82,30 @@ model_type = "eval" dp_model = dataset_params.get_model_params(p["datasets_path"], dataset, model_type) + # Rendering mode. + renderer_modalities = [] + if p["vis_rgb"]: + renderer_modalities.append("rgb") + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): + renderer_modalities.append("depth") + renderer_mode = "+".join(renderer_modalities) + + # Create a renderer. + width, height = dp_split["im_size"] + ren = renderer.create_renderer( + width, height, p["renderer_type"], mode=renderer_mode + ) + + # Load object models. + models = {} + for obj_id in dp_model["obj_ids"]: + misc.log("Loading 3D model of object {}...".format(obj_id)) + model_path = dp_model["model_tpath"].format(obj_id=obj_id) + model_color = None + if not p["vis_orig_color"]: + model_color = tuple(colors[(obj_id - 1) % len(colors)]) + ren.add_object(obj_id, model_path, surf_color=model_color) + # Load pose estimates. misc.log("Loading pose estimates...") ests = inout.load_bop_results(os.path.join(config.results_path, result_fname)) @@ -123,54 +118,25 @@ est["im_id"], {} ).setdefault(est["obj_id"], []).append(est) - # Rendering mode. - renderer_modalities = [] - if p["vis_rgb"]: - renderer_modalities.append("rgb") - if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - renderer_modalities.append("depth") - renderer_mode = "+".join(renderer_modalities) - - width, height = None, None - ren = None - for scene_id, scene_ests in ests_org.items(): - tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], dp_split["eval_sensor"], scene_id) - scene_modality = dataset_params.get_scene_sensor_or_modality(dp_split["eval_modality"], scene_id) - scene_sensor = dataset_params.get_scene_sensor_or_modality(dp_split["eval_sensor"], scene_id) - - # Create a new renderer if image size has changed - scene_width, scene_height = dataset_params.get_im_size(dp_split, scene_modality, scene_sensor) - if (width, height) != (scene_width, scene_height): - width, height = scene_width, scene_height - misc.log(f"Creating renderer of type {p['renderer_type']}") - ren = renderer.create_renderer( - width, height, p["renderer_type"], mode=renderer_mode, shading="flat" - ) - # Load object models in the new renderer. 
- for obj_id in dp_model["obj_ids"]: - misc.log(f"Loading 3D model of object {obj_id}...") - model_path = dp_model["model_tpath"].format(obj_id=obj_id) - model_color = None - if not p["vis_orig_color"]: - model_color = tuple(colors[(obj_id - 1) % len(colors)]) - ren.add_object(obj_id, model_path, surf_color=model_color) - - # Load info and ground-truth poses for the current scene. - scene_camera = inout.load_scene_camera(dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id)) - scene_gt = inout.load_scene_gt(dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id)) + scene_camera = inout.load_scene_camera( + dp_split["scene_camera_tpath"].format(scene_id=scene_id) + ) + scene_gt = inout.load_scene_gt( + dp_split["scene_gt_tpath"].format(scene_id=scene_id) + ) for im_ind, (im_id, im_ests) in enumerate(scene_ests.items()): if im_ind % 10 == 0: split_type_str = " - " + split_type if split_type is not None else "" - misc.log(f"Visualizing pose estimates - method: {method}, dataset: {dataset}{split_type_str}, scene: {scene_id}, im: {im_id}") + misc.log( + "Visualizing pose estimates - method: {}, dataset: {}{}, scene: {}, " + "im: {}".format(method, dataset, split_type_str, scene_id, im_id) + ) - # Retrieve camera intrinsics. - if dataset == 'hot3d': - cam = pose_error_htt.create_camera_model(scene_camera[im_id]) - else: - cam = scene_camera[im_id]["cam_K"] + # Intrinsic camera matrix. + K = scene_camera[im_id]["cam_K"] im_ests_vis = [] im_ests_vis_obj_ids = [] @@ -214,32 +180,24 @@ # Load the color and depth images and prepare images for rendering. rgb = None if p["vis_rgb"]: - # rgb_tpath is an alias refering to the sensor|modality image paths on which the poses are rendered - im_tpath = tpath_keys["rgb_tpath"] - # check for BOP classic (itodd) - rgb_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, 'rgb') - if im_tpath == "rgb_tpath" and not rgb_available: - im_tpath = "gray_tpath" - - rgb = inout.load_im( - dp_split[im_tpath].format(scene_id=scene_id, im_id=im_id) - ) - # if image is grayscale (e.g. quest3), convert it to 3 channels - if rgb.ndim == 2: - rgb = np.dstack([rgb, rgb, rgb]) + if "rgb" in dp_split["im_modalities"]: + rgb = inout.load_im( + dp_split["rgb_tpath"].format(scene_id=scene_id, im_id=im_id) + )[:, :, :3] + elif "gray" in dp_split["im_modalities"]: + gray = inout.load_im( + dp_split["gray_tpath"].format( + scene_id=scene_id, im_id=im_id + ) + ) + rgb = np.dstack([gray, gray, gray]) else: - rgb = rgb[:,:,:3] # should we keep this? - - depth = None - if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - depth_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, "depth") - if not depth_available: - misc.log(f"{scene_sensor} has no depth data, skipping depth visualization") - p["vis_depth_diff"] = False - p["vis_rgb_resolve_visib"] = False - else: + raise ValueError("RGB nor gray images are available.") + + depth = None + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): depth = inout.load_depth( - dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) + dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) ) depth *= scene_camera[im_id]["depth_scale"] # Convert to [mm]. @@ -274,7 +232,7 @@ # Visualization. 
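The image-loading branch above falls back to the gray modality when no RGB images exist (gray-only sensors such as ITODD) and stacks the single channel three times so the renderer overlay still receives a 3-channel image; depth is converted to millimetres with the per-image depth_scale. A numpy sketch with synthetic arrays:

import numpy as np

gray = np.random.randint(0, 256, size=(480, 640), dtype=np.uint8)  # gray-only sensor
rgb = np.dstack([gray, gray, gray])                                 # 3-channel copy
assert rgb.shape == (480, 640, 3)

depth_raw = np.random.rand(480, 640).astype(np.float32)             # as stored on disk
depth_scale = 0.1                                                    # from scene_camera[im_id]
depth_mm = depth_raw * depth_scale                                   # convert to [mm]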
visualization.vis_object_poses( poses=ests_vis, - K=cam, + K=K, renderer=ren, rgb=rgb, depth=depth, diff --git a/scripts/vis_gt_poses.py b/scripts/vis_gt_poses.py index 02ecc05e..ac8aa600 100644 --- a/scripts/vis_gt_poses.py +++ b/scripts/vis_gt_poses.py @@ -13,6 +13,7 @@ from bop_toolkit_lib import dataset_params from bop_toolkit_lib import inout from bop_toolkit_lib import misc +from bop_toolkit_lib import pose_error_htt from bop_toolkit_lib import renderer from bop_toolkit_lib import visualization @@ -22,20 +23,19 @@ htt_available = False try: - from bop_toolkit_lib import pose_error_htt + from bop_toolkit_lib import renderer_htt htt_available = True except ImportError as e: - logger.warning("""Missing hand_tracking_toolkit dependency, - mandatory if you are running evaluation on HOT3d. - Refer to the README.md for installation instructions. - """) - + logger.warn("""Missing hand_tracking_toolkit dependency, + mandatory if you are running evaluation on HOT3d. + Refer to the README.md for installation instructions. + """) # PARAMETERS. ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "lm", # Dataset split. Options: 'train', 'val', 'test'. "dataset_split": "test", # Dataset split type. None = default. See dataset_params.py for options. @@ -50,18 +50,9 @@ "scene_ids": [], "im_ids": [], "gt_ids": [], - ######### - # Which sensor to visualize, . By default it uses the evaluation modality set - # in dataset_params.py. Set to None for rendering PBR images or BOP core datasets. - # Set to sensor for new BOP core sets, e.g. "photoneo". - ######### - # Modality used to visualize ground truth, default to eval modality. Should not be "depth". - "modality": None, - # Sensor used to visualize ground truth, default to eval sensor. - "sensor": None, # --------------------------------------------------------------------------------- - # Next parameters apply only to dataset with aligned color and depth images. + # Next parameters apply only to classical BOP19 datasets (not the H3 BOP24 format) # --------------------- # Indicates whether to render RGB images. "vis_rgb": True, @@ -77,7 +68,7 @@ # Whether to use the original model color. "vis_orig_color": True, # Type of the renderer (used for the VSD pose error function). - "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. 'htt' is mandatory for "hot3d" dataset. + "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # Folder containing the BOP datasets. "datasets_path": config.datasets_path, # Folder for output visualisations. @@ -96,30 +87,19 @@ } ################################################################################ -####################### -# hot3d specific checks if p["dataset"] == "hot3d" and not htt_available: raise ImportError("Missing hand_tracking_toolkit dependency, mandatory for HOT3D dataset.") -if p["dataset"] == "hot3d" and p["renderer_type"] != "htt": - raise ValueError("'htt' renderer_type is mandatory for HOT3D dataset.") - -# hot3d does not contain depth modality, some visualizations are not available -if p["dataset"] in ["hot3d"]: +# if HOT3D dataset is used, next parameters are set +if p["dataset"] == "hot3d": p["vis_rgb"] = True p["vis_rgb_resolve_visib"] = False p["vis_depth_diff"] = False -####################### # Load dataset parameters. 
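vis_gt_poses.py keeps the optional-import pattern shown above: the hand_tracking_toolkit-backed renderer is imported inside a try/except, a flag records whether it is available, and the script only fails later if the HOT3D dataset actually needs it. A standalone sketch of that pattern; some_optional_module is a placeholder name, not a real package:

import logging

logger = logging.getLogger("vis_gt_poses")

htt_available = False
try:
    import some_optional_module  # in the real script: bop_toolkit_lib.renderer_htt
    htt_available = True
except ImportError:
    logger.warning("Optional dependency missing; HOT3D rendering unavailable.")

dataset = "lm"
if dataset == "hot3d" and not htt_available:
    # Fail fast only when the dependency is actually required.
    raise ImportError("hand_tracking_toolkit is required for the HOT3D dataset.")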
dp_split = dataset_params.get_split_params( p["datasets_path"], p["dataset"], p["dataset_split"], p["dataset_split_type"] ) -if p["modality"] is None: - p["modality"] = dp_split["eval_modality"] -assert p["modality"] != "depth", "Modality should be a color modality (not 'depth')" -if p["sensor"] is None: - p["sensor"] = dp_split["eval_sensor"] model_type = "eval" # None = default. dp_model = dataset_params.get_model_params(p["datasets_path"], p["dataset"], model_type) @@ -145,39 +125,52 @@ scene_ids_curr = set(scene_ids_curr).intersection(p["scene_ids"]) # Rendering mode. -renderer_modalities = [] -if p["vis_rgb"]: - renderer_modalities.append("rgb") -if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - renderer_modalities.append("depth") -renderer_mode = "+".join(renderer_modalities) - - -width, height = None, None -ren = None - -for scene_id in scene_ids_curr: - tpath_keys = dataset_params.scene_tpaths_keys(p["modality"], p["sensor"], scene_id) - scene_modality = dataset_params.get_scene_sensor_or_modality(p["modality"], scene_id) - scene_sensor = dataset_params.get_scene_sensor_or_modality(p["sensor"], scene_id) - - # Create a new renderer if image size has changed - scene_width, scene_height = dataset_params.get_im_size(dp_split, scene_modality, scene_sensor) - if (width, height) != (scene_width, scene_height): - width, height = scene_width, scene_height - misc.log(f"Creating renderer of type {p['renderer_type']}") - ren = renderer.create_renderer( - width, height, p["renderer_type"], mode=renderer_mode, shading="flat" - ) - # Load object models in the new renderer. - for obj_id in dp_model["obj_ids"]: - misc.log(f"Loading 3D model of object {obj_id}...") - model_path = dp_model["model_tpath"].format(obj_id=obj_id) - model_color = None - if not p["vis_orig_color"]: - model_color = tuple(colors[(obj_id - 1) % len(colors)]) - ren.add_object(obj_id, model_path, surf_color=model_color) +# if classical BOP19 format define render modalities +# The H3 BOP24 format for HOT3D does not include depth images, so this is irrelevant +if not p['dataset'] == "hot3d": + renderer_modalities = [] + if p["vis_rgb"]: + renderer_modalities.append("rgb") + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): + renderer_modalities.append("depth") + renderer_mode = "+".join(renderer_modalities) + +# Create a renderer. +# if HOT3D dataset, create separate renderers for Quest3 and Aria with different image sizes +if p["dataset"] == "hot3d": + quest3_im_size = dp_split["quest3_im_size"][dp_split["quest3_eval_modality"]] + aria_im_size = dp_split["aria_im_size"][dp_split["aria_eval_modality"]] + quest3_ren = renderer_htt.RendererHtt(quest3_im_size, p["renderer_type"], shading="flat") + aria_ren = renderer_htt.RendererHtt(aria_im_size, p["renderer_type"], shading="flat") +else: # classical BOP format + width, height = dp_split["im_size"] + ren = renderer.create_renderer( + width, height, p["renderer_type"], mode=renderer_mode, shading="flat" + ) +# Load object models. 
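Because HOT3D mixes two devices with different image sizes, the patched script keeps one renderer per device and later picks the right one from the scene id (see the next hunk). A standalone sketch of that selection, with placeholder scene-id sets and renderer objects:

quest3_scene_ids = {1, 2, 3}     # placeholder ids; the script reads them from dp_split
aria_scene_ids = {4, 5}

renderers = {
    "quest3": "renderer sized for Quest3 images",   # placeholder objects
    "aria": "renderer sized for Aria images",
}

def pick_renderer(scene_id):
    if scene_id in quest3_scene_ids:
        return renderers["quest3"]
    if scene_id in aria_scene_ids:
        return renderers["aria"]
    raise ValueError("scene {} is not assigned to a device".format(scene_id))

print(pick_renderer(4))  # renderer sized for Aria images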
+models = {} +for obj_id in dp_model["obj_ids"]: + misc.log("Loading 3D model of object {}...".format(obj_id)) + model_path = dp_model["model_tpath"].format(obj_id=obj_id) + model_color = None + if not p["vis_orig_color"]: + model_color = tuple(colors[(obj_id - 1) % len(colors)]) + if p["dataset"] == "hot3d": + quest3_ren.add_object(obj_id, model_path, surf_color=model_color) + aria_ren.add_object(obj_id, model_path, surf_color=model_color) + else: + ren.add_object(obj_id, model_path, surf_color=model_color) + +scene_ids = dataset_params.get_present_scene_ids(dp_split) +for scene_id in scene_ids: + tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id) + if p["dataset"] == "hot3d": # for other dataset the renderer does not change + # find which renderer to use (quest3 or aria) + if scene_id in dp_split["test_quest3_scene_ids"] or scene_id in dp_split["train_quest3_scene_ids"]: + ren = quest3_ren + elif scene_id in dp_split["test_aria_scene_ids"] or scene_id in dp_split["train_aria_scene_ids"]: + ren = aria_ren # Load scene info and ground-truth poses. scene_camera = inout.load_scene_camera(dp_split[tpath_keys["scene_camera_tpath"]].format(scene_id=scene_id)) scene_gt = inout.load_scene_gt(dp_split[tpath_keys["scene_gt_tpath"]].format(scene_id=scene_id)) @@ -198,9 +191,9 @@ ) ) - # Retrieve camera intrinsics. if p['dataset'] == 'hot3d': cam = pose_error_htt.create_camera_model(scene_camera[im_id]) + # TODO might delete if-else here else: cam = scene_camera[im_id]["cam_K"] @@ -231,60 +224,60 @@ } ) - # Load the color and depth images and prepare images for rendering. - rgb = None - if p["vis_rgb"]: - # rgb_tpath is an alias refering to the sensor|modality image paths on which the poses are rendered - im_tpath = tpath_keys["rgb_tpath"] - # check for BOP classic (itodd) - rgb_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, 'rgb') - if im_tpath == "rgb_tpath" and not rgb_available: - im_tpath = "gray_tpath" - + if p["dataset"] == "hot3d": + # load the image of the eval modality rgb = inout.load_im( - dp_split[im_tpath].format(scene_id=scene_id, im_id=im_id) + dp_split[dp_split["eval_modality"](scene_id) + "_tpath"].format(scene_id=scene_id, im_id=im_id) ) - # if image is grayscale (e.g. quest3), convert it to 3 channels + # if image is grayscale (quest3), convert it to 3 channels if rgb.ndim == 2: rgb = np.dstack([rgb, rgb, rgb]) - else: - rgb = rgb[:,:,:3] # should we keep this? + else: + # Load the color and depth images and prepare images for rendering. 
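For HOT3D the split's eval_modality is a callable that maps a scene id to a modality name, and the branch above turns that name into the image-template key. A standalone sketch of the lookup; the modality rule and the template strings are invented:

dp_split = {
    "eval_modality": lambda scene_id: "gray1" if scene_id < 100 else "rgb",
    "gray1_tpath": "{scene_id:06d}/gray1/{im_id:06d}.jpg",
    "rgb_tpath": "{scene_id:06d}/rgb/{im_id:06d}.jpg",
}

scene_id, im_id = 42, 7
modality = dp_split["eval_modality"](scene_id)          # "gray1" for this scene
im_path = dp_split[modality + "_tpath"].format(scene_id=scene_id, im_id=im_id)
print(im_path)  # 000042/gray1/000007.jpg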
+ rgb = None + if p["vis_rgb"]: + if "rgb" in dp_split["im_modalities"] or p["dataset_split_type"] == "pbr": + rgb = inout.load_im( + dp_split["rgb_tpath"].format(scene_id=scene_id, im_id=im_id) + )[:, :, :3] + elif "gray" in dp_split["im_modalities"]: + gray = inout.load_im( + dp_split["gray_tpath"].format(scene_id=scene_id, im_id=im_id) + ) + rgb = np.dstack([gray, gray, gray]) + else: + raise ValueError("RGB nor gray images are available.") depth = None - if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): - depth_available = dataset_params.sensor_has_modality(dp_split, scene_sensor, "depth") - if not depth_available: - misc.log(f"{scene_sensor} has no depth data, skipping depth visualization") - p["vis_depth_diff"] = False - p["vis_rgb_resolve_visib"] = False - else: + if p["dataset"] != "hot3d": + if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]): depth = inout.load_depth( - dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id) + dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id) ) depth *= scene_camera[im_id]["depth_scale"] # Convert to [mm]. # Path to the output RGB visualization. - split = "{}_{}".format(p["dataset_split"], scene_sensor) if scene_sensor else p["dataset_split"] vis_rgb_path = None if p["vis_rgb"]: vis_rgb_path = p["vis_rgb_tpath"].format( vis_path=p["vis_path"], dataset=p["dataset"], - split=split, + split=p["dataset_split"], scene_id=scene_id, im_id=im_id, ) # Path to the output depth difference visualization. vis_depth_diff_path = None - if p["vis_depth_diff"]: - vis_depth_diff_path = p["vis_depth_diff_tpath"].format( - vis_path=p["vis_path"], - dataset=p["dataset"], - split=split, - scene_id=scene_id, - im_id=im_id, - ) + if p["dataset"] != "hot3d": + if p["vis_depth_diff"]: + vis_depth_diff_path = p["vis_depth_diff_tpath"].format( + vis_path=p["vis_path"], + dataset=p["dataset"], + split=p["dataset_split"], + scene_id=scene_id, + im_id=im_id, + ) # Visualization. visualization.vis_object_poses( diff --git a/scripts/vis_object_symmetries.py b/scripts/vis_object_symmetries.py index 6065dd0d..617645e2 100644 --- a/scripts/vis_object_symmetries.py +++ b/scripts/vis_object_symmetries.py @@ -18,7 +18,7 @@ ################################################################################ p = { # See dataset_params.py for options. - "dataset": "xyzibd", + "dataset": "itodd", # Type of the renderer (used for the VSD pose error function). "renderer_type": "vispy", # Options: 'vispy', 'cpp', 'python'. # See misc.get_symmetry_transformations(). @@ -48,12 +48,13 @@ if p["dataset"] == "tless": model_type = "cad" dp_model = dataset_params.get_model_params(p["datasets_path"], p["dataset"], model_type) +dp_camera = dataset_params.get_camera_params(p["datasets_path"], p["dataset"]) -# Use reasonable camera intrinsics default for rendering (copied from T-LESS) -width, height = 1280, 1024 -fx, fy, cx, cy = 1075, 1073, 641, 507 +K = dp_camera["K"] +fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] # Create a renderer. 
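vis_object_symmetries.py now reads intrinsics from the dataset's camera.json via get_camera_params instead of the hard-coded values removed above; unpacking K is plain indexing. A numpy sketch using the former T-LESS defaults as the example matrix:

import numpy as np

K = np.array([
    [1075.0,    0.0, 641.0],
    [   0.0, 1073.0, 507.0],
    [   0.0,    0.0,   1.0],
])

fx, fy = K[0, 0], K[1, 1]   # focal lengths [px]
cx, cy = K[0, 2], K[1, 2]   # principal point [px]
print(fx, fy, cx, cy)       # 1075.0 1073.0 641.0 507.0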
+width, height = dp_camera["im_size"] ren = renderer.create_renderer( width, height, p["renderer_type"], mode="rgb", shading="flat" ) diff --git a/setup.py b/setup.py index 4a943484..f91558ce 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,7 @@ from setuptools import setup, find_packages -package_name = 'bop_toolkit_lib' - setup( - name=package_name, + name="bop_toolkit_lib", version="1.0", packages=find_packages(exclude=("docs")), install_requires=["pytz", "vispy>=0.6.5", "PyOpenGL==3.1.0", "pypng", "cython"], @@ -11,9 +9,4 @@ author_email="tom.hodan@gmail.com, Martin.Sundermeyer@dlr.de", license="MIT license", package_data={"bop_toolkit_lib": ["*"]}, - data_files=[ - ('share/ament_index/resource_index/packages', - ['resource/' + package_name]), - ('share/' + package_name, ['package.xml']), - ], )