3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,9 @@

# main

- Added example for parquet conversion (to support X-Mobility training)
- Added robot linear and angular velocity to state (to support X-Mobility training)
- Fixed bug where replay rendering did not include segmentation info
- Added support for surface normals image in replay rendering
- Added support for instance ID segmentation rendering
- Added camera world pose to state
2 changes: 2 additions & 0 deletions README.md
@@ -399,6 +399,8 @@ The state_dict has the following schema
"robot.action": np.ndarray, # [2] - Linear, angular command velocity
"robot.position": np.ndarray, # [3] - XYZ
"robot.orientation": np.ndarray, # [4] - Quaternion
"robot.linear_velocity": np.ndarray, # [3] - The linear velocity in world frame (As retrieved by robot.get_linear_velocity() in isaac sim)
"robot.angular_velocity": np.ndarray, # [3] - The angular velocity of the robot in the world frame. (As retrieved by robot.get_angular_velocity() in isaac sim)
"robot.joint_positions": np.ndarray, # [J] - Joint positions
"robot.joint_velocities": np.ndarray, # [J] - Joint velocities
"robot.front_camera.left.rgb_image": np.ndarray, # [HxWx3], np.uint8 - RGB image
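As a quick illustration of consuming the two new fields, here is a minimal sketch using the `Reader` class from the examples directory (the recording path is a placeholder):

```python
import numpy as np
from reader import Reader  # Reader as used in examples/05_convert_to_parquet.py

reader = Reader(recording_path="path/to/recording")  # placeholder path

state = reader.read_state_dict_common(index=0)
linear = state["robot.linear_velocity"]    # (3,) world-frame linear velocity
angular = state["robot.angular_velocity"]  # (3,) world-frame angular velocity

print("speed:", float(np.linalg.norm(linear)))
```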
75 changes: 75 additions & 0 deletions examples/05_convert_to_parquet.py
@@ -0,0 +1,75 @@
import argparse
import pandas
from reader import Reader
import numpy as np
import tqdm
import PIL.Image
import io

parser = argparse.ArgumentParser()
parser.add_argument("recording_path")
parser.add_argument("output_path")
args = parser.parse_args()

reader = Reader(recording_path=args.recording_path)

def numpy_array_to_flattened_columns(key: str, value: np.ndarray):
    # Parquet columns must be 1-D, so flatten the array and record its original
    # shape in a companion "<key>.shape" column for reconstruction.
    columns = {
        f"{key}": value.flatten()
    }
    # Only multi-dimensional arrays need a shape column.
    if value.ndim > 1:
        columns[f"{key}.shape"] = tuple(value.shape)
    return columns


def numpy_array_to_jpg_columns(key: str, value: np.ndarray):
    # JPEG-encode the HxWx3 uint8 RGB array; lossy, but far smaller than raw pixels.
    image = PIL.Image.fromarray(value)
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")
    columns = {
        key: buffer.getvalue()
    }
    return columns


output = None

for index in tqdm.tqdm(range(len(reader))):

    data_dict = {}

    # Common data (saved as raw arrays)
    state_common = reader.read_state_dict_common(index=index)
    state_common.update(reader.read_state_dict_depth(index=index))
    state_common.update(reader.read_state_dict_segmentation(index=index))
    # TODO: handle normals

    for k, v in state_common.items():
        if isinstance(v, np.ndarray):
            columns = numpy_array_to_flattened_columns(k, v)
        else:
            columns = {k: v}
        data_dict.update(columns)

    # RGB data (saved as JPEG)
    state_rgb = reader.read_state_dict_rgb(index=index)
    for k, v in state_rgb.items():
        if isinstance(v, np.ndarray):
            columns = numpy_array_to_jpg_columns(k, v)
        else:
            columns = {k: v}
        data_dict.update(columns)

    # Use the first frame to initialize the output columns.
    if output is None:
        output = pandas.DataFrame(columns=data_dict.keys())

    output.loc[index] = data_dict


output.to_parquet(args.output_path, engine="pyarrow")
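Usage, assuming the script is run from the repository root: `python examples/05_convert_to_parquet.py <recording_path> <output.parquet>`.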
19 changes: 19 additions & 0 deletions examples/06_load_parquet.py
@@ -0,0 +1,19 @@
import argparse
import pandas
import matplotlib.pyplot as plt
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument("parquet_path")
args = parser.parse_args()

data = pandas.read_parquet(args.parquet_path, engine="pyarrow")


print(data.columns)

# Each cell holds a flattened length-3 array; stack into an (N, 3) matrix.
vel = np.stack(data['robot.linear_velocity'].to_numpy())

plt.plot(vel[:, 0], 'r-', label='x')
plt.plot(vel[:, 1], 'g-', label='y')
plt.xlabel('frame')
plt.ylabel('linear velocity (world frame)')
plt.legend()
plt.show()
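Usage: `python examples/06_load_parquet.py <output.parquet>`.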
28 changes: 28 additions & 0 deletions examples/PARQUET_DATA_FORMAT.md
@@ -0,0 +1,28 @@
# Parquet Data Format

Below is a description of the fields in a typical MobilityGen recording, common to all scenarios.

| Field | Type | Shape | Description |
|-------|------|-------|-------------|
| robot.action | array | 2 | The commanded linear and angular velocity in the robot frame. |
| robot.position | array | 3 | The XYZ position of the robot in the world frame. |
| robot.orientation | array | 4 | The orientation quaternion of the robot in the world frame. |
| robot.joint_positions | array | N | The joint positions of the robot. |
| robot.joint_velocities | array | N | The joint velocities of the robot. |
| robot.linear_velocity | array | 3 | The linear velocity of the robot in the world frame (retrieved by robot.get_linear_velocity() in Isaac Sim). |
| robot.angular_velocity | array | 3 | The angular velocity of the robot in the world frame (retrieved by robot.get_angular_velocity() in Isaac Sim). |
| robot.front_camera.left.segmentation_info | dict | | The segmentation info dictionary as retrieved by the Isaac Sim replicator annotator. |
| robot.front_camera.left.segmentation_image | array | (HxW) flattened | The segmentation image as retrieved by the Isaac Sim replicator annotator, flattened. |
| robot.front_camera.left.segmentation_image.shape | tuple | 2 | The segmentation image shape. |
| robot.front_camera.left.rgb_image | bytes | | The RGB camera image, JPEG-compressed. |
| robot.front_camera.left.depth_image | array | (HxW) flattened | The depth image (in meters), flattened. |
| robot.front_camera.left.depth_image.shape | tuple | 2 | The shape of the depth image. |

> Note: fields with similar semantics exist for the other cameras; they are omitted here for brevity. Flattened image columns can be restored as shown below.
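A minimal sketch of restoring a flattened image from its companion `.shape` column and decoding the JPEG bytes with PIL (the parquet path is a placeholder):

```python
import io
import numpy as np
import pandas
import PIL.Image

data = pandas.read_parquet("recording.parquet", engine="pyarrow")  # placeholder path
row = data.iloc[0]

# Reshape the flattened depth image using its stored shape.
shape = tuple(row["robot.front_camera.left.depth_image.shape"])
depth = row["robot.front_camera.left.depth_image"].reshape(shape)

# Decode the JPEG-compressed RGB image back to an HxWx3 uint8 array.
rgb = np.asarray(PIL.Image.open(io.BytesIO(row["robot.front_camera.left.rgb_image"])))
```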

Below are the fields specific to the path-following scenario:

| Field | Type | Shape | Description |
|-------|------|-------|-------------|
| target_path | array | (Nx2) flattened | The target path generated by the path planner, in world coordinates. It is updated whenever the planner is called, which happens when the robot reaches a goal (or at the start of a new recording). |
| target_path.shape | tuple | 2 | The shape of the target path array. |
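The same reshape pattern applies here; continuing from the sketch above:

```python
# Restore the flattened target path to its original (N, 2) layout.
target_path = row["target_path"].reshape(tuple(row["target_path.shape"]))
```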
4 changes: 4 additions & 0 deletions exts/omni.ext.mobility_gen/omni/ext/mobility_gen/robots.py
@@ -139,6 +139,8 @@ def __init__(self,
        self.orientation = Buffer()
        self.joint_positions = Buffer()
        self.joint_velocities = Buffer()
        self.linear_velocity = Buffer()
        self.angular_velocity = Buffer()
        self.front_camera = front_camera

    @classmethod
@@ -187,6 +189,8 @@ def update_state(self):
        self.orientation.set_value(ori)
        self.joint_positions.set_value(self.robot.get_joint_positions())
        self.joint_velocities.set_value(self.robot.get_joint_velocities())
        self.linear_velocity.set_value(self.robot.get_linear_velocity())
        self.angular_velocity.set_value(self.robot.get_angular_velocity())
        super().update_state()

    def write_replay_data(self):
2 changes: 1 addition & 1 deletion scripts/replay.py
@@ -38,7 +38,7 @@
parser.add_argument("--rgb_enabled", type=bool, default=True)
parser.add_argument("--segmentation_enabled", type=bool, default=True)
parser.add_argument("--depth_enabled", type=bool, default=True)
parser.add_argument("--instance_id_segmentation_enabled", type=bool, default=True)
parser.add_argument("--instance_id_segmentation_enabled", type=bool, default=False)
parser.add_argument("--normals_enabled", type=bool, default=False)
parser.add_argument("--render_rt_subframes", type=int, default=1)
parser.add_argument("--render_interval", type=int, default=1)
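One caveat with these flags (it applies equally to `replay_directory.py` and `replay_implementation.py`): argparse's `type=bool` does not parse strings the way one might expect, since `bool("False")` is `True`, so any non-empty value enables the flag regardless of the new defaults. A sketch of a stricter parser, using a hypothetical `str2bool` helper:

```python
import argparse

def str2bool(value: str) -> bool:
    # Accept common true/false spellings; reject anything else.
    if value.lower() in ("1", "true", "yes"):
        return True
    if value.lower() in ("0", "false", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

parser = argparse.ArgumentParser()
parser.add_argument("--instance_id_segmentation_enabled", type=str2bool, default=False)

args = parser.parse_args(["--instance_id_segmentation_enabled", "false"])
assert args.instance_id_segmentation_enabled is False
```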
2 changes: 1 addition & 1 deletion scripts/replay_directory.py
@@ -38,7 +38,7 @@
parser.add_argument("--rgb_enabled", type=bool, default=True)
parser.add_argument("--segmentation_enabled", type=bool, default=True)
parser.add_argument("--depth_enabled", type=bool, default=True)
parser.add_argument("--instance_id_segmentation_enabled", type=bool, default=True)
parser.add_argument("--instance_id_segmentation_enabled", type=bool, default=False)
parser.add_argument("--normals_enabled", type=bool, default=False)
parser.add_argument("--render_rt_subframes", type=int, default=1)
parser.add_argument("--render_interval", type=int, default=1)
20 changes: 17 additions & 3 deletions scripts/replay_implementation.py
@@ -31,6 +31,7 @@
from PIL import Image
import glob
import tqdm
import time

import omni.replicator.core as rep

@@ -48,7 +49,7 @@
parser.add_argument("--rgb_enabled", type=bool, default=True)
parser.add_argument("--segmentation_enabled", type=bool, default=True)
parser.add_argument("--depth_enabled", type=bool, default=True)
parser.add_argument("--instance_id_segmentation_enabled", type=bool, default=True)
parser.add_argument("--instance_id_segmentation_enabled", type=bool, default=False)
parser.add_argument("--normals_enabled", type=bool, default=False)
parser.add_argument("--render_rt_subframes", type=int, default=1)
parser.add_argument("--render_interval", type=int, default=1)
@@ -99,11 +100,13 @@
print(f"\tRendering RT subframes: {args.render_rt_subframes}")
print(f"\tRender interval: {args.render_interval}")

t0 = time.perf_counter()
count = 0
for step in tqdm.tqdm(range(0, num_steps, args.render_interval)):

    state_dict = reader.read_state_dict(index=step)
    state_dict_original = reader.read_state_dict(index=step)

    scenario.load_state_dict(state_dict)
    scenario.load_state_dict(state_dict_original)
    scenario.write_replay_data()

    simulation_app.update()
@@ -117,6 +120,12 @@
    scenario.update_state()

    state_dict = scenario.state_dict_common()

    # Overwrite with values from the original recording so physics-based
    # quantities stay correct; None values are skipped so freshly rendered
    # outputs are not clobbered.
    for k, v in state_dict_original.items():
        if v is not None:
            state_dict[k] = v

    state_rgb = scenario.state_dict_rgb()
    state_segmentation = scenario.state_dict_segmentation()
    state_depth = scenario.state_dict_depth()
@@ -128,4 +137,9 @@
    writer.write_state_dict_depth(state_depth, step)
    writer.write_state_dict_normals(state_normals, step)

    count += 1

t1 = time.perf_counter()

print(f"Process time per frame: {(t1 - t0) / count:.3f} s")

simulation_app.close()