Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions egomimic/scripts/aria_process/aria_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def zarr_job(
dataset_name: str,
arm: str,
description: str = "",
) -> tuple[Path, Path] | None:
save_mp4: bool = True,
) -> tuple[Path, Path | None] | None:
"""
Convert one <vrs, vrs.json, mps_*> trio to a Zarr dataset.
"""
Expand All @@ -71,9 +72,9 @@ def zarr_job(
nthreads=2,
debug=False,
benchmark=False,
save_mp4=True,
save_mp4=save_mp4,
description=description,
dataset_name=dataset_name
)

return aria_zarr_main(args)
return aria_zarr_main(args)
46 changes: 25 additions & 21 deletions egomimic/scripts/aria_process/aria_to_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
compute_orientation_rotation_matrix,
slam_to_rgb,
undistort_to_linear,
cpf_to_rgb
)
from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
from projectaria_tools.core import data_provider, mps
Expand Down Expand Up @@ -518,13 +517,16 @@ def process_episode(episode_path, arm: str, low_res=False, benchmark=False):
rgb_timestamps_ns = np.array(stream_timestamps_ns["rgb"])

print(f"[DEBUG] LENGTH BEFORE CLEANING: {len(hand_cartesian_pose)}")
[hand_cartesian_pose, hand_keypoints_pose, head_pose], images, eye_gaze, rgb_timestamps_ns = (
AriaVRSExtractor.clean_data(
poses=[hand_cartesian_pose, hand_keypoints_pose, head_pose],
images=images,
eye_gaze=eye_gaze,
timestamps=rgb_timestamps_ns
)
(
[hand_cartesian_pose, hand_keypoints_pose, head_pose],
images,
eye_gaze,
rgb_timestamps_ns,
) = AriaVRSExtractor.clean_data(
poses=[hand_cartesian_pose, hand_keypoints_pose, head_pose],
images=images,
eye_gaze=eye_gaze,
timestamps=rgb_timestamps_ns,
)
# actions, pose, images = AriaVRSExtractor.clean_data_projection(actions=actions, pose=pose, images=images, arm=arm)
print(f"[DEBUG] LENGTH AFTER CLEANING: {len(hand_cartesian_pose)}")
Expand Down Expand Up @@ -1598,19 +1600,21 @@ def extract_episode(
enable_sharding=False,
task="",
)
mp4_path = output_dir / f"{episode_name}.mp4"
W, H = 960, 720
p = start_ffmpeg_mp4(mp4_path, W, H, fps=30, pix_fmt="rgb24")
for video_images in AriaVRSExtractor.iter_images(
episode_path, chunk_length=256, height=H, width=W, focal_mult=3
):
for image in video_images:
image = prep_frame(image, H, W)
if image is None:
continue
p.stdin.write(image.tobytes())
p.stdin.close()
p.wait()
mp4_path = None
if self.save_mp4:
mp4_path = output_dir / f"{episode_name}.mp4"
W, H = 960, 720
p = start_ffmpeg_mp4(mp4_path, W, H, fps=30, pix_fmt="rgb24")
for video_images in AriaVRSExtractor.iter_images(
episode_path, chunk_length=256, height=H, width=W, focal_mult=3
):
for image in video_images:
image = prep_frame(image, H, W)
if image is None:
continue
p.stdin.write(image.tobytes())
p.stdin.close()
p.wait()
return zarr_path, mp4_path

def extract_episodes(
Expand Down
19 changes: 17 additions & 2 deletions egomimic/scripts/aria_process/aria_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,23 @@ def build_camera_matrix(provider, pose_t):
return T_world_rgb_camera


def undistort_to_linear(provider, stream_ids, raw_image, camera_label="rgb", height=480, width=640, focal_mult=2):
def undistort_to_linear(
provider,
stream_ids,
raw_image,
camera_label="rgb",
height=480,
width=640,
focal_mult=2,
):
camera_label = provider.get_label_from_stream_id(stream_ids[camera_label])
calib = provider.get_device_calibration().get_camera_calib(camera_label)
warped = calibration.get_linear_camera_calibration(
height, width, 133.25430222 * focal_mult, camera_label, calib.get_transform_device_camera()
height,
width,
133.25430222 * focal_mult,
camera_label,
calib.get_transform_device_camera(),
)
warped_image = calibration.distort_by_calibration(raw_image, warped, calib)
warped_rot = np.rot90(warped_image, k=3)
Expand Down Expand Up @@ -106,6 +118,7 @@ def slam_to_rgb(provider):

return transform


def compute_orientation_rotation_matrix(palm_pose, wrist_pose, palm_normal):
x_axis = wrist_pose - palm_pose
x_axis = np.ravel(x_axis) / np.linalg.norm(x_axis)
Expand All @@ -119,6 +132,7 @@ def compute_orientation_rotation_matrix(palm_pose, wrist_pose, palm_normal):
rot_matrix = np.column_stack([-1 * x_axis, y_axis, z_axis])
return rot_matrix


def coordinate_frame_to_ypr(x_axis, y_axis, z_axis):
rot_matrix = np.column_stack([x_axis, y_axis, z_axis])
rotation = R.from_matrix(rot_matrix)
Expand All @@ -127,6 +141,7 @@ def coordinate_frame_to_ypr(x_axis, y_axis, z_axis):
euler_ypr = np.zeros_like(euler_ypr)
return euler_ypr


def cpf_to_rgb(provider):
"""
Get cpf (eye tracking origin) to rgb camera transform (rotated upright)
Expand Down
Loading