Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,7 @@ lerobot_test/
**/lerobot_test/
**/lerobot_test/**
**/robot/models/**
**/robot/models/
**/robot/models/
external/scale/scripts/datasets
_turbojpeg_lib/
external/scale/scripts/scale_data/
20 changes: 11 additions & 9 deletions egomimic/algo/hpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,14 +971,12 @@ def process_batch_for_training(self, batch):
"""
processed_batch = {}

for embodiment_id, _batch in batch.items():
for embodiment_name, _batch in batch.items():
embodiment_id = get_embodiment_id(embodiment_name)
processed_batch[embodiment_id] = {}
for key, value in _batch.items():
key_name = self.data_schematic.lerobot_key_to_keyname(
key, embodiment_id
)
if key_name is not None:
processed_batch[embodiment_id][key_name] = value
if key is not None:
processed_batch[embodiment_id][key] = value

ac_key = self.ac_keys[embodiment_id]
if len(processed_batch[embodiment_id][ac_key].shape) != 3:
Expand All @@ -992,6 +990,9 @@ def process_batch_for_training(self, batch):
processed_batch[embodiment_id] = self.data_schematic.normalize_data(
processed_batch[embodiment_id], embodiment_id
)
processed_batch[embodiment_id]["embodiment"] = torch.tensor(
[embodiment_id], device=self.device, dtype=torch.int64
)

return processed_batch

Expand All @@ -1009,12 +1010,12 @@ def forward_training(self, batch):
predictions = OrderedDict()
hpt_batches = {}
self.training_step += 1
for embodiment_id, _batch in batch.items():
for embodiment_id, _batch in batch.items(): # TODO why don't we use batch with embodiment_name to keep things consistent
embodiment_name = get_embodiment(embodiment_id).lower()
cam_keys = self.camera_keys[embodiment_id]
proprio_keys = self.proprio_keys[embodiment_id]
lang_keys = self.lang_keys[embodiment_id]
ac_key = self.ac_keys[embodiment_id]
embodiment_name = get_embodiment(embodiment_id).lower()
aux_ac_keys = self.auxiliary_ac_keys.get(embodiment_name, [])
data = self._robomimic_to_hpt_data(
_batch, cam_keys, proprio_keys, lang_keys, ac_key, aux_ac_keys
Expand Down Expand Up @@ -1059,11 +1060,11 @@ def forward_eval(self, batch):
"""
unnorm_preds = {}
for embodiment_id, _batch in batch.items():
embodiment_name = get_embodiment(embodiment_id).lower()
cam_keys = self.camera_keys[embodiment_id]
proprio_keys = self.proprio_keys[embodiment_id]
lang_keys = self.lang_keys[embodiment_id]
ac_key = self.ac_keys[embodiment_id]
embodiment_name = get_embodiment(embodiment_id).lower()
aux_ac_keys = self.auxiliary_ac_keys.get(embodiment_name, [])
data = self._robomimic_to_hpt_data(
_batch, cam_keys, proprio_keys, lang_keys, ac_key, aux_ac_keys
Expand Down Expand Up @@ -1250,6 +1251,7 @@ def visualize_preds(self, predictions, batch):
Returns:
ims (np.ndarray): (B, H, W, 3) - images with actions drawn on top
"""

embodiment_id = batch["embodiment"][0].item()
embodiment_name = get_embodiment(embodiment_id).lower()
ac_key = self.ac_keys[embodiment_id]
Expand Down
60 changes: 60 additions & 0 deletions egomimic/hydra_configs/data/zarr_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper

train_datasets:
scale_bimanual:
_target_: egomimic.rldb.zarr.ZarrDataset
Episode_path: external/scale/scripts/datasets/2026-02-19-03-21-23-570038/697c1e6c0cac8cd3c4873844_episode_000000.zarr
key_map:
front_img_1:
key_type: camera_keys
zarr_key: observations.images.front_img_1
ee_pose:
key_type: proprio_keys
zarr_key: observations.state.ee_pose
horizon: 100
actions_cartesian:
key_type: action_keys
zarr_key: actions_ee_se3_world
horizon: 100
actions_keypoints:
key_type: action_keys
zarr_key: actions_keypoint_world
horizon: 100
actions_head_cartesian:
key_type: action_keys
zarr_key: actions_head_se3_world
horizon: 100
valid_datasets:
scale_bimanual:
_target_: egomimic.rldb.zarr.ZarrDataset
Episode_path: external/scale/scripts/datasets/2026-02-19-03-21-23-570038/697c1e6c0cac8cd3c4873844_episode_000000.zarr
key_map:
front_img_1:
key_type: camera_keys
zarr_key: observations.images.front_img_1
ee_pose:
key_type: proprio_keys
zarr_key: observations.state.ee_pose
horizon: 100
actions_cartesian:
key_type: action_keys
zarr_key: actions_ee_se3_world
horizon: 100
actions_keypoints:
key_type: action_keys
zarr_key: actions_keypoint_world
horizon: 100
actions_head_cartesian:
key_type: action_keys
zarr_key: actions_head_se3_world
horizon: 100

train_dataloader_params:
  # Keyed by dataset name so the entry lines up with `scale_bimanual` under
  # train_datasets. NOTE(review): presumably looked up by dataset name the same
  # way the validation params are -- confirm against the data module; the
  # previous `dataset1` key matches no dataset declared in this file.
  scale_bimanual:
    batch_size: 32
    num_workers: 10

valid_dataloader_params:
  # The data module's val_dataloader fetches these with
  # valid_dataloader_params.get(dataset_name, {}), iterating valid_datasets.
  # The key must therefore equal the dataset name (`scale_bimanual`); a
  # non-matching key such as `dataset1` silently yields {} and these settings
  # are ignored.
  scale_bimanual:
    batch_size: 32
    num_workers: 10
122 changes: 122 additions & 0 deletions egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
_target_: egomimic.pl_utils.pl_model.ModelWrapper
robomimic_model:
_target_: egomimic.algo.hpt.HPT
data_schematic: _${data.dataset.data_schematic}
camera_transforms:
scale_bimanual:
_target_: egomimic.utils.egomimicUtils.CameraTransforms
intrinsics_key: "scale" # change to base_half if using half res
extrinsics_key: "scale"

diffusion: true
6dof: true

ac_keys:
scale_bimanual: "actions_cartesian"

trunk:
embed_dim: 256
num_blocks: 64
num_heads: 8
token_postprocessing: "action_token"
observation_horizon: 1
action_horizon: 64
no_trunk: false
use_domain_embedding: true
drop_path: 0.1
weight_init_style: "pytorch"

multitask: false
pretrained: false
pretrained_checkpoint: "" # TODO
reverse_kl_samples: 8

domains: ["scale_bimanual"]
shared_obs_keys: ["front_img_1"]

shared_stem_specs:
front_img_1:
_target_: egomimic.models.hpt_nets.MLPPolicyStem
input_dim: 256
output_dim: 256
widths: [256]
specs:
random_horizon_masking: false
cross_attn:
crossattn_latent: 16
crossattn_heads: 8
crossattn_dim_head: 64
crossattn_modality_dropout: 0.1
modality_embed_dim: 256

stem_specs:
scale_bimanual:
state_ee_pose:
_target_: egomimic.models.hpt_nets.MLPPolicyStem
input_dim: 14
output_dim: 256
widths: [256]
specs:
random_horizon_masking: false
cross_attn:
crossattn_latent: 16
crossattn_heads: 8
crossattn_dim_head: 64
crossattn_modality_dropout: 0.1
modality_embed_dim: 256

head_specs:
scale_bimanual:
_target_: egomimic.models.fm_policy.FMPolicy
action_horizon: 100
num_inference_steps: 50
pooling: null
time_dist: "beta"
infer_ac_dims:
scale_bimanual: 14
model:
_target_: egomimic.models.denoising_nets.CrossTransformer
nblocks: 6
cond_dim: 256
hidden_dim: 128
act_dim: 14
act_seq: 100
n_heads: 4
dropout: 0.1
mlp_layers: 4
mlp_ratio: 4

encoder_specs:
front_img_1:
_target_: egomimic.models.hpt_nets.ResNet
output_dim: 256

train_image_augs:
_target_: torchvision.transforms.Compose
transforms:
- _target_: torchvision.transforms.ColorJitter
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.05
- _target_: torchvision.transforms.Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
eval_image_augs:
_target_: torchvision.transforms.Compose
transforms:
- _target_: torchvision.transforms.Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]

optimizer:
_target_: torch.optim.AdamW
_partial_: true
lr: 3e-4
weight_decay: 0.0001

scheduler:
_target_: torch.optim.lr_scheduler.CosineAnnealingLR
_partial_: true
T_max: 1400
eta_min: 1e-5
33 changes: 33 additions & 0 deletions egomimic/hydra_configs/train_zarr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Hydra defaults list: one config is selected per group; `_self_` is listed
# last, after all the group entries.
defaults:
# NOTE(review): this change set adds model/hpt_bc_flow_scale.yaml and
# data/zarr_test.yaml, yet the entries below select `hpt_bc_flow_aria` and
# `test_multi_zarr` -- confirm both exist and are the intended defaults.
- model: hpt_bc_flow_aria
- paths: default
- trainer: ddp
- debug: null
- logger: wandb
- data: test_multi_zarr
- callbacks: checkpoints
- override hydra/launcher: submitit
- _self_

name: test
description: test
ckpt_path: null
train: true
eval: false

eval_class:
_target_: egomimic.scripts.evaluation.Eve
mode: real
arm: both
eval_path: "./logs/eval/${name}_${now:%Y-%m-%d_%H-%M-%S}"

hydra:
run:
# Dir should be experiment_name/description_{timestamp}
dir: ./logs/${name}/${description}_${now:%Y-%m-%d_%H-%M-%S}
sweep:
dir: ./logs/${name}/${description}_${now:%Y-%m-%d_%H-%M-%S}

launch_params:
gpus_per_node: 1
nodes: 1
2 changes: 1 addition & 1 deletion egomimic/pl_utils/pl_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def val_dataloader(self):
iterables = dict()
for dataset_name, dataset in self.valid_datasets.items():
dataset_params = self.valid_dataloader_params.get(dataset_name, {})
iterables[dataset.embodiment] = DataLoader(
iterables[dataset_name] = DataLoader(
dataset,
shuffle=False,
collate_fn=self.collate_fn,
Expand Down
4 changes: 3 additions & 1 deletion egomimic/rldb/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ benchmark_files/
*.egg-info
*.parquet
*.pyc
*.hdf5
*.hdf5
*/lerobot
*/zarr
3 changes: 3 additions & 0 deletions egomimic/rldb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class EMBODIMENT(Enum):
MECKA_BIMANUAL = 9
MECKA_RIGHT_ARM = 10
MECKA_LEFT_ARM = 11
SCALE_BIMANUAL = 12
SCALE_RIGHT_ARM = 13
SCALE_LEFT_ARM = 14


SEED = 42
Expand Down
6 changes: 6 additions & 0 deletions egomimic/rldb/zarr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@
MultiDataset,
ZarrDataset,
ZarrEpisode,
LocalEpisodeResolver,
S3EpisodeResolver,
)
#from egomimic.rldb.zarr.zarr_writer import ZarrWriter

# Public API of the package. Every name listed here must be bound in this
# module: `from egomimic.rldb.zarr import *` raises AttributeError on the first
# entry that is not. "ZarrWriter" is intentionally left out while its import
# above remains commented out; re-add it together with the import.
__all__ = [
    "EpisodeResolver",
    "MultiDataset",
    "ZarrDataset",
    "ZarrEpisode",
    "LocalEpisodeResolver",
    "S3EpisodeResolver",
]
15 changes: 10 additions & 5 deletions egomimic/rldb/zarr/action_chunk_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
_matrix_to_xyzypr,
_xyzwxyz_to_matrix,
)

from egomimic.utils.egomimicUtils import EXTRINSICS
# ---------------------------------------------------------------------------
# Base Transform
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -436,12 +436,16 @@ def build_eva_bimanual_transform_list(
obs_key: str = "observations.state.ee_pose",
chunk_length: int = 100,
stride: int = 1,
extrinsics_key: str = "x5Dec13_2",
is_quat: bool = True,
left_extra_batch_key: dict | None = None,
right_extra_batch_key: dict | None = None,
) -> list[Transform]:
"""Canonical EVA bimanual transform pipeline used by tests and notebooks."""
transform_list: list[Transform] = [
extrinsics = EXTRINSICS[extrinsics_key]
left_extrinsics_pose = _matrix_to_xyzwxyz(extrinsics["left"][None, :])[0]
right_extrinsics_pose = _matrix_to_xyzwxyz(extrinsics["right"][None, :])[0]
left_extra_batch_key = {"left_extrinsics_pose": left_extrinsics_pose}
right_extra_batch_key = {"right_extrinsics_pose": right_extrinsics_pose}
transform_list = [
ActionChunkCoordinateFrameTransform(
target_world=left_target_world,
chunk_world=left_cmd_world,
Expand Down Expand Up @@ -495,6 +499,7 @@ def build_eva_bimanual_transform_list(
stride=stride,
),
]

if is_quat:
transform_list.append(
XYZWXYZ_to_XYZYPR(
Expand Down Expand Up @@ -644,4 +649,4 @@ def build_aria_bimanual_transform_list(
DeleteKeys(keys_to_delete=keys_to_delete),
]
)
return transform_list
return transform_list
Loading