Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions egomimic/algo/hpt.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
from collections import OrderedDict
from functools import partial
Expand Down Expand Up @@ -26,6 +27,8 @@
reverse_kl_from_samples,
)

_log = logging.getLogger(__name__)


class HPTModel(nn.Module):
"""
Expand Down Expand Up @@ -979,6 +982,21 @@ def process_batch_for_training(self, batch):
processed_batch[embodiment_id] = self.data_schematic.normalize_data(
processed_batch[embodiment_id], embodiment_id
)

norm_actions = processed_batch[embodiment_id][ac_key]
bad_batch_mask = norm_actions.abs().amax(dim=(-1, -2)) > 5
if bad_batch_mask.any():
demo_numbers = processed_batch[embodiment_id].get("demo_number")
indices = processed_batch[embodiment_id].get("_index")
for i in bad_batch_mask.nonzero(as_tuple=True)[0].tolist():
ep = demo_numbers[i].item() if demo_numbers is not None else "?"
idx = indices[i].item() if indices is not None else "?"
max_val = norm_actions[i].abs().amax().item()
_log.warning(
f"Post-normalization action value out of range (max={max_val:.3f} > 5) "
f"| episode={ep}, episode_index={idx}"
)

processed_batch[embodiment_id]["embodiment"] = torch.tensor(
[embodiment_id], device=self.device, dtype=torch.int64
)
Expand Down
61 changes: 39 additions & 22 deletions egomimic/hydra_configs/data/eva_human_cotrain.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,28 @@ train_datasets:
transform_list:
_target_: egomimic.rldb.embodiment.eva.Eva.get_transform_list
filters:
episode_hash: "2025-12-26-18-07-46-296000"
mode: total
aria_bimanual:
task:
- "fold_clothes"
lab: "rl2"
robot_name: "eva_bimanual"
is_deleted: false
mode: train
scale_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: /coc/flash7/scratch/egoverseDebugDatasets/aria
folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
key_map:
_target_: egomimic.rldb.embodiment.human.Aria.get_keymap
_target_: egomimic.rldb.embodiment.human.Scale.get_keymap
transform_list:
_target_: egomimic.rldb.embodiment.human.Aria.get_transform_list
_target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
filters:
episode_hash: "2025-09-20-17-47-54-000000"
mode: total
task:
- "[flagship] Folding Clothes"
lab: "scale"
robot_name: "scale_bimanual"
is_deleted: false
mode: train
valid_datasets:
eva_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
Expand All @@ -35,31 +43,40 @@ valid_datasets:
transform_list:
_target_: egomimic.rldb.embodiment.eva.Eva.get_transform_list
filters:
episode_hash: "2025-12-26-18-07-46-296000"
mode: total
aria_bimanual:
task:
- "fold_clothes"
lab: "rl2"
robot_name: "eva_bimanual"
is_deleted: false
mode: valid
scale_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: /coc/flash7/scratch/egoverseDebugDatasets/aria
folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
key_map:
_target_: egomimic.rldb.embodiment.human.Aria.get_keymap
_target_: egomimic.rldb.embodiment.human.Scale.get_keymap
transform_list:
_target_: egomimic.rldb.embodiment.human.Aria.get_transform_list
_target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
filters:
episode_hash: "2025-09-20-17-47-54-000000"
mode: total
task:
- "Folding Clothes"
- "[flagship] Folding Clothes"
lab: "scale"
robot_name: "scale_bimanual"
is_deleted: false
mode: valid
train_dataloader_params:
eva_bimanual:
batch_size: 32
batch_size: 64
num_workers: 10
aria_bimanual:
batch_size: 32
scale_bimanual:
batch_size: 64
num_workers: 10
valid_dataloader_params:
eva_bimanual:
batch_size: 32
batch_size: 64
num_workers: 10
aria_bimanual:
batch_size: 32
scale_bimanual:
batch_size: 64
num_workers: 10
26 changes: 18 additions & 8 deletions egomimic/hydra_configs/data/scale.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,42 @@ train_datasets:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale
folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
key_map:
_target_: egomimic.rldb.embodiment.human.Scale.get_keymap
transform_list:
_target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
filters:
episode_hash: "69199812208123403bbdb24f"
mode: total
task:
- "[flagship] Folding Clothes"
- "Folding Clothes"
lab: "scale"
robot_name: "scale_bimanual"
is_deleted: false
mode: train
valid_datasets:
scale_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale
folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
key_map:
_target_: egomimic.rldb.embodiment.human.Scale.get_keymap
transform_list:
_target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
filters:
episode_hash: "69199812208123403bbdb24f"
mode: total
task:
- "Folding Clothes"
- "[flagship] Folding Clothes"
lab: "scale"
robot_name: "scale_bimanual"
is_deleted: false
mode: valid
train_dataloader_params:
scale_bimanual:
batch_size: 32
batch_size: 64
num_workers: 10
valid_dataloader_params:
scale_bimanual:
batch_size: 32
batch_size: 64
num_workers: 10
4 changes: 2 additions & 2 deletions egomimic/hydra_configs/hydra/launcher/submitit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ account: "rl2-lab" # Slurm account (e.g., 'my_accou
cpus_per_task: 12 # Number of CPUs per task
nodes: ${launch_params.nodes} # Number of nodes
tasks_per_node: ${launch_params.gpus_per_node} # Use variable for tasks per node
gres: "gpu:a40:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count
gres: "gpu:l40s:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count
qos: "short" # Slurm QoS
timeout_min: 2880 # Timeout in minutes (48 hours)
exclude: "protocol, puma" # Nodes to exclude
exclude: "protocol, puma, bishop" # Nodes to exclude
additional_parameters:
requeue: true
2 changes: 1 addition & 1 deletion egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ robomimic_model:
optimizer:
_target_: torch.optim.AdamW
_partial_: true
lr: 3e-4
lr: 1e-5
weight_decay: 0.0001

scheduler:
Expand Down
116 changes: 116 additions & 0 deletions egomimic/hydra_configs/model/hpt_bc_flow_scale_300M.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
_target_: egomimic.pl_utils.pl_model.ModelWrapper

robomimic_model:
_target_: egomimic.algo.hpt.HPT
data_schematic: _${data.dataset.data_schematic}
camera_transforms:
scale_bimanual:
_target_: egomimic.utils.egomimicUtils.CameraTransforms
intrinsics_key: "scale" # change to base_half if using half res
extrinsics_key: "scale"

diffusion: true
6dof: true
ac_keys:
scale_bimanual: "actions_cartesian"
trunk:
embed_dim: 840 # changed from 256; 840 / num_heads (10) = 84 dims per head
num_blocks: 24 # changed from 16
num_heads: 10 # changed from 8
token_postprocessing: "action_token"
observation_horizon: 1
action_horizon: 64
no_trunk: false
use_domain_embedding: true
drop_path: 0.1
weight_init_style: "pytorch"

multitask: false
pretrained: false
pretrained_checkpoint: "" # TODO: fill in a checkpoint path (left empty while pretrained is false)
reverse_kl_samples: 8

domains: ["scale_bimanual"]
shared_obs_keys: ["front_img_1"]

shared_stem_specs:
front_img_1:
_target_: egomimic.models.hpt_nets.MLPPolicyStem
input_dim: 840 # changed from 512
output_dim: 840 #changed
widths: [840] # changed from 840
specs:
random_horizon_masking: false
cross_attn:
crossattn_latent: 18
crossattn_heads: 10
crossattn_dim_head: 140 # changed from 256
crossattn_modality_dropout: 0.1
modality_embed_dim: 840 # changed from 840

stem_specs:
scale_bimanual:
state_ee_pose:
_target_: egomimic.models.hpt_nets.MLPPolicyStem
input_dim: 12
output_dim: 840 # changed from 840
widths: [840] # changed from 840
specs:
random_horizon_masking: false
cross_attn:
crossattn_latent: 18
crossattn_heads: 10
crossattn_dim_head: 140 # changed from 256; changed from 1024
crossattn_modality_dropout: 0.1
modality_embed_dim: 840 # changed from 840 changed from 1536
head_specs:
scale_bimanual:
_target_: egomimic.models.fm_policy.FMPolicy
action_horizon: 100
num_inference_steps: 50
pooling: null
time_dist: "beta"
infer_ac_dims:
scale_bimanual: 12
model:
_target_: egomimic.models.denoising_nets.CrossTransformer
nblocks: 6
cond_dim: 840 # changed from 256; changed from 1536
hidden_dim: 320 #changed from 128
act_dim: 12
act_seq: 100
n_heads: 5 # changed from 4; changed from 16
dropout: 0.1
mlp_layers: 5 # number of MLP layers; edit this value to change it
mlp_ratio: 5
encoder_specs:
front_img_1:
_target_: egomimic.models.hpt_nets.ResNet
output_dim: 840 # changed from 512; changed from 1536
train_image_augs:
_target_: torchvision.transforms.Compose
transforms:
- _target_: torchvision.transforms.ColorJitter
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.05
- _target_: torchvision.transforms.Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
eval_image_augs:
_target_: torchvision.transforms.Compose
transforms:
- _target_: torchvision.transforms.Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
optimizer:
_target_: torch.optim.AdamW
_partial_: true
lr: 5e-5
weight_decay: 0.0001
scheduler:
_target_: torch.optim.lr_scheduler.CosineAnnealingLR
_partial_: true
T_max: 5500
eta_min: 1e-5
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ robomimic_model:
optimizer:
_target_: torch.optim.AdamW
_partial_: true
lr: 1e-4
lr: 5e-5
weight_decay: 0.0001
scheduler:
_target_: torch.optim.lr_scheduler.CosineAnnealingLR
Expand Down
4 changes: 2 additions & 2 deletions egomimic/hydra_configs/train_zarr.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
defaults:
- model: hpt_bc_flow_eva
- model: hpt_bc_flow_scale
- paths: default
- trainer: ddp
- debug: null
- logger: wandb
- data: eva
- data: scale
- callbacks: checkpoints
- override hydra/launcher: submitit
- _self_
Expand Down
15 changes: 12 additions & 3 deletions egomimic/rldb/zarr/action_chunk_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
from abc import abstractmethod

import numpy as np
import torch
from projectaria_tools.core.sophus import SE3
from scipy.spatial.transform import Rotation as R
import torch

from egomimic.utils.pose_utils import (
_interpolate_euler,
Expand Down Expand Up @@ -224,7 +224,12 @@ def transform(self, batch: dict) -> dict:
f"'{self.pose_key}'"
)
xyz = pose[:3]
ypr = R.from_quat(pose[3:7]).as_euler("ZYX", degrees=False)
quat = (
pose[3:7]
if np.linalg.norm(pose[3:7]) > 0
else np.array([0.0, 0.0, 0.0, 1.0])
)
ypr = R.from_quat(quat).as_euler("ZYX", degrees=False)
batch[self.output_key] = np.concatenate([xyz, ypr], axis=0)
return batch

Expand Down Expand Up @@ -414,10 +419,12 @@ def transform(self, batch):

return batch


# ---------------------------------------------------------------------------
# Type Transforms
# ---------------------------------------------------------------------------


class NumpyToTensor(Transform):
def __init__(self, keys: list[str]):
self.keys = keys
Expand All @@ -429,5 +436,7 @@ def transform(self, batch: dict) -> dict:
elif isinstance(batch[key], torch.Tensor):
batch[key] = batch[key].clone()
else:
raise ValueError(f"NumpyToTensor expects key '{key}' to be a numpy array or torch tensor, got {type(batch[key])}")
raise ValueError(
f"NumpyToTensor expects key '{key}' to be a numpy array or torch tensor, got {type(batch[key])}"
)
return batch
Loading