diff --git a/egomimic/algo/hpt.py b/egomimic/algo/hpt.py index 5de7e0b7..4189ed81 100644 --- a/egomimic/algo/hpt.py +++ b/egomimic/algo/hpt.py @@ -1,3 +1,4 @@ +import logging import os from collections import OrderedDict from functools import partial @@ -26,6 +27,8 @@ reverse_kl_from_samples, ) +_log = logging.getLogger(__name__) + class HPTModel(nn.Module): """ @@ -979,6 +982,21 @@ def process_batch_for_training(self, batch): processed_batch[embodiment_id] = self.data_schematic.normalize_data( processed_batch[embodiment_id], embodiment_id ) + + norm_actions = processed_batch[embodiment_id][ac_key] + bad_batch_mask = norm_actions.abs().amax(dim=(-1, -2)) > 5 + if bad_batch_mask.any(): + demo_numbers = processed_batch[embodiment_id].get("demo_number") + indices = processed_batch[embodiment_id].get("_index") + for i in bad_batch_mask.nonzero(as_tuple=True)[0].tolist(): + ep = demo_numbers[i].item() if demo_numbers is not None else "?" + idx = indices[i].item() if indices is not None else "?" + max_val = norm_actions[i].abs().amax().item() + _log.warning( + f"Post-normalization action value out of range (max={max_val:.3f} > 5) " + f"| episode={ep}, episode_index={idx}" + ) + processed_batch[embodiment_id]["embodiment"] = torch.tensor( [embodiment_id], device=self.device, dtype=torch.int64 ) diff --git a/egomimic/hydra_configs/data/eva_human_cotrain.yaml b/egomimic/hydra_configs/data/eva_human_cotrain.yaml index cabf760d..659788f3 100644 --- a/egomimic/hydra_configs/data/eva_human_cotrain.yaml +++ b/egomimic/hydra_configs/data/eva_human_cotrain.yaml @@ -10,20 +10,28 @@ train_datasets: transform_list: _target_: egomimic.rldb.embodiment.eva.Eva.get_transform_list filters: - episode_hash: "2025-12-26-18-07-46-296000" - mode: total - aria_bimanual: + task: + - "fold_clothes" + lab: "rl2" + robot_name: "eva_bimanual" + is_deleted: false + mode: train + scale_bimanual: _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver resolver: _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver - folder_path: /coc/flash7/scratch/egoverseDebugDatasets/aria + folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale key_map: - _target_: egomimic.rldb.embodiment.human.Aria.get_keymap + _target_: egomimic.rldb.embodiment.human.Scale.get_keymap transform_list: - _target_: egomimic.rldb.embodiment.human.Aria.get_transform_list + _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list filters: - episode_hash: "2025-09-20-17-47-54-000000" - mode: total + task: + - "[flagship] Folding Clothes" + lab: "scale" + robot_name: "scale_bimanual" + is_deleted: false + mode: train valid_datasets: eva_bimanual: _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver @@ -35,31 +43,40 @@ valid_datasets: transform_list: _target_: egomimic.rldb.embodiment.eva.Eva.get_transform_list filters: - episode_hash: "2025-12-26-18-07-46-296000" - mode: total - aria_bimanual: + task: + - "fold_clothes" + lab: "rl2" + robot_name: "eva_bimanual" + is_deleted: false + mode: valid + scale_bimanual: _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver resolver: _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver - folder_path: /coc/flash7/scratch/egoverseDebugDatasets/aria + folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale key_map: - _target_: egomimic.rldb.embodiment.human.Aria.get_keymap + _target_: egomimic.rldb.embodiment.human.Scale.get_keymap transform_list: - _target_: egomimic.rldb.embodiment.human.Aria.get_transform_list + _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list filters: - episode_hash: "2025-09-20-17-47-54-000000" - mode: total + task: + - "Folding Clothes" + - "[flagship] Folding Clothes" + lab: "scale" + robot_name: "scale_bimanual" + is_deleted: false + mode: valid train_dataloader_params: eva_bimanual: - batch_size: 32 + batch_size: 64 num_workers: 10 - aria_bimanual: - batch_size: 32 + scale_bimanual: + batch_size: 64 num_workers: 10 valid_dataloader_params: eva_bimanual: - batch_size: 32 + batch_size: 64 num_workers: 10 - aria_bimanual: - batch_size: 32 + scale_bimanual: + batch_size: 64 num_workers: 10 \ No newline at end of file diff --git a/egomimic/hydra_configs/data/scale.yaml b/egomimic/hydra_configs/data/scale.yaml index 44022e7b..22274c16 100644 --- a/egomimic/hydra_configs/data/scale.yaml +++ b/egomimic/hydra_configs/data/scale.yaml @@ -5,32 +5,42 @@ train_datasets: _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver resolver: _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver - folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale + folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale key_map: _target_: egomimic.rldb.embodiment.human.Scale.get_keymap transform_list: _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list filters: - episode_hash: "69199812208123403bbdb24f" - mode: total + task: + - "[flagship] Folding Clothes" + - "Folding Clothes" + lab: "scale" + robot_name: "scale_bimanual" + is_deleted: false + mode: train valid_datasets: scale_bimanual: _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver resolver: _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver - folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale + folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale key_map: _target_: egomimic.rldb.embodiment.human.Scale.get_keymap transform_list: _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list filters: - episode_hash: "69199812208123403bbdb24f" - mode: total + task: + - "Folding Clothes" + - "[flagship] Folding Clothes" + lab: "scale" + robot_name: "scale_bimanual" + is_deleted: false + mode: valid train_dataloader_params: scale_bimanual: - batch_size: 32 + batch_size: 64 num_workers: 10 valid_dataloader_params: scale_bimanual: - batch_size: 32 + batch_size: 64 num_workers: 10 diff --git a/egomimic/hydra_configs/hydra/launcher/submitit.yaml b/egomimic/hydra_configs/hydra/launcher/submitit.yaml index c56f2cd5..237c925f 100644 --- a/egomimic/hydra_configs/hydra/launcher/submitit.yaml +++ b/egomimic/hydra_configs/hydra/launcher/submitit.yaml @@ -10,9 +10,9 @@ account: "rl2-lab" # Slurm account (e.g., 'my_accou cpus_per_task: 12 # Number of CPUs per task nodes: ${launch_params.nodes} # Number of nodes tasks_per_node: ${launch_params.gpus_per_node} # Use variable for tasks per node -gres: "gpu:a40:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count +gres: "gpu:l40s:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count qos: "short" # Slurm QoS timeout_min: 2880 # Timeout in minutes (48 hours) -exclude: "protocol, puma" # Nodes to exclude +exclude: "protocol, puma, bishop" # Nodes to exclude additional_parameters: requeue: true \ No newline at end of file diff --git a/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml b/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml index d4d84f76..cb8b9576 100644 --- a/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml +++ b/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml @@ -112,7 +112,7 @@ robomimic_model: optimizer: _target_: torch.optim.AdamW _partial_: true - lr: 3e-4 + lr: 1e-5 weight_decay: 0.0001 scheduler: diff --git a/egomimic/hydra_configs/model/hpt_bc_flow_scale_300M.yaml b/egomimic/hydra_configs/model/hpt_bc_flow_scale_300M.yaml new file mode 100644 index 00000000..049a254f --- /dev/null +++ b/egomimic/hydra_configs/model/hpt_bc_flow_scale_300M.yaml @@ -0,0 +1,116 @@ +_target_: egomimic.pl_utils.pl_model.ModelWrapper + +robomimic_model: + _target_: egomimic.algo.hpt.HPT + data_schematic: _${data.dataset.data_schematic} + camera_transforms: + scale_bimanual: + _target_: egomimic.utils.egomimicUtils.CameraTransforms + intrinsics_key: "scale" # change to base_half if using half res + extrinsics_key: "scale" + + diffusion: true + 6dof: true + ac_keys: + scale_bimanual: "actions_cartesian" + trunk: + embed_dim: 840 # changed from 256 #84 + num_blocks: 24 # changed from 16 + num_heads: 10 # changed from 8 + token_postprocessing: "action_token" + observation_horizon: 1 + action_horizon: 64 + no_trunk: false + use_domain_embedding: true + drop_path: 0.1 + weight_init_style: "pytorch" + + multitask: false + pretrained: false + pretrained_checkpoint: "" # TODO + reverse_kl_samples: 8 + + domains: ["scale_bimanual"] + shared_obs_keys: ["front_img_1"] + + shared_stem_specs: + front_img_1: + _target_: egomimic.models.hpt_nets.MLPPolicyStem + input_dim: 840 # changed from 512 + output_dim: 840 #changed + widths: [840] # changed from 840 + specs: + random_horizon_masking: false + cross_attn: + crossattn_latent: 18 + crossattn_heads: 10 + crossattn_dim_head: 140 # changed from 256 + crossattn_modality_dropout: 0.1 + modality_embed_dim: 840 # changed from 840 + + stem_specs: + scale_bimanual: + state_ee_pose: + _target_: egomimic.models.hpt_nets.MLPPolicyStem + input_dim: 12 + output_dim: 840 # changed from 840 + widths: [840] # changed from 840 + specs: + random_horizon_masking: false + cross_attn: + crossattn_latent: 18 + crossattn_heads: 10 + crossattn_dim_head: 140 # changed from 256 changed from 1024 + crossattn_modality_dropout: 0.1 + modality_embed_dim: 840 # changed from 840 changed from 1536 + head_specs: + scale_bimanual: + _target_: egomimic.models.fm_policy.FMPolicy + action_horizon: 100 + num_inference_steps: 50 + pooling: null + time_dist: "beta" + infer_ac_dims: + scale_bimanual: 12 + model: + _target_: egomimic.models.denoising_nets.CrossTransformer + nblocks: 6 + cond_dim: 840 # changed from 256 changed from 1536 + hidden_dim: 320 #changed from 128 + act_dim: 12 + act_seq: 100 + n_heads: 5 # changed from 4 changed from 16 + dropout: 0.1 + mlp_layers: 5 # edit num of mlp layers + mlp_ratio: 5 + encoder_specs: + front_img_1: + _target_: egomimic.models.hpt_nets.ResNet + output_dim: 840 # changed from 512 changed from 1536 + train_image_augs: + _target_: torchvision.transforms.Compose + transforms: + - _target_: torchvision.transforms.ColorJitter + brightness: 0.1 + contrast: 0.1 + saturation: 0.1 + hue: 0.05 + - _target_: torchvision.transforms.Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + eval_image_augs: + _target_: torchvision.transforms.Compose + transforms: + - _target_: torchvision.transforms.Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] +optimizer: + _target_: torch.optim.AdamW + _partial_: true + lr: 5e-5 + weight_decay: 0.0001 +scheduler: + _target_: torch.optim.lr_scheduler.CosineAnnealingLR + _partial_: true + T_max: 5500 + eta_min: 1e-5 \ No newline at end of file diff --git a/egomimic/hydra_configs/model/hpt_cotrain_scale_flow_shared_head.yaml b/egomimic/hydra_configs/model/hpt_cotrain_scale_flow_shared_head.yaml index f68305e6..c248a266 100644 --- a/egomimic/hydra_configs/model/hpt_cotrain_scale_flow_shared_head.yaml +++ b/egomimic/hydra_configs/model/hpt_cotrain_scale_flow_shared_head.yaml @@ -59,7 +59,7 @@ robomimic_model: optimizer: _target_: torch.optim.AdamW _partial_: true - lr: 1e-4 + lr: 5e-5 weight_decay: 0.0001 scheduler: _target_: torch.optim.lr_scheduler.CosineAnnealingLR diff --git a/egomimic/hydra_configs/train_zarr.yaml b/egomimic/hydra_configs/train_zarr.yaml index 0cc53a23..7b8d9dde 100644 --- a/egomimic/hydra_configs/train_zarr.yaml +++ b/egomimic/hydra_configs/train_zarr.yaml @@ -1,10 +1,10 @@ defaults: - - model: hpt_bc_flow_eva + - model: hpt_bc_flow_scale - paths: default - trainer: ddp - debug: null - logger: wandb - - data: eva + - data: scale - callbacks: checkpoints - override hydra/launcher: submitit - _self_ diff --git a/egomimic/rldb/zarr/action_chunk_transforms.py b/egomimic/rldb/zarr/action_chunk_transforms.py index ea0ce6ad..3c0d3a9d 100644 --- a/egomimic/rldb/zarr/action_chunk_transforms.py +++ b/egomimic/rldb/zarr/action_chunk_transforms.py @@ -15,9 +15,9 @@ from abc import abstractmethod import numpy as np +import torch from projectaria_tools.core.sophus import SE3 from scipy.spatial.transform import Rotation as R -import torch from egomimic.utils.pose_utils import ( _interpolate_euler, @@ -224,7 +224,12 @@ def transform(self, batch: dict) -> dict: f"'{self.pose_key}'" ) xyz = pose[:3] - ypr = R.from_quat(pose[3:7]).as_euler("ZYX", degrees=False) + quat = ( + pose[3:7] + if np.linalg.norm(pose[3:7]) > 0 + else np.array([0.0, 0.0, 0.0, 1.0]) + ) + ypr = R.from_quat(quat).as_euler("ZYX", degrees=False) batch[self.output_key] = np.concatenate([xyz, ypr], axis=0) return batch @@ -414,10 +419,12 @@ def transform(self, batch): return batch + # --------------------------------------------------------------------------- # Type Transforms # --------------------------------------------------------------------------- + class NumpyToTensor(Transform): def __init__(self, keys: list[str]): self.keys = keys @@ -429,5 +436,7 @@ def transform(self, batch: dict) -> dict: elif isinstance(batch[key], torch.Tensor): batch[key] = batch[key].clone() else: - raise ValueError(f"NumpyToTensor expects key '{key}' to be a numpy array or torch tensor, got {type(batch[key])}") + raise ValueError( + f"NumpyToTensor expects key '{key}' to be a numpy array or torch tensor, got {type(batch[key])}" + ) return batch diff --git a/egomimic/rldb/zarr/zarr_dataset_multi.py b/egomimic/rldb/zarr/zarr_dataset_multi.py index bb7b08b7..09790cd9 100644 --- a/egomimic/rldb/zarr/zarr_dataset_multi.py +++ b/egomimic/rldb/zarr/zarr_dataset_multi.py @@ -740,15 +740,13 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor]: try: data = transform.transform(data) except Exception as e: - logger.error(f"Error transforming data: {e}") - logger.error(f"Data: {data}") - logger.error(f"Transform: {transform}") - logger.error(f"Error: {e}") - if idx == 0: - logger.error("Error in first frame") - raise e - else: - return self.__getitem__(0) + fallback = idx - 10 if idx > 0 else 1 + logger.warning( + f"Transform failed for episode {Path(self.episode_path).name} " + f"frame {idx} ({type(e).__name__}: {e}). " + f"Falling back to frame {fallback}." + ) + return self.__getitem__(fallback) for k, v in data.items(): if isinstance(v, np.ndarray): @@ -801,16 +799,13 @@ def get_item_keys(self, idx: int, keys) -> dict[str, torch.Tensor]: try: out = transform.transform(out) except Exception as e: - logger.error(f"Error transforming data: {e}") - # NOTE: avoid dumping full arrays into logs - logger.error(f"Data keys: {list(out.keys())}") - logger.error(f"Transform: {transform}") - logger.error(f"Error: {e}") - if idx == 0: - logger.error("Error in first frame") - raise e - else: - return self.get_item_keys(0, keys) + fallback = idx - 10 if idx > 0 else 1 + logger.warning( + f"Transform failed for episode {Path(self.episode_path).name} " + f"frame {idx} ({type(e).__name__}: {e}). " + f"Falling back to frame {fallback}." + ) + return self.get_item_keys(fallback, keys) for k, v in out.items(): if isinstance(v, np.ndarray): diff --git a/egomimic/utils/pose_utils.py b/egomimic/utils/pose_utils.py index 5edf9ffe..6ab5cd3d 100644 --- a/egomimic/utils/pose_utils.py +++ b/egomimic/utils/pose_utils.py @@ -57,8 +57,10 @@ def _interpolate_quat_wxyz(seq: np.ndarray, chunk_length: int) -> np.ndarray: quat_xyzw = quat_wxyz[:, [1, 2, 3, 0]] norms = np.linalg.norm(quat_xyzw, axis=1, keepdims=True) - if np.any(norms <= 0): - raise ValueError("Found zero-norm quaternion in input sequence.") + zero_mask = (norms < 1e-6).squeeze(1) + if zero_mask.any(): + quat_xyzw[zero_mask] = [0.0, 0.0, 0.0, 1.0] # identity quaternion (xyzw) + norms[zero_mask] = 1.0 quat_xyzw = quat_xyzw / norms # Enforce sign continuity to avoid long-path interpolation. @@ -133,7 +135,10 @@ def _xyzwxyz_to_matrix(xyzwxyz: np.ndarray) -> np.ndarray: dtype = xyzwxyz.dtype if np.issubdtype(xyzwxyz.dtype, np.floating) else np.float64 mats = np.broadcast_to(np.eye(4, dtype=dtype), (B, 4, 4)).copy() - quat_xyzw = xyzwxyz[:, [4, 5, 6, 3]] + quat_xyzw = xyzwxyz[:, [4, 5, 6, 3]].copy() + zero_mask = np.linalg.norm(quat_xyzw, axis=1) < 1e-6 + if zero_mask.any(): + quat_xyzw[zero_mask] = [0.0, 0.0, 0.0, 1.0] # identity (xyzw) mats[:, :3, :3] = R.from_quat(quat_xyzw).as_matrix() mats[:, :3, 3] = xyzwxyz[:, :3]