GaTech-RL2 · aidang3019 · Mar 4, 2026
diff --git a/egomimic/algo/hpt.py b/egomimic/algo/hpt.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from collections import OrderedDict
 from functools import partial
@@ -26,6 +27,8 @@
     reverse_kl_from_samples,
 )
 
+_log = logging.getLogger(__name__)
+
 
 class HPTModel(nn.Module):
     """
@@ -979,6 +982,21 @@ def process_batch_for_training(self, batch):
             processed_batch[embodiment_id] = self.data_schematic.normalize_data(
                 processed_batch[embodiment_id], embodiment_id
             )
+
+            norm_actions = processed_batch[embodiment_id][ac_key]
+            bad_batch_mask = norm_actions.abs().amax(dim=(-1, -2)) > 5
+            if bad_batch_mask.any():
+                demo_numbers = processed_batch[embodiment_id].get("demo_number")
+                indices = processed_batch[embodiment_id].get("_index")
+                for i in bad_batch_mask.nonzero(as_tuple=True)[0].tolist():
+                    ep = demo_numbers[i].item() if demo_numbers is not None else "?"
+                    idx = indices[i].item() if indices is not None else "?"
+                    max_val = norm_actions[i].abs().amax().item()
+                    _log.warning(
+                        f"Post-normalization action value out of range (max={max_val:.3f} > 5) "
+                        f"| episode={ep}, episode_index={idx}"
+                    )
+
             processed_batch[embodiment_id]["embodiment"] = torch.tensor(
                 [embodiment_id], device=self.device, dtype=torch.int64
             )

diff --git a/egomimic/hydra_configs/data/eva_human_cotrain.yaml b/egomimic/hydra_configs/data/eva_human_cotrain.yaml
@@ -10,20 +10,28 @@ train_datasets:
       transform_list:
         _target_: egomimic.rldb.embodiment.eva.Eva.get_transform_list
     filters:
-      episode_hash: "2025-12-26-18-07-46-296000"
-    mode: total
-  aria_bimanual:
+      task:
+        - "fold_clothes"
+      lab: "rl2"
+      robot_name: "eva_bimanual"
+      is_deleted: false
+    mode: train
+  scale_bimanual:
     _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
     resolver:
       _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
-      folder_path: /coc/flash7/scratch/egoverseDebugDatasets/aria
+      folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
       key_map:
-        _target_: egomimic.rldb.embodiment.human.Aria.get_keymap
+        _target_: egomimic.rldb.embodiment.human.Scale.get_keymap
       transform_list:
-        _target_: egomimic.rldb.embodiment.human.Aria.get_transform_list
+        _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
     filters:
-      episode_hash: "2025-09-20-17-47-54-000000"
-    mode: total
+      task:
+        - "[flagship] Folding Clothes"
+      lab: "scale"
+      robot_name: "scale_bimanual"
+      is_deleted: false
+    mode: train
 valid_datasets:
   eva_bimanual:
     _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
@@ -35,31 +43,40 @@ valid_datasets:
       transform_list:
         _target_: egomimic.rldb.embodiment.eva.Eva.get_transform_list
     filters:
-      episode_hash: "2025-12-26-18-07-46-296000"
-    mode: total
-  aria_bimanual:
+      task:
+        - "fold_clothes"
+      lab: "rl2"
+      robot_name: "eva_bimanual"
+      is_deleted: false
+    mode: valid
+  scale_bimanual:
     _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
     resolver:
       _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
-      folder_path: /coc/flash7/scratch/egoverseDebugDatasets/aria
+      folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
       key_map:
-        _target_: egomimic.rldb.embodiment.human.Aria.get_keymap
+        _target_: egomimic.rldb.embodiment.human.Scale.get_keymap
       transform_list:
-        _target_: egomimic.rldb.embodiment.human.Aria.get_transform_list
+        _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
     filters:
-      episode_hash: "2025-09-20-17-47-54-000000"
-    mode: total
+      task:
+        - "Folding Clothes"
+        - "[flagship] Folding Clothes"
+      lab: "scale"
+      robot_name: "scale_bimanual"
+      is_deleted: false
+    mode: valid
 train_dataloader_params:
   eva_bimanual:
-    batch_size: 32
+    batch_size: 64
     num_workers: 10
-  aria_bimanual:
-    batch_size: 32
+  scale_bimanual:
+    batch_size: 64
     num_workers: 10
 valid_dataloader_params:
   eva_bimanual:
-    batch_size: 32
+    batch_size: 64
     num_workers: 10
-  aria_bimanual:
-    batch_size: 32
+  scale_bimanual:
+    batch_size: 64
     num_workers: 10
diff --git a/egomimic/hydra_configs/data/scale.yaml b/egomimic/hydra_configs/data/scale.yaml
@@ -5,32 +5,42 @@ train_datasets:
     _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
     resolver:
       _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
-      folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale
+      folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
       key_map:
         _target_: egomimic.rldb.embodiment.human.Scale.get_keymap
       transform_list:
         _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
     filters:
-      episode_hash: "69199812208123403bbdb24f"
-    mode: total
+      task:
+        - "[flagship] Folding Clothes"
+        - "Folding Clothes"
+      lab: "scale"
+      robot_name: "scale_bimanual"
+      is_deleted: false
+    mode: train
 valid_datasets:
   scale_bimanual:
     _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
     resolver:
       _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
-      folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale
+      folder_path: /coc/flash7/scratch/egoverseS3ZarrDataset/scale
       key_map:
         _target_: egomimic.rldb.embodiment.human.Scale.get_keymap
       transform_list:
         _target_: egomimic.rldb.embodiment.human.Scale.get_transform_list
     filters:
-      episode_hash: "69199812208123403bbdb24f"
-    mode: total
+      task:
+        - "Folding Clothes"
+        - "[flagship] Folding Clothes"
+      lab: "scale"
+      robot_name: "scale_bimanual"
+      is_deleted: false
+    mode: valid
 train_dataloader_params:
   scale_bimanual:
-    batch_size: 32
+    batch_size: 64
     num_workers: 10
 valid_dataloader_params:
   scale_bimanual:
-    batch_size: 32
+    batch_size: 64
     num_workers: 10
diff --git a/egomimic/hydra_configs/hydra/launcher/submitit.yaml b/egomimic/hydra_configs/hydra/launcher/submitit.yaml
@@ -10,9 +10,9 @@ account: "rl2-lab"                              # Slurm account (e.g., 'my_accou
 cpus_per_task: 12                                   # Number of CPUs per task
 nodes: ${launch_params.nodes}                       # Number of nodes
 tasks_per_node: ${launch_params.gpus_per_node}      # Use variable for tasks per node
-gres: "gpu:a40:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count
+gres: "gpu:l40s:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count
 qos: "short"                                        # Slurm QoS
 timeout_min: 2880                                   # Timeout in minutes (48 hours)
-exclude: "protocol, puma"     # Nodes to exclude
+exclude: "protocol, puma, bishop"     # Nodes to exclude
 additional_parameters:
   requeue: true
diff --git a/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml b/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml
@@ -112,7 +112,7 @@ robomimic_model:
 optimizer:
   _target_: torch.optim.AdamW
   _partial_: true
-  lr: 3e-4
+  lr: 1e-5
   weight_decay: 0.0001
 
 scheduler:

diff --git a/egomimic/hydra_configs/model/hpt_bc_flow_scale_300M.yaml b/egomimic/hydra_configs/model/hpt_bc_flow_scale_300M.yaml
@@ -0,0 +1,116 @@
+_target_: egomimic.pl_utils.pl_model.ModelWrapper
+
+robomimic_model:
+  _target_: egomimic.algo.hpt.HPT
+  data_schematic: _${data.dataset.data_schematic}
+  camera_transforms:
+    scale_bimanual:
+      _target_: egomimic.utils.egomimicUtils.CameraTransforms
+      intrinsics_key: "scale" # change to base_half if using half resolution
+      extrinsics_key: "scale"
+
+  diffusion: true 
+  6dof: true
+  ac_keys:
+    scale_bimanual: "actions_cartesian"
+  trunk:
+    embed_dim: 840 # changed from 256; "84" is presumably the per-head dim (840 / 10 heads) - confirm
+    num_blocks: 24 # changed from 16
+    num_heads: 10 # changed from 8
+    token_postprocessing: "action_token"
+    observation_horizon: 1
+    action_horizon: 64
+    no_trunk: false
+    use_domain_embedding: true
+    drop_path: 0.1
+    weight_init_style: "pytorch"
+
+  multitask: false
+  pretrained: false
+  pretrained_checkpoint: "" # TODO: set the checkpoint path (presumably required once pretrained is true)
+  reverse_kl_samples: 8
+
+  domains: ["scale_bimanual"]
+  shared_obs_keys: ["front_img_1"]
+
+  shared_stem_specs:
+    front_img_1:
+      _target_: egomimic.models.hpt_nets.MLPPolicyStem
+      input_dim: 840 # changed from 512
+      output_dim: 840 # changed
+      widths: [840] # changed (stale note said "from 840"; previous value not recorded)
+      specs:
+        random_horizon_masking: false
+        cross_attn:
+          crossattn_latent: 18
+          crossattn_heads: 10
+          crossattn_dim_head: 140 # changed from 256
+          crossattn_modality_dropout: 0.1
+          modality_embed_dim: 840 # changed (stale note said "from 840"; previous value not recorded)
+
+  stem_specs:
+    scale_bimanual:
+      state_ee_pose:
+        _target_: egomimic.models.hpt_nets.MLPPolicyStem
+        input_dim: 12
+        output_dim: 840 # changed (stale note said "from 840"; previous value not recorded)
+        widths: [840] # changed (stale note said "from 840"; previous value not recorded)
+        specs:
+          random_horizon_masking: false
+          cross_attn:
+            crossattn_latent: 18
+            crossattn_heads: 10
+            crossattn_dim_head: 140 # earlier values: 256, 1024
+            crossattn_modality_dropout: 0.1
+            modality_embed_dim: 840 # earlier values: 840, 1536
+  head_specs:
+    scale_bimanual: 
+      _target_: egomimic.models.fm_policy.FMPolicy
+      action_horizon: 100
+      num_inference_steps: 50
+      pooling: null
+      time_dist: "beta"
+      infer_ac_dims:
+        scale_bimanual: 12
+      model:
+        _target_: egomimic.models.denoising_nets.CrossTransformer
+        nblocks: 6
+        cond_dim: 840 # earlier values: 256, 1536
+        hidden_dim: 320 # changed from 128
+        act_dim: 12
+        act_seq: 100
+        n_heads: 5 # earlier values: 4, 16
+        dropout: 0.1
+        mlp_layers: 5 # number of MLP layers; edit to resize
+        mlp_ratio: 5
+  encoder_specs:
+    front_img_1:
+      _target_: egomimic.models.hpt_nets.ResNet
+      output_dim: 840 # earlier values: 512, 1536
+  train_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.ColorJitter
+        brightness: 0.1
+        contrast: 0.1
+        saturation: 0.1
+        hue: 0.05
+      - _target_: torchvision.transforms.Normalize
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+  eval_image_augs:
+    _target_: torchvision.transforms.Compose
+    transforms:
+      - _target_: torchvision.transforms.Normalize
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+optimizer:
+  _target_: torch.optim.AdamW
+  _partial_: true
+  lr: 5e-5
+  weight_decay: 0.0001
+scheduler:
+  _target_: torch.optim.lr_scheduler.CosineAnnealingLR
+  _partial_: true
+  T_max: 5500
+  eta_min: 1e-5
diff --git a/egomimic/hydra_configs/model/hpt_cotrain_scale_flow_shared_head.yaml b/egomimic/hydra_configs/model/hpt_cotrain_scale_flow_shared_head.yaml
@@ -59,7 +59,7 @@ robomimic_model:
 optimizer:
   _target_: torch.optim.AdamW
   _partial_: true
-  lr: 1e-4
+  lr: 5e-5
   weight_decay: 0.0001
 scheduler:
   _target_: torch.optim.lr_scheduler.CosineAnnealingLR

diff --git a/egomimic/hydra_configs/train_zarr.yaml b/egomimic/hydra_configs/train_zarr.yaml
@@ -1,10 +1,10 @@
 defaults:
-  - model: hpt_bc_flow_eva
+  - model: hpt_bc_flow_scale
   - paths: default
   - trainer: ddp
   - debug: null
   - logger: wandb
-  - data: eva
+  - data: scale
   - callbacks: checkpoints
   - override hydra/launcher: submitit
   - _self_

diff --git a/egomimic/rldb/zarr/action_chunk_transforms.py b/egomimic/rldb/zarr/action_chunk_transforms.py
@@ -15,9 +15,9 @@
 from abc import abstractmethod
 
 import numpy as np
+import torch
 from projectaria_tools.core.sophus import SE3
 from scipy.spatial.transform import Rotation as R
-import torch
 
 from egomimic.utils.pose_utils import (
     _interpolate_euler,
@@ -224,7 +224,12 @@ def transform(self, batch: dict) -> dict:
                 f"'{self.pose_key}'"
             )
         xyz = pose[:3]
-        ypr = R.from_quat(pose[3:7]).as_euler("ZYX", degrees=False)
+        quat = (
+            pose[3:7]
+            if np.linalg.norm(pose[3:7]) > 0
+            else np.array([0.0, 0.0, 0.0, 1.0])
+        )
+        ypr = R.from_quat(quat).as_euler("ZYX", degrees=False)
         batch[self.output_key] = np.concatenate([xyz, ypr], axis=0)
         return batch
 
@@ -414,10 +419,12 @@ def transform(self, batch):
 
         return batch
 
+
 # ---------------------------------------------------------------------------
 # Type Transforms
 # ---------------------------------------------------------------------------
 
+
 class NumpyToTensor(Transform):
     def __init__(self, keys: list[str]):
         self.keys = keys
@@ -429,5 +436,7 @@ def transform(self, batch: dict) -> dict:
             elif isinstance(batch[key], torch.Tensor):
                 batch[key] = batch[key].clone()
             else:
-                raise ValueError(f"NumpyToTensor expects key '{key}' to be a numpy array or torch tensor, got {type(batch[key])}")
+                raise ValueError(
+                    f"NumpyToTensor expects key '{key}' to be a numpy array or torch tensor, got {type(batch[key])}"
+                )
         return batch