diff --git a/egomimic/hydra_configs/data/scale_zarr_test.yaml b/egomimic/hydra_configs/data/scale_zarr_test.yaml new file mode 100644 index 00000000..80784f19 --- /dev/null +++ b/egomimic/hydra_configs/data/scale_zarr_test.yaml @@ -0,0 +1,71 @@ +_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper +train_datasets: + scale_bimanual: + _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver + resolver: + _target_: egomimic.rldb.zarr.zarr_dataset_multi.LocalEpisodeResolver + folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale/2026-02-24-01-49-24-166324 + key_map: + observations.images.front_img_1: #batch key + key_type: camera_keys # key type + zarr_key: images.front_1 + right.action_ee_pose: + key_type: action_keys + zarr_key: right.obs_ee_pose + horizon: 30 + left.action_ee_pose: + key_type: action_keys + zarr_key: left.obs_ee_pose + horizon: 30 + right.obs_ee_pose: + key_type: proprio_keys + zarr_key: right.obs_ee_pose + left.obs_ee_pose: + key_type: proprio_keys + zarr_key: left.obs_ee_pose + obs_head_pose: + zarr_key: obs_head_pose + key_type: proprio_keys + transform_list: + _target_: egomimic.rldb.zarr.action_chunk_transforms.build_aria_bimanual_transform_list + stride: 1 + mode: train +valid_datasets: + scale_bimanual: + _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver + resolver: + _target_: egomimic.rldb.zarr.zarr_dataset_multi.LocalEpisodeResolver + folder_path: /coc/flash7/scratch/egoverseDebugDatasets/scale/2026-02-24-01-49-24-166324 + key_map: + observations.images.front_img_1: #batch key + key_type: camera_keys # key type + zarr_key: images.front_1 + right.action_ee_pose: + key_type: action_keys + zarr_key: right.obs_ee_pose + horizon: 30 + left.action_ee_pose: + key_type: action_keys + zarr_key: left.obs_ee_pose + horizon: 30 + right.obs_ee_pose: + key_type: proprio_keys + zarr_key: right.obs_ee_pose + left.obs_ee_pose: + key_type: proprio_keys + zarr_key: left.obs_ee_pose + 
obs_head_pose: + zarr_key: obs_head_pose + key_type: proprio_keys + transform_list: + _target_: egomimic.rldb.zarr.action_chunk_transforms.build_aria_bimanual_transform_list + stride: 1 + mode: valid +train_dataloader_params: + scale_bimanual: + batch_size: 32 + num_workers: 10 +valid_dataloader_params: + scale_bimanual: + batch_size: 32 + num_workers: 10 diff --git a/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml b/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml index 2c9d66ef..d4d84f76 100644 --- a/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml +++ b/egomimic/hydra_configs/model/hpt_bc_flow_scale.yaml @@ -53,7 +53,7 @@ robomimic_model: scale_bimanual: state_ee_pose: _target_: egomimic.models.hpt_nets.MLPPolicyStem - input_dim: 14 + input_dim: 12 output_dim: 256 widths: [256] specs: @@ -73,13 +73,13 @@ robomimic_model: pooling: null time_dist: "beta" infer_ac_dims: - scale_bimanual: 14 + scale_bimanual: 12 model: _target_: egomimic.models.denoising_nets.CrossTransformer nblocks: 6 cond_dim: 256 hidden_dim: 128 - act_dim: 14 + act_dim: 12 act_seq: 100 n_heads: 4 dropout: 0.1 diff --git a/egomimic/hydra_configs/train_zarr.yaml b/egomimic/hydra_configs/train_zarr.yaml index 06c530a1..53afea6f 100644 --- a/egomimic/hydra_configs/train_zarr.yaml +++ b/egomimic/hydra_configs/train_zarr.yaml @@ -4,7 +4,7 @@ defaults: - trainer: debug - debug: null - logger: debug - - data: eva_bc_zarr.yaml + - data: eva_bc_zarr - callbacks: checkpoints - override hydra/launcher: submitit - _self_ @@ -115,4 +115,4 @@ data_schematic: # Dynamically fill in these shapes from the dataset mecka_bimanual: front_img_1 scale_bimanual: - front_img_1 \ No newline at end of file + front_img_1 diff --git a/egomimic/scripts/zarr_data_viz.ipynb b/egomimic/scripts/zarr_data_viz.ipynb index 822580fb..c3b0b085 100644 --- a/egomimic/scripts/zarr_data_viz.ipynb +++ b/egomimic/scripts/zarr_data_viz.ipynb @@ -328,8 +328,8 @@ "# EPISODE_PATH = 
Path(\"/coc/flash7/scratch/egoverseDebugDatasets/scale/697a9070da7b91acaf3f2d88_episode_000000.zarr\") # Scale\n", "# intrinsics_key = \"scale\"\n", "\n", - "EPISODE_PATH = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/1767671007927.zarr/\") # Aria\n", - "intrinsics_key = \"base\"\n", + "EPISODE_PATH = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/scale/2026-02-24-01-49-24-166324/697a9070da7b91acaf3f2d88_episode_000002.zarr\") # Aria\n", + "intrinsics_key = \"scale\"\n", "\n", "\n", "key_map = {\n", @@ -350,6 +350,47 @@ ")\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b4c03ca", + "metadata": {}, + "outputs": [], + "source": [ + "# Full MultiDataset via LocalEpisodeResolver (mirrors test_multi_zarr.yaml)\n", + "from egomimic.rldb.zarr.zarr_dataset_multi import LocalEpisodeResolver, MultiDataset\n", + "from egomimic.rldb.zarr.action_chunk_transforms import build_aria_bimanual_transform_list\n", + "\n", + "SCALE_FOLDER = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/scale/2026-02-24-01-49-24-166324\")\n", + "\n", + "key_map = {\n", + " \"images.front_1\": {\"zarr_key\": \"images.front_1\"},\n", + " \"right.obs_ee_pose\": {\"zarr_key\": \"right.obs_ee_pose\"},\n", + " \"left.obs_ee_pose\": {\"zarr_key\": \"left.obs_ee_pose\"},\n", + " \"right.action_ee_pose\": {\"zarr_key\": \"right.obs_ee_pose\", \"horizon\": 30},\n", + " \"left.action_ee_pose\": {\"zarr_key\": \"left.obs_ee_pose\", \"horizon\": 30},\n", + " \"obs_head_pose\": {\"zarr_key\": \"obs_head_pose\"},\n", + "}\n", + "\n", + "transform_list = build_aria_bimanual_transform_list(\n", + " stride=1,\n", + ")\n", + "\n", + "resolver = LocalEpisodeResolver(\n", + " folder_path=SCALE_FOLDER,\n", + " key_map=key_map,\n", + " transform_list=transform_list,\n", + ")\n", + "\n", + "multi_ds = MultiDataset._from_resolver(resolver, mode=\"total\")\n", + "print(f\"MultiDataset total frames: {len(multi_ds)}\")\n", + "print(f\"Underlying episodes: {list(multi_ds.datasets.keys())}\")\n", + 
"\n", + "loader = torch.utils.data.DataLoader(multi_ds, batch_size=1, shuffle=False)\n", + "batch = next(iter(loader))\n", + "print(\"Batch keys:\", list(batch.keys()))" + ] + }, { "cell_type": "code", "execution_count": null, @@ -379,7 +420,61 @@ "outputs": [], "source": [ "batch = next(iter(loader))\n", - "nds(batch)" + "nds(batch)\n", + "print(\"Batch keys:\", list(batch.keys()))\n", + "print(batch[\"right.action_ee_pose\"][0, 0])\n", + "print(batch[\"left.action_ee_pose\"][0, 0])\n", + "print(batch[\"obs_head_pose\"][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e94799c", + "metadata": {}, + "outputs": [], + "source": [ + "left_hand_pose = []\n", + "right_hand_pose = []\n", + "head_pose = []\n", + "for i, batch in enumerate(loader):\n", + " left_hand_pose.append(batch[\"left.action_ee_pose\"][0, 0])\n", + " right_hand_pose.append(batch[\"right.action_ee_pose\"][0, 0])\n", + " head_pose.append(batch[\"obs_head_pose\"][0])\n", + " \n", + " if i > 400:\n", + " break\n", + "left_hand_pose = np.array(left_hand_pose)\n", + "right_hand_pose = np.array(right_hand_pose)\n", + "head_pose = np.array(head_pose)\n", + "\n", + "# chunk the pose to actions(N, 100, 3)\n", + "left_hand_pose_actions = []\n", + "right_hand_pose_actions = []\n", + "head_pose_actions = []\n", + "for i in range(left_hand_pose.shape[0] - 100):\n", + " action_left_hand = left_hand_pose[i:i+100, :]\n", + " action_right_hand = right_hand_pose[i:i+100, :]\n", + " action_head = head_pose[i:i+100, :]\n", + " left_hand_pose_actions.append(action_left_hand)\n", + " right_hand_pose_actions.append(action_right_hand)\n", + " head_pose_actions.append(action_head)\n", + "left_hand_pose_actions = np.array(left_hand_pose_actions)\n", + "right_hand_pose_actions = np.array(right_hand_pose_actions)\n", + "head_pose_actions = np.array(head_pose_actions)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6517f061", + "metadata": {}, + "outputs": [], + "source": [ + 
"from egomimic.utils.egomimicUtils import render_3d_traj_frames\n", + "\n", + "frames = render_3d_traj_frames([left_hand_pose_actions, right_hand_pose_actions, head_pose_actions], labels=[\"left hand\", \"right hand\", \"head\"], stride=10)\n", + "mpy.show_video(frames, fps=30)\n" ] }, { diff --git a/external/scale/scripts/sfs_to_egoverse_zarr.py b/external/scale/scripts/sfs_to_egoverse_zarr.py index 113200e6..057819f1 100644 --- a/external/scale/scripts/sfs_to_egoverse_zarr.py +++ b/external/scale/scripts/sfs_to_egoverse_zarr.py @@ -3,13 +3,13 @@ Scale SFS -> EgoVerse Zarr converter. Output keys per episode: - left.obs_ee_pose (T, 6) xyzypr - right.obs_ee_pose (T, 6) xyzypr + left.obs_ee_pose (T, 7) xyz + quat(w, x, y, z) + right.obs_ee_pose (T, 7) xyz + quat(w, x, y, z) left.obs_keypoints (T, 63) 21 keypoints * 3 (xyz) right.obs_keypoints (T, 63) 21 keypoints * 3 (xyz) - left.obs_wrist_pose (T, 6) xyzypr - right.obs_wrist_pose (T, 6) xyzypr - obs_head_pose (T, 6) xyzypr + left.obs_wrist_pose (T, 7) xyz + quat(w, x, y, z) + right.obs_wrist_pose (T, 7) xyz + quat(w, x, y, z) + obs_head_pose (T, 7) xyz + quat(w, x, y, z) images.front_1 (T, H, W, 3) JPEG-compressed by ZarrWriter Usage: @@ -63,6 +63,21 @@ +def _batch_euler_to_quat(euler_zyx: np.ndarray) -> np.ndarray: + """(N, 3) euler ZYX -> (N, 4) quaternion wxyz.""" + q_xyzw = R.from_euler("ZYX", euler_zyx, degrees=False).as_quat() # scipy: xyzw + return q_xyzw[..., [3, 0, 1, 2]].astype(np.float32) # reorder -> wxyz + +def _batch_pose6_to_pose7(pose6: np.ndarray) -> np.ndarray: + """(N, 6) [xyz ypr] -> (N, 7) [xyz quat_wxyz]. 
Invalid sentinels → zeros.""" + N = pose6.shape[0] + out = np.zeros((N, 7), dtype=np.float32) + valid = ~np.any(pose6 >= INVALID_VALUE - 1, axis=1) + if valid.any(): + out[valid, :3] = pose6[valid, :3] + out[valid, 3:] = _batch_euler_to_quat(pose6[valid, 3:6]) + return out + # --------------------------------------------------------------------------- # Data structures & extraction (unchanged from original) @@ -245,17 +260,16 @@ def extract_all_frames_metadata(self) -> list[FrameData]: ) return frames - def load_images_for_range(self, start_idx: int, end_idx: int) -> list[np.ndarray | None]: + def load_all_images(self) -> list[np.ndarray | None]: + """Read every frame of the video sequentially (no seeking). Index i == video frame i.""" cap = cv2.VideoCapture(self.video_path) if not cap.isOpened(): - return [None] * (end_idx - start_idx) - cap.set(cv2.CAP_PROP_POS_FRAMES, start_idx) + return [] images: list[np.ndarray | None] = [] - for _ in range(end_idx - start_idx): + while True: ret, frame = cap.read() if not ret: - images.append(None) - continue + break images.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) cap.release() return images @@ -273,21 +287,25 @@ def _compute_palm_centroid(keypoints: np.ndarray) -> np.ndarray: return np.mean(palm_kps[valid_mask], axis=0).astype(np.float32) -def _compute_palm_orientation(keypoints: np.ndarray, mirror_y: bool = False) -> np.ndarray: +def _compute_palm_orientation(keypoints: np.ndarray, flip_x: bool = False) -> np.ndarray: + """Hand frame: x=right, y=down (palm normal toward ground), z=forward (toward fingers). + flip_x=True for the right hand so that x is rightward for both hands. 
+ """ wrist, index1, middle1, pinky1 = keypoints[0], keypoints[5], keypoints[9], keypoints[17] - if any(np.any(kp >= INVALID_VALUE - 1) for kp in (wrist, index1, pinky1)): + if any(np.any(kp >= INVALID_VALUE - 1) for kp in (wrist, index1, middle1, pinky1)): return np.zeros(3, dtype=np.float32) - x_axis = middle1 - wrist - x_axis /= np.linalg.norm(x_axis) + 1e-8 - temp_y = pinky1 - wrist - z_axis = np.cross(x_axis, temp_y) + # z: forward — from wrist toward middle finger + z_axis = middle1 - wrist z_axis /= np.linalg.norm(z_axis) + 1e-8 + # x: right — across palm, orthogonalized against z + # left hand: index1 - pinky1 is rightward + # right hand: pinky1 - index1 is rightward (flip_x=True) + across = (pinky1 - index1) if flip_x else (index1 - pinky1) + across -= np.dot(across, z_axis) * z_axis + x_axis = across / (np.linalg.norm(across) + 1e-8) + # y: down (palm normal toward ground) = cross(z, x) y_axis = np.cross(z_axis, x_axis) y_axis /= np.linalg.norm(y_axis) + 1e-8 - if mirror_y: - y_axis = -y_axis - z_axis = np.cross(x_axis, y_axis) - z_axis /= np.linalg.norm(z_axis) + 1e-8 rot = np.column_stack([x_axis, y_axis, z_axis]) try: return R.from_matrix(rot).as_euler("ZYX", degrees=False).astype(np.float32) @@ -295,11 +313,11 @@ def _compute_palm_orientation(keypoints: np.ndarray, mirror_y: bool = False) -> return np.zeros(3, dtype=np.float32) -def _compute_palm_6dof(keypoints: np.ndarray, mirror_y: bool = False) -> np.ndarray: +def _compute_palm_6dof(keypoints: np.ndarray, flip_x: bool = False) -> np.ndarray: centroid = _compute_palm_centroid(keypoints) if np.any(centroid >= INVALID_VALUE - 1): return np.full(6, INVALID_VALUE, dtype=np.float32) - ypr = _compute_palm_orientation(keypoints, mirror_y=mirror_y) + ypr = _compute_palm_orientation(keypoints, flip_x=flip_x) return np.concatenate([centroid, ypr]).astype(np.float32) @@ -310,22 +328,25 @@ def _compute_wrist_position(keypoints: np.ndarray) -> np.ndarray: return wrist.astype(np.float32) -def 
_compute_wrist_orientation(keypoints: np.ndarray, mirror_y: bool = False) -> np.ndarray: +def _compute_wrist_orientation(keypoints: np.ndarray, flip_x: bool = False) -> np.ndarray: + """Hand frame: x=right, y=down (palm normal toward ground), z=forward (toward fingers). + flip_x=True for the right hand so that x is rightward for both hands. + """ wrist, index1, middle1, pinky1 = keypoints[0], keypoints[5], keypoints[9], keypoints[17] - if any(np.any(kp >= INVALID_VALUE - 1) for kp in (wrist, index1, pinky1)): + if any(np.any(kp >= INVALID_VALUE - 1) for kp in (wrist, index1, middle1, pinky1)): return np.zeros(3, dtype=np.float32) - - x_axis = middle1 - wrist - x_axis /= np.linalg.norm(x_axis) + 1e-8 - temp_y = pinky1 - wrist - z_axis = np.cross(x_axis, temp_y) + # z: forward — from wrist toward middle finger + z_axis = middle1 - wrist z_axis /= np.linalg.norm(z_axis) + 1e-8 + # x: right — across palm, orthogonalized against z + # left hand: index1 - pinky1 is rightward + # right hand: pinky1 - index1 is rightward (flip_x=True) + across = (pinky1 - index1) if flip_x else (index1 - pinky1) + across -= np.dot(across, z_axis) * z_axis + x_axis = across / (np.linalg.norm(across) + 1e-8) + # y: down (palm normal toward ground) = cross(z, x) y_axis = np.cross(z_axis, x_axis) y_axis /= np.linalg.norm(y_axis) + 1e-8 - if mirror_y: - y_axis = -y_axis - z_axis = np.cross(x_axis, y_axis) - z_axis /= np.linalg.norm(z_axis) + 1e-8 rot = np.column_stack([x_axis, y_axis, z_axis]) try: return R.from_matrix(rot).as_euler("ZYX", degrees=False).astype(np.float32) @@ -333,11 +354,11 @@ def _compute_wrist_orientation(keypoints: np.ndarray, mirror_y: bool = False) -> return np.zeros(3, dtype=np.float32) -def _compute_wrist_6dof(keypoints: np.ndarray, mirror_y: bool = False) -> np.ndarray: +def _compute_wrist_6dof(keypoints: np.ndarray, flip_x: bool = False) -> np.ndarray: wrist_xyz = _compute_wrist_position(keypoints) if np.any(wrist_xyz >= INVALID_VALUE - 1): return np.full(6, 
INVALID_VALUE, dtype=np.float32) - wrist_ypr = _compute_wrist_orientation(keypoints, mirror_y=mirror_y) + wrist_ypr = _compute_wrist_orientation(keypoints, flip_x=flip_x) return np.concatenate([wrist_xyz, wrist_ypr]).astype(np.float32) @@ -432,34 +453,48 @@ def _nonempty(p: str | None) -> bool: if n_frames <= ACTION_WINDOW: raise ValueError(f"Task {task_id} has too few frames ({n_frames})") + print(f"[{task_id}] Loading all video frames sequentially...") + t_vid = time.perf_counter() + all_images = extractor.load_all_images() + print(f"[{task_id}] Loaded {len(all_images)} video frames in {time.perf_counter() - t_vid:.1f}s (SFS frames={n_frames})") + if len(all_images) != n_frames: + print(f"[{task_id}] WARNING: video frame count ({len(all_images)}) != SFS frame count ({n_frames}) — index drift possible") + task_desc = _task_description(frames, extractor.demonstration_metadata) valid_frame_count = n_frames - ACTION_WINDOW # ------------------------------------------------------------------ # Precompute all per-frame data into dense arrays (once) # ------------------------------------------------------------------ - left_world = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) - right_world = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) - left_wrist = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) - right_wrist = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) + left_world_6 = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) + right_world_6 = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) + left_wrist_6 = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) + right_wrist_6 = np.full((n_frames, 6), INVALID_VALUE, dtype=np.float32) left_kps = np.full((n_frames, 63), INVALID_VALUE, dtype=np.float32) right_kps = np.full((n_frames, 63), INVALID_VALUE, dtype=np.float32) - head_pose_world = np.zeros((n_frames, 6), dtype=np.float32) + head_pose_6 = np.zeros((n_frames, 6), dtype=np.float32) for i, frame in 
enumerate(frames): if frame.hand_keypoints.left is not None: - left_world[i] = _compute_palm_6dof(frame.hand_keypoints.left) - left_wrist[i] = _compute_wrist_6dof(frame.hand_keypoints.left) + left_world_6[i] = _compute_palm_6dof(frame.hand_keypoints.left) + left_wrist_6[i] = _compute_wrist_6dof(frame.hand_keypoints.left) left_kps[i] = frame.hand_keypoints.left.flatten().astype(np.float32) if frame.hand_keypoints.right is not None: - right_world[i] = _compute_palm_6dof(frame.hand_keypoints.right, mirror_y=True) - right_wrist[i] = _compute_wrist_6dof(frame.hand_keypoints.right, mirror_y=True) + right_world_6[i] = _compute_palm_6dof(frame.hand_keypoints.right, flip_x=True) + right_wrist_6[i] = _compute_wrist_6dof(frame.hand_keypoints.right, flip_x=True) right_kps[i] = frame.hand_keypoints.right.flatten().astype(np.float32) - head_pose_world[i, :3] = frame.camera_pose.position.astype(np.float32) - head_pose_world[i, 3:] = R.from_matrix(frame.camera_pose.rotation_matrix).as_euler( + head_pose_6[i, :3] = frame.camera_pose.position.astype(np.float32) + head_pose_6[i, 3:] = R.from_matrix(frame.camera_pose.rotation_matrix).as_euler( "ZYX", degrees=False ).astype(np.float32) + # Batch-convert all (N, 6) [xyz + euler ZYX] -> (N, 7) [xyz + quat wxyz] + left_world = _batch_pose6_to_pose7(left_world_6) + right_world = _batch_pose6_to_pose7(right_world_6) + left_wrist = _batch_pose6_to_pose7(left_wrist_6) + right_wrist = _batch_pose6_to_pose7(right_wrist_6) + head_pose_world = _batch_pose6_to_pose7(head_pose_6) + # ------------------------------------------------------------------ # Filter valid frame indices (same criteria as old script) # ------------------------------------------------------------------ @@ -497,16 +532,16 @@ def _nonempty(p: str | None) -> bool: if len(sub) < 10: continue - min_frame = min(sub) - max_frame = max(sub) - image_batch = extractor.load_images_for_range(min_frame, max_frame+1) - # First pass: figure out which frames have images kept: list[int] = []
+ none_count = 0 for t in sub: - img = image_batch[t - min_frame] + img = all_images[t] if t < len(all_images) else None if img is not None: kept.append(t) + else: + none_count += 1 + print(f"[ep{written}] sub={len(sub)} kept={len(kept)} dropped(no image)={none_count} frames=[{sub[0]}..{sub[-1]}]") if len(kept) < 10: continue @@ -514,18 +549,18 @@ def _nonempty(p: str | None) -> bool: # ---- Per-frame current state (vectorised) ---- kept_arr = np.array(kept) - left_curr_6 = left_world[kept_arr] # (T, 6) - right_curr_6 = right_world[kept_arr] - left_curr_6 = np.where(left_curr_6 >= INVALID_VALUE - 1, 0.0, left_curr_6).astype( + left_curr_7 = left_world[kept_arr] # (T, 7) + right_curr_7 = right_world[kept_arr] + left_curr_7 = np.where(left_curr_7 >= INVALID_VALUE - 1, 0.0, left_curr_7).astype( np.float32 ) - right_curr_6 = np.where(right_curr_6 >= INVALID_VALUE - 1, 0.0, right_curr_6).astype( + right_curr_7 = np.where(right_curr_7 >= INVALID_VALUE - 1, 0.0, right_curr_7).astype( np.float32 ) - left_wrist_curr_6 = np.where( + left_wrist_curr_7 = np.where( left_wrist[kept_arr] >= INVALID_VALUE - 1, 0.0, left_wrist[kept_arr] ).astype(np.float32) - right_wrist_curr_6 = np.where( + right_wrist_curr_7 = np.where( right_wrist[kept_arr] >= INVALID_VALUE - 1, 0.0, right_wrist[kept_arr] ).astype(np.float32) @@ -540,19 +575,20 @@ def _nonempty(p: str | None) -> bool: # ---- Build image array ---- images = np.stack( - [cv2.resize(image_batch[t - min_frame], IMAGE_SIZE, interpolation=cv2.INTER_LINEAR) + [cv2.resize(all_images[t], IMAGE_SIZE, interpolation=cv2.INTER_LINEAR) for t in kept], axis=0, ).astype(np.uint8) + print(f"[ep{written}] images.shape={images.shape} kept_arr.shape={kept_arr.shape} match={images.shape[0] == len(kept_arr)}") # ---- Numeric data ---- numeric_data = { - "left.obs_ee_pose": left_curr_6, - "right.obs_ee_pose": right_curr_6, + "left.obs_ee_pose": left_curr_7, + "right.obs_ee_pose": right_curr_7, "left.obs_keypoints": left_keypoints, 
"right.obs_keypoints": right_keypoints, - "left.obs_wrist_pose": left_wrist_curr_6, - "right.obs_wrist_pose": right_wrist_curr_6, + "left.obs_wrist_pose": left_wrist_curr_7, + "right.obs_wrist_pose": right_wrist_curr_7, "obs_head_pose": actions_head, } image_data = { diff --git a/training_aws.md b/training_aws.md deleted file mode 100644 index fafe1c15..00000000 --- a/training_aws.md +++ /dev/null @@ -1,117 +0,0 @@ -# EgoVerse Dataset Setup and Training Guide - -This guide provides step-by-step instructions for setting up the dataset and training a model in the **EgoVerse** repository. - ---- - -## 1. Setting Up the Data Directory - -Start by navigating out of the **EgoVerse** repository and creating a `data` directory: - -```bash -cd .. -mkdir data -cd data -``` - ---- - -## 2. Downloading the Processed Data - -Download the processed dataset from AWS S3: - -```bash -aws s3 cp s3://rldb/processed/{processed_data_directory}/ {my_processed_data_directory} --recursive -``` - -Replace `{processed_data_directory}` with the name of the dataset you want to download, and `{my_processed_data_directory}` with your desired local directory name. - ---- - -## 3. Modifying Configuration Files - -Once the dataset is downloaded, navigate back to the **EgoVerse** repository: - -```bash -cd .. 
-cd EgoVerse -``` - -### **Modify `multi-data.yaml`** -Open the configuration file located at: -📌 **`hydra_configs/data/multi_data.yaml`** - -Update the following segments to match your local dataset path: - -```yaml -train_datasets: - dataset1: - _target_: rldb.utils.RLDBDataset - repo_id: "egoverse/smallShirtFold" - root: "{path/to/data/my_processed_data_directory/lerobot}" - local_files_only: true - mode: "train" - -valid_datasets: - dataset1: - _target_: rldb.utils.RLDBDataset - repo_id: "egoverse/smallShirtFold" - root: "{path/to/data/my_processed_data_directory/lerobot}" - local_files_only: true - mode: "valid" -``` -🔹 Replace `{path/to/data/my_processed_data_directory/lerobot}` with the actual path to the lerobot folder in your downloaded dataset. - ---- - -### **Modify `train.yaml`** -Open the configuration file located at: -📌 **`hydra_configs/train.yaml`** - -Modify the **data schematic** section as follows: - -```yaml -data_schematic: # Dynamically fill in these shapes from the dataset - _target_: rldb.utils.DataSchematic - schematic_dict: - aria_bimanual: - front_img_1: - key_type: camera_keys - lerobot_key: observations.images.front_img_1 - ee_pose: - key_type: proprio_keys - lerobot_key: observations.state.ee_pose - actions_cartesian: - key_type: action_keys - lerobot_key: actions_cartesian - embodiment: - key_type: metadata_keys - lerobot_key: metadata.embodiment - viz_img_key: - aria_bimanual: - front_img_1 -``` - ---- - -## 4. Launch Training - -### **Activate Your Environment** -Before running training, activate your **Conda** or **UV** environment: - -```bash -conda activate # If using Conda -``` -or -```bash -uv venv && source /bin/activate # If using UV -``` - -### **Run Training on a GPU Node** -Execute the training script: - -```bash -python trainHydra.py -``` ---- -