diff --git a/.gitignore b/.gitignore index 315c803a..98ed51ac 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,6 @@ lerobot_test/ **/lerobot_test/ **/lerobot_test/** **/robot/models/** -**/robot/models/ \ No newline at end of file +**/robot/models/ +*parquet* +*.zarr* \ No newline at end of file diff --git a/egomimic/hydra_configs/data/clothe_eva_aria_mecka.yaml b/egomimic/hydra_configs/data/clothe_eva_aria_mecka.yaml new file mode 100644 index 00000000..c250ad41 --- /dev/null +++ b/egomimic/hydra_configs/data/clothe_eva_aria_mecka.yaml @@ -0,0 +1,70 @@ +train_datasets: + dataset1: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: eva_bimanual + filters: + task: fold clothes + local_files_only: true + indomain: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold_clothes_indomain + local_files_only: true + everse_rl2: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: rl2 + operator: rl2 + local_files_only: true + everse_song: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: song + local_files_only: true + everse_wang: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: wang + local_files_only: true + everse_eth: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: eth + local_files_only: true + mecka_flagship: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: mecka_bimanual + filters: + task: fold_clothes + local_files_only: true + mecka_freeform: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: 
total + embodiment: mecka_bimanual + filters: + task: folding_clothes + local_files_only: true diff --git a/egomimic/hydra_configs/data/mecka_test.yaml b/egomimic/hydra_configs/data/mecka_test.yaml index 7f190339..f7cf3494 100644 --- a/egomimic/hydra_configs/data/mecka_test.yaml +++ b/egomimic/hydra_configs/data/mecka_test.yaml @@ -3,19 +3,19 @@ _target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper train_datasets: dataset1: _target_: rldb.utils.RLDBDataset - repo_id: "mecka_test" + repo_id: "aria_bimanual" mode: train - embodiment: "mecka_bimanual" - root: "/coc/flash7/acheluva3/EgoVerse/mecka_demo" + embodiment: "aria_bimanual" + root: "/nethome/paphiwetsa3/flash/projects/EgoVerse/datasets" local_files_only: True valid_datasets: dataset1: _target_: rldb.utils.RLDBDataset - repo_id: "mecka_test" + repo_id: "aria_bimanual" mode: valid - embodiment: "mecka_bimanual" - root: "/coc/flash7/acheluva3/EgoVerse/mecka_demo" + embodiment: "aria_bimanual" + root: "/nethome/paphiwetsa3/flash/projects/EgoVerse/datasets" local_files_only: True train_dataloader_params: @@ -26,4 +26,4 @@ train_dataloader_params: valid_dataloader_params: dataset1: batch_size: 32 - num_workers: 10 \ No newline at end of file + num_workers: 10 diff --git a/egomimic/hydra_configs/data/test_bimanual.yaml b/egomimic/hydra_configs/data/test_bimanual.yaml index 7ef496ba..39ce0a0e 100644 --- a/egomimic/hydra_configs/data/test_bimanual.yaml +++ b/egomimic/hydra_configs/data/test_bimanual.yaml @@ -5,7 +5,7 @@ train_datasets: datasets: rl2_lab: _target_: rldb.utils.FolderRLDBDataset - folder_path: /coc/cedarp-dxu345-0/datasets/egoverse/put_cup_on_saucer_egoverse/put_cup_on_saucer_rl2 + folder_path: /nethome/paphiwetsa3/flash/projects/EgoVerse/datasets embodiment: aria_bimanual mode: train local_files_only: true @@ -17,8 +17,7 @@ valid_datasets: datasets: eth_lab: _target_: rldb.utils.FolderRLDBDataset - folder_path: 
/coc/cedarp-dxu345-0/datasets/egoverse/put_cup_on_saucer_egoverse/put_cup_on_saucer_song - embodiment: aria_bimanual + folder_path: /nethome/paphiwetsa3/flash/projects/EgoVerse/datasets mode: valid local_files_only: true embodiment: "aria_bimanual" @@ -31,4 +30,4 @@ train_dataloader_params: valid_dataloader_params: dataset1: batch_size: 2 - num_workers: 10 \ No newline at end of file + num_workers: 10 diff --git a/egomimic/hydra_configs/data/viz_data.yaml b/egomimic/hydra_configs/data/viz_data.yaml new file mode 100644 index 00000000..243932df --- /dev/null +++ b/egomimic/hydra_configs/data/viz_data.yaml @@ -0,0 +1,64 @@ +_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper +train_datasets: + dataset1: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: eva_bimanual + filters: + task: fold_clothes + local_files_only: true + + dataset2: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold_clothes + local_files_only: true + + # dataset3: + # _target_: egomimic.rldb.utils.S3RLDBDataset + # bucket_name: rldb + # mode: total + # valid_ratio: 0.5 + # embodiment: mecka_bimanual + # cache_root: "/coc/flash7/rpunamiya6/.cache" + # filters: + # task: fold_clothes + # local_files_only: true + +valid_datasets: + dataset1: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: eva_bimanual + filters: + task: fold clothes + local_files_only: true + dataset2: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold_clothes_indomain + local_files_only: true + +train_dataloader_params: + dataset1: + batch_size: 256 + num_workers: 10 + dataset2: + batch_size: 256 + num_workers: 10 + +valid_dataloader_params: + dataset1: + batch_size: 256 + num_workers: 10 + dataset2: + batch_size: 256 + num_workers: 10 diff --git 
a/egomimic/hydra_configs/data/viz_data2.yaml b/egomimic/hydra_configs/data/viz_data2.yaml new file mode 100644 index 00000000..2a89c3cf --- /dev/null +++ b/egomimic/hydra_configs/data/viz_data2.yaml @@ -0,0 +1,54 @@ +train_datasets: + dataset1: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: eva_bimanual + filters: + task: fold clothes + local_files_only: true + indomain: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold_clothes_indomain + local_files_only: true + everse_rl2: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: rl2 + operator: rl2 + local_files_only: true + everse_song: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: song + local_files_only: true + everse_wang: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: wang + local_files_only: true + everse_eth: + _target_: egomimic.rldb.utils.S3RLDBDataset + bucket_name: rldb + mode: total + embodiment: aria_bimanual + filters: + task: fold clothes + lab: eth + local_files_only: true diff --git a/egomimic/hydra_configs/train.yaml b/egomimic/hydra_configs/train.yaml index 828a21b6..c1299d34 100644 --- a/egomimic/hydra_configs/train.yaml +++ b/egomimic/hydra_configs/train.yaml @@ -1,10 +1,10 @@ defaults: - - model: hpt_bc_flow_eva + - model: hpt_bc_flow_aria - paths: default - trainer: ddp - debug: null - logger: wandb - - data: eva_bc_s3 + - data: mecka_test - callbacks: checkpoints - override hydra/launcher: submitit - _self_ @@ -16,7 +16,7 @@ train: true eval: false eval_class: - _target_ : egomimic.scripts.evaluation.Eve + _target_: egomimic.scripts.evaluation.Eve mode: real arm: both 
eval_path: "./logs/eval/${name}_${now:%Y-%m-%d_%H-%M-%S}" @@ -93,10 +93,7 @@ data_schematic: # Dynamically fill in these shapes from the dataset embodiment: key_type: metadata_keys lerobot_key: metadata.embodiment - viz_img_key: - eva_bimanual: - front_img_1 - aria_bimanual: - front_img_1 - mecka_bimanual: - front_img_1 + viz_img_key: + eva_bimanual: front_img_1 + aria_bimanual: front_img_1 + mecka_bimanual: front_img_1 diff --git a/egomimic/models/hpt_nets.py b/egomimic/models/hpt_nets.py index 7214c29c..26765cb5 100644 --- a/egomimic/models/hpt_nets.py +++ b/egomimic/models/hpt_nets.py @@ -34,6 +34,9 @@ from functools import partial +from transformers import CLIPVisionModel +from transformers import AutoImageProcessor, AutoModel + from transformers import T5Tokenizer, T5Model, AutoTokenizer from transformers import CLIPTextModel, CLIPVisionModel # TODO: add CLIP @@ -606,6 +609,97 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return y +class DinoV3(PolicyStem): + def __init__( + self, + output_dim, + model_type: str = "facebook/dinov3-vits16plus-pretrain-lvd1689m", + freeze_backbone: bool = True, + **kwargs, + ) -> None: + super().__init__(**kwargs) + self.model = AutoModel.from_pretrained( + model_type + ) + self.freeze_backbone = freeze_backbone + + if "conv" in model_type: + self.proj = nn.Linear(self.model.config.hidden_sizes[-1], output_dim) + else: + self.proj = nn.Linear(self.model.config.hidden_size, output_dim) + + if self.freeze_backbone: + for p in self.model.parameters(): + p.requires_grad = False + + self.model.eval() + else: + # fix for ViT DinoV3 to prevent unused params error + for name, p in self.model.named_parameters(): + if "mask_token" in name: + p.requires_grad = False + self.model.train() + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Performs a forward pass of the model. + Args: + x: Image tensor with shape [B, T, N, 3, H, W] representing the batch size, + horizon, instance (e.g. 
num of views) + Returns: + Flatten tensor with shape [B, M, 512] + """ + B, T, N, C, H, W = x.shape + x = x.view(B * T * N, C, H, W) + + outputs = self.model(pixel_values=x) + outputs = outputs.last_hidden_state + outputs = self.proj(outputs) + + return outputs + +class CLIP(PolicyStem): + def __init__( + self, + output_dim, + model_type: str = "openai/clip-vit-base-patch32", + freeze_backbone: bool = True, + **kwargs, + ) -> None: + super().__init__(**kwargs) + + self.model = CLIPVisionModel.from_pretrained(model_type) + self.freeze_backbone = freeze_backbone + self.proj = nn.Linear(self.model.config.hidden_size, output_dim) + + if self.freeze_backbone: + for p in self.model.parameters(): + p.requires_grad = False + + self.model.eval() + + else: + self.model.train() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Performs a forward pass of the model. + Args: + x: Image tensor with shape [B, T, N, 3, H, W] representing the batch size, + horizon, instance (e.g. num of views) + Returns: + Flatten tensor with shape [B, M, 512] + """ + B, T, N, C, H, W = x.shape + x = x.view(B * T * N, C, H, W) + + outputs = self.model(pixel_values=x) + outputs = outputs.pooler_output + outputs = self.proj(outputs) + outputs = outputs.view(B, T*N, -1) + + return outputs + + class ResNet(PolicyStem): def __init__( self, diff --git a/egomimic/scripts/data_visualisation.ipynb b/egomimic/scripts/data_visualisation.ipynb index 8eec78ef..479f4a71 100644 --- a/egomimic/scripts/data_visualisation.ipynb +++ b/egomimic/scripts/data_visualisation.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/nethome/paphiwetsa3/flash/projects/EgoVerse/.venv/lib/python3.11/site-packages/torch/cuda/__init__.py:61: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. 
If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.\n", + " import pynvml # type: ignore[import]\n" + ] + } + ], "source": [ "# IMPORTS\n", "from egomimic.rldb.utils import *\n", @@ -17,12 +26,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8b62fc7671f24672b715b88fadf6e295", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Generating train split: 0 examples [00:00, ? examples/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Load dataset\n", - "root = \"/coc/flash7/paphiwetsa3/datasets/eva_test_data2/proc2/lerobot_test\"\n", + "root = \"/nethome/paphiwetsa3/flash/projects/EgoVerse/datasets\"\n", "repo_id = \"rpuns/aria_laundry_rl2\"\n", "\n", "episodes = [0, 1]\n", @@ -33,9 +57,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'observations.state.ee_pose': {'dtype': 'float64', 'shape': (12,), 'names': ['dim_0']}, 'observations.images.front_img_1': {'dtype': 'image', 'shape': (480, 640, 3), 'names': ['channel', 'height', 'width']}, 'actions_cartesian': {'dtype': 'prestacked_float64', 'shape': (100, 12), 'names': ['chunk_length', 'action_dim']}, 'metadata.embodiment': {'dtype': 'int32', 'shape': (1,), 'names': ['dim_0']}, 'timestamp': {'dtype': 'float32', 'shape': (1,), 'names': None}, 'frame_index': {'dtype': 'int64', 'shape': (1,), 'names': None}, 'episode_index': {'dtype': 'int64', 'shape': (1,), 'names': None}, 'index': {'dtype': 'int64', 'shape': (1,), 'names': None}, 'task_index': {'dtype': 'int64', 'shape': (1,), 'names': None}}\n" + ] + } + ], "source": [ "# Get metadata\n", "print(dataset.meta.info[\"features\"])\n", @@ -46,26 
+78,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n" + ] + } + ], "source": [ "print(dataset.embodiment)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Make data_loader\n", - "data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)" + "data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -74,12 +114,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "def visualize_actions(ims, actions, extrinsics, intrinsics, arm=\"both\"):\n", - " for b in range(ims.shape[0]):\n", + " for b in range(actions.shape[0]):\n", " if actions.shape[-1] == 7 or actions.shape[-1] == 14:\n", " ac_type = \"joints\"\n", " elif actions.shape[-1] == 3 or actions.shape[-1] == 6:\n", @@ -96,33 +136,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 100, 6)\n", + "Saved batch 0 images to ./visualization/\n", + "(1, 100, 6)\n", + "Saved batch 1 images to ./visualization/\n", + "(1, 100, 6)\n", + "Saved batch 2 images to ./visualization/\n", + "(1, 100, 6)\n", + "Saved batch 3 images to ./visualization/\n", + "(1, 100, 6)\n", + "Saved batch 4 images to ./visualization/\n", + "(1, 100, 6)\n", + "Saved batch 5 images to ./visualization/\n", + "(1, 100, 6)\n", + "Saved batch 6 images to ./visualization/\n" + ] + } + ], "source": [ "save_dir = \"./visualization/\"\n", "os.makedirs(save_dir, exist_ok=True)\n", "\n", - "num_batches = 1\n", + "num_batches = 6\n", "\n", "for i, data 
in enumerate(data_loader):\n", " if i > num_batches:\n", " break\n", " ims = (data[image_key].permute(0, 2, 3, 1).cpu().numpy() * 255.0).astype(np.uint8)\n", " actions = data[actions_key].cpu().numpy()\n", - " # print(actions_key)\n", - " print(actions[:10, :])\n", - "\n", - " ims_viz = visualize_actions(ims, actions[:, :3], camera_transforms.extrinsics, camera_transforms.intrinsics)\n", + " actions = actions[:1, ...]\n", + " ims = ims[:1, ...]\n", + " left_actions = actions[..., :3]\n", + " right_actions = actions[..., 7:10]\n", + " both_actions = np.concatenate([left_actions, right_actions], axis=-1)\n", + " print(both_actions.shape)\n", + " ims_viz = visualize_actions(ims, both_actions, camera_transforms.extrinsics, camera_transforms.intrinsics)\n", "\n", " for j, im in enumerate(ims_viz):\n", " img_tensor = torch.from_numpy(im).permute(2, 0, 1)\n", " save_path = os.path.join(save_dir, f\"image_{i}_{j}.png\")\n", " io.write_png(img_tensor, save_path)\n", "\n", - " print(f\"Saved batch {i} images to {save_dir}\")\n", - " break" + " print(f\"Saved batch {i} images to {save_dir}\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -141,9 +211,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/egomimic/scripts/visualization_process/debug/check_zar.py b/egomimic/scripts/visualization_process/debug/check_zar.py new file mode 100644 index 00000000..dd044654 --- /dev/null +++ b/egomimic/scripts/visualization_process/debug/check_zar.py @@ -0,0 +1,83 @@ +import json +from pathlib import Path + +import pandas as pd +import zarr + + +def main(): + # Default to the outputs produced by `process_image.py` + data_dir = Path("egomimic/scripts/visualization_process/fold_clothes_aria_eva") + manifest_path = data_dir / "manifest.json" 
+ manifest = json.loads(manifest_path.read_text()) + + print("[INFO] manifest_path =", manifest_path) + print("[INFO] n_frames =", manifest["n_frames"]) + print("[INFO] embedding_dim =", manifest["embedding_dim"]) + print("[INFO] embed_store =", manifest["embed_store"]) + + # Load metadata parquet + meta_path = Path(manifest["metadata_parquet"]) + meta_df = pd.read_parquet(meta_path) + print("[INFO] metadata rows =", len(meta_df)) + print("[INFO] metadata cols =", len(meta_df.columns)) + # basic columns we expect + for col in ("global_index", "episode_hash"): + print("[INFO] has {} = {}".format(col, col in meta_df.columns)) + if len(meta_df) > 0: + first_row = meta_df.iloc[100].to_dict() + breakpoint() + print("[INFO] metadata[0] keys =", sorted(list(first_row.keys()))[:40], "...") + print("[INFO] metadata[0] =", first_row) + + # Load embeddings zarr for first image key + first_key = manifest["image_keys"][0] + zarr_path = Path(manifest["embeddings"][first_key]) + root = zarr.open_group(str(zarr_path), mode="r") + arr = root["embeddings"] + print("[INFO] zarr_path =", zarr_path) + print("[INFO] zarr array =", "embeddings") + print("[INFO] shape/dtype =", arr.shape, arr.dtype, "chunks=", arr.chunks) + + # Sanity: embeddings rows should match metadata rows for 1:1 alignment + if arr.shape[0] != len(meta_df): + raise RuntimeError( + "Row mismatch: embeddings has {} rows but metadata has {} rows".format( + arr.shape[0], len(meta_df) + ) + ) + + # Explicitly access a latent (embedding) row. + # This is the vector aligned with metadata row 0. 
+ x0 = arr[0, :] # (D,) + x_last = arr[arr.shape[0] - 1, :] + print("[INFO] first latent shape =", getattr(x0, "shape", None), "dtype=", getattr(x0, "dtype", None)) + print("[INFO] last latent shape =", getattr(x_last, "shape", None), "dtype=", getattr(x_last, "dtype", None)) + # Print only a small slice to keep logs readable + try: + x0_slice = x0[:16] + print("[INFO] latent[0][:16] =", x0_slice) + # quick stats + x0_f = x0.astype("float32", copy=False) + print( + "[INFO] latent[0] stats min/max/mean =", + float(x0_f.min()), + float(x0_f.max()), + float(x0_f.mean()), + ) + + y = root["tsne_2d"][:10] # (10, 2) + print("tsne_2d[:10] =\n", y) + print("min_xy =", y.min(axis=0), "max_xy =", y.max(axis=0), "mean_xy =", y.mean(axis=0)) + except Exception as e: + print("[WARN] Could not slice/stats latent[0]:", e) + + # Check global_index alignment (expected: 0..n-1 in this one-batch run) + if "global_index" in meta_df.columns: + gi_min = int(meta_df["global_index"].min()) + gi_max = int(meta_df["global_index"].max()) + print("[INFO] global_index min/max =", gi_min, gi_max) + + +if __name__ == "__main__": + main() diff --git a/egomimic/scripts/visualization_process/debug/test_dinov3.py b/egomimic/scripts/visualization_process/debug/test_dinov3.py new file mode 100644 index 00000000..7c80ed9e --- /dev/null +++ b/egomimic/scripts/visualization_process/debug/test_dinov3.py @@ -0,0 +1,37 @@ +""" +Quick smoke test for DinoV3 from `egomimic/models/hpt_nets.py`. + +Runs a random forward pass and prints output shapes + +Checking if DinoV3 is working as expected. 
+""" + +import torch + +from egomimic.models.hpt_nets import DinoV3 + + +def main(): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # `DinoV3.forward` expects images shaped [B, T, N, 3, H, W] + B, T, N, C, H, W = 2, 3, 1, 3, 224, 224 + x = torch.randn(B, T, N, C, H, W, device=device) + + model = DinoV3( + output_dim=256, + # default is "facebook/dinov3-vits16plus-pretrain-lvd1689m" + freeze_backbone=True, + ).to(device) + + model.eval() + with torch.no_grad(): + y = model(x) + + print("input: {}".format(tuple(x.shape))) + # typically [(B*T*N), tokens, output_dim] + print("output: {}".format(tuple(y.shape))) + + +if __name__ == "__main__": + main() diff --git a/egomimic/scripts/visualization_process/dim_reduce.py b/egomimic/scripts/visualization_process/dim_reduce.py new file mode 100644 index 00000000..85f23f90 --- /dev/null +++ b/egomimic/scripts/visualization_process/dim_reduce.py @@ -0,0 +1,244 @@ +""" +Run dimensionality reduction on saved embedding latents and store 2D coords back into the zarr. + +Reads: +- manifest.json (to find the embeddings zarr path) +- embeddings zarr group (expects dataset name "embeddings") + +Writes: +- dataset "_2d" into the same zarr group by default (tsne_2d/umap_2d/pca_2d), + shape (N, 2), float32 +""" + +import argparse +import json +from pathlib import Path + +import numpy as np +import zarr + + +def _load_embeddings(zarr_path: Path) -> np.ndarray: + root = zarr.open_group(str(zarr_path), mode="r") + if "embeddings" not in root: + raise KeyError( + "Expected dataset 'embeddings' in zarr group. 
Found keys: {}".format( + list(root.array_keys()) + ) + ) + arr = root["embeddings"] + # Load entire array into memory for t-SNE + x = arr[:] + print("x.shape =", x.shape) + # cuML prefers float32 + if x.dtype != np.float32: + x = x.astype(np.float32, copy=False) + return x + + +def _to_numpy(x): + # Convert cupy -> numpy if needed + try: + import cupy as cp + + if isinstance(x, cp.ndarray): + return cp.asnumpy(x) + except Exception: + pass + return np.asarray(x) + + +def _run_cuml_tsne( + x: np.ndarray, *, perplexity: float, random_state: int, learning_rate: float +) -> np.ndarray: + try: + from cuml import TSNE + except Exception as e: + raise RuntimeError( + "cuml is required. Make sure RAPIDS/cuML is installed in this environment." + ) from e + + # cuML TSNE returns a (N, 2) array-like (often cupy-backed); convert to numpy. + tsne = TSNE( + n_components=2, + perplexity=perplexity, + random_state=random_state, + init="random", + # NOTE: scikit-learn supports learning_rate="auto", but cuML expects numeric. + learning_rate=float(learning_rate), + ) + y = tsne.fit_transform(x) + + y = _to_numpy(y) + if y.ndim != 2 or y.shape[1] != 2: + raise RuntimeError("Unexpected TSNE output shape: {}".format(y.shape)) + return y.astype(np.float32, copy=False) + + +def _run_cuml_umap( + x: np.ndarray, *, n_neighbors: int, min_dist: float, metric: str, random_state: int +) -> np.ndarray: + try: + from cuml import UMAP + except Exception as e: + raise RuntimeError( + "cuml is required for UMAP. Make sure RAPIDS/cuML is installed in this environment." 
+ ) from e + + umap = UMAP( + n_components=2, + n_neighbors=int(n_neighbors), + min_dist=float(min_dist), + metric=str(metric), + random_state=int(random_state), + ) + y = umap.fit_transform(x) + y = _to_numpy(y) + if y.ndim != 2 or y.shape[1] != 2: + raise RuntimeError("Unexpected UMAP output shape: {}".format(y.shape)) + return y.astype(np.float32, copy=False) + + +def _run_pca(x: np.ndarray, *, n_components: int, random_state: int) -> np.ndarray: + # Prefer GPU PCA if available; otherwise fall back to sklearn. + try: + from cuml import PCA # type: ignore + + pca = PCA(n_components=int(n_components), random_state=int(random_state)) + y = pca.fit_transform(x) + y = _to_numpy(y) + except Exception: + try: + from sklearn.decomposition import PCA # type: ignore + except Exception as e: + raise RuntimeError( + "PCA requires either cuML (preferred) or scikit-learn installed." + ) from e + + pca = PCA(n_components=int(n_components), random_state=int(random_state)) + y = pca.fit_transform(x) + y = np.asarray(y) + + if y.ndim != 2 or y.shape[1] != int(n_components): + raise RuntimeError("Unexpected PCA output shape: {}".format(y.shape)) + if y.shape[1] != 2: + raise RuntimeError("This script only supports 2D outputs; got PCA dim {}".format(y.shape[1])) + return y.astype(np.float32, copy=False) + + +def _write_2d(zarr_path: Path, *, y2d: np.ndarray, name: str, overwrite: bool) -> None: + root = zarr.open_group(str(zarr_path), mode="a") + + if name in root and not overwrite: + raise FileExistsError( + "Zarr dataset '{}' already exists at {}. 
Use --overwrite to replace.".format( + name, zarr_path + ) + ) + + chunks = (min(8192, y2d.shape[0]), 2) + root.create_dataset( + name, + shape=y2d.shape, + chunks=chunks, + dtype=np.float32, + overwrite=overwrite, + ) + root[name][:] = y2d + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument( + "--manifest", + type=str, + default="egomimic/scripts/visualization_process/fold_clothes_aria_eva_all_labs/manifest.json", + help="Path to manifest.json produced by process_image.py", + ) + ap.add_argument( + "--image-key", + type=str, + default="", + help="Optional image key to select from manifest['embeddings'] (defaults to first).", + ) + ap.add_argument( + "--method", + type=str, + default="tsne", + choices=("tsne", "umap", "pca"), + help="Dimensionality reduction method.", + ) + ap.add_argument( + "--out-name", + type=str, + default="", + help="Dataset name to write in zarr. Defaults to _2d.", + ) + # TSNE args + ap.add_argument("--perplexity", type=float, default=30.0, help="TSNE perplexity (tsne only).") + ap.add_argument( + "--learning-rate", + type=float, + default=200.0, + help="cuML TSNE learning rate (tsne only; must be numeric).", + ) + # UMAP args + ap.add_argument("--n-neighbors", type=int, default=15, help="UMAP n_neighbors (umap only).") + ap.add_argument("--min-dist", type=float, default=0.1, help="UMAP min_dist (umap only).") + ap.add_argument("--metric", type=str, default="euclidean", help="UMAP metric (umap only).") + # PCA args + ap.add_argument("--pca-components", type=int, default=2, help="PCA n_components (pca only).") + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--overwrite", action="store_true") + args = ap.parse_args() + + manifest_path = Path(args.manifest) + manifest = json.loads(manifest_path.read_text()) + + if manifest.get("embed_store") != "zarr": + raise RuntimeError("This script expects manifest embed_store == 'zarr'.") + + if args.image_key: + image_key = args.image_key + else: + image_key = 
manifest["image_keys"][0] + + zarr_path = Path(manifest["embeddings"][image_key]) + print("[INFO] zarr_path =", zarr_path) + print("[INFO] reading embeddings for key =", image_key) + + x = _load_embeddings(zarr_path) + print("[INFO] embeddings shape/dtype =", x.shape, x.dtype) + + if args.out_name: + out_name = args.out_name + else: + out_name = f"{args.method}_2d" + + if args.method == "tsne": + y2d = _run_cuml_tsne( + x, perplexity=args.perplexity, random_state=args.seed, learning_rate=args.learning_rate + ) + elif args.method == "umap": + y2d = _run_cuml_umap( + x, + n_neighbors=args.n_neighbors, + min_dist=args.min_dist, + metric=args.metric, + random_state=args.seed, + ) + elif args.method == "pca": + if int(args.pca_components) != 2: + raise ValueError("--pca-components must be 2 for this script (got {})".format(args.pca_components)) + y2d = _run_pca(x, n_components=args.pca_components, random_state=args.seed) + else: + raise RuntimeError("Unsupported method: {}".format(args.method)) + + print("[INFO] {} shape/dtype =".format(out_name), y2d.shape, y2d.dtype) + + _write_2d(zarr_path, y2d=y2d, name=out_name, overwrite=args.overwrite) + print("[DONE] wrote {} into {}".format(out_name, zarr_path)) + + +if __name__ == "__main__": + main() diff --git a/egomimic/scripts/visualization_process/process_image.py b/egomimic/scripts/visualization_process/process_image.py new file mode 100644 index 00000000..2e0e313d --- /dev/null +++ b/egomimic/scripts/visualization_process/process_image.py @@ -0,0 +1,531 @@ +""" +create_dino.py +============== + +Downloads RLDB datasets via `S3RLDBDataset`, writes a metadata table (Parquet), +and computes image embeddings using a DINO-family model (defaults to DINOv2 via torch.hub). + +Outputs (by default) to: +- metadata parquet: /metadata.parquet +- embeddings: /embeddings.zarr (or .npy memmap) + +Notes +----- +- "DINOv3" is not guaranteed to be available via torch.hub. 
This script will try to + load the requested hub repo, and falls back to DINOv2 if that fails. +- The RLDB datasets are HuggingFace / LeRobot datasets. Instantiating S3RLDBDataset + will sync needed episode folders locally. +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any, Dict, List + +import hydra +import numpy as np +from omegaconf import OmegaConf +import pandas as pd +import torch +from torch.utils.data import DataLoader, Subset + +from egomimic.rldb.utils import S3RLDBDataset, MultiRLDBDataset +from egomimic.algo.hpt import DinoV3 +from egomimic.utils.aws.aws_sql import create_default_engine, episode_table_to_df + + +def _parse_json_or_empty(s: str) -> dict: + if not s: + return {} + return json.loads(s) + + +def _safe_get(d: dict, k: str, default=None): + try: + return d.get(k, default) + except Exception: + return default + + +def _image_to_torch_uint8_bchw(x) -> torch.Tensor: + """ + Accept common dataset image formats: + - numpy uint8: HWC or CHW + - torch uint8/float: HWC/CHW/BCHW/BHWC + Returns uint8 BCHW. 
+ """ + if isinstance(x, torch.Tensor): + t = x + else: + t = torch.from_numpy(np.asarray(x)) + + if t.ndim == 3: + # HWC or CHW -> add batch + t = t.unsqueeze(0) + if t.ndim != 4: + raise ValueError(f"Expected 3D/4D image tensor/array, got shape={tuple(t.shape)}") + + # If last dim looks like channels -> BHWC -> BCHW + if t.shape[-1] in (1, 3) and t.shape[1] not in (1, 3): + t = t.permute(0, 3, 1, 2).contiguous() + # Else assume already BCHW (or ambiguous) + + if t.dtype != torch.uint8: + # If floats in [0,1] or [-1,1], bring to uint8 best-effort + if t.is_floating_point(): + t = t.to(torch.float32) + t = torch.clamp(t, 0.0, 1.0) if t.max() <= 1.0 else torch.clamp(t, -1.0, 1.0) * 0.5 + 0.5 + t = torch.round(t * 255.0).to(torch.uint8) + else: + t = t.to(torch.uint8) + return t + + +def _bchw_u8_to_list_hwc_u8(img_bchw_u8: torch.Tensor) -> List[np.ndarray]: + """ + Convert uint8 BCHW torch tensor to a list of uint8 HWC numpy arrays. + """ + if img_bchw_u8.ndim != 4: + raise ValueError(f"Expected BCHW, got {tuple(img_bchw_u8.shape)}") + if img_bchw_u8.dtype != torch.uint8: + raise ValueError(f"Expected uint8 image tensor, got {img_bchw_u8.dtype}") + x = img_bchw_u8.permute(0, 2, 3, 1).contiguous().cpu().numpy() # BHWC uint8 + return [x[i] for i in range(x.shape[0])] + + +def _load_hpt_dinov3(model_id: str, *, output_dim: int, device: str): + """ + Load DINOv3 backbone via `DinoV3` from egomimic's HPT code, but keep a HF + `AutoImageProcessor` for correct pixel preprocessing. + + `DinoV3.forward` expects input shaped [B, T, N, 3, H, W] and returns projected + token embeddings shaped [(B*T*N), num_tokens, output_dim]. + """ + try: + from transformers import AutoImageProcessor + except Exception as e: + raise RuntimeError( + "Transformers is required for HF DINO models. Install with `pip install transformers`." 
+ ) from e + + processor = AutoImageProcessor.from_pretrained(model_id) + stem = DinoV3(output_dim=output_dim, model_type=model_id, freeze_backbone=True) + stem.eval() + stem.to(device) + return processor, stem + + +@torch.no_grad() +def _embed_batch_dinov3(processor, stem: torch.nn.Module, images_hwc_u8: List[np.ndarray], device: str) -> torch.Tensor: + """ + Returns (B, D) embeddings (CLS token) using: + - HF processor -> pixel_values (B,C,H,W) + - DinoV3 stem -> token embeddings, then take token 0 (CLS) + """ + inputs = processor(images=images_hwc_u8, return_tensors="pt") + if "pixel_values" not in inputs: + raise RuntimeError("HF processor did not return 'pixel_values'.") + pixel_values = inputs["pixel_values"].to(device) # (B,C,H,W), float + # DinoV3 expects [B, T, N, C, H, W]; we treat each frame as T=1, N=1 + x = pixel_values.unsqueeze(1).unsqueeze(2) + tok = stem(x) # (B, num_tokens, D) because B*T*N == B + if tok.ndim != 3: + raise RuntimeError(f"Unexpected DinoV3 output shape: {tuple(tok.shape)}") + return tok[:, 0, :] # CLS token + + +def _flatten_metadata(sample: Dict[str, Any]) -> Dict[str, Any]: + """ + Make a metadata dict that is safe for DataFrame/Parquet. + We keep common RLDB/LeRobot fields if present and also include any `metadata.*` keys. + """ + out: Dict[str, Any] = {} + + # Common keys we expect in LeRobot datasets + for k in ("episode_index", "frame_index", "timestamp", "annotations", "task", "task_description"): + if k in sample: + out[k] = sample[k] + + # Some datasets include these: + for k in ("dataset_index", "index", "step", "episode_id"): + if k in sample and k not in out: + out[k] = sample[k] + + # Include all metadata.* keys (e.g. metadata.embodiment, etc.) 
+ for k, v in sample.items(): + if isinstance(k, str) and k.startswith("metadata."): + out[k] = v + + # Make sure tensors/numpy become scalars where appropriate + for k, v in list(out.items()): + if isinstance(v, torch.Tensor): + if v.numel() == 1: + out[k] = v.item() + else: + out[k] = v.detach().cpu().numpy() + elif isinstance(v, np.ndarray): + if v.size == 1: + out[k] = v.item() + return out + + +def _py_scalar(v: Any) -> Any: + """Best-effort conversion for pandas/numpy scalars for Parquet friendliness.""" + try: + import pandas as _pd # local import to avoid hard dependency patterns + + if isinstance(v, _pd.Timestamp): + return v.isoformat() + except Exception: + pass + + # numpy scalar -> python scalar + try: + if isinstance(v, np.generic): + return v.item() + except Exception: + pass + + return v + + +def _instantiate_hydra(cfg_path: str): + """ + Instantiate a dataset from a Hydra-style YAML config. + + Example: + cfg_path="egomimic/hydra_configs/data/viz_data.yaml" + """ + try: + from hydra.utils import instantiate + from omegaconf import OmegaConf + except Exception as e: + raise RuntimeError( + "Hydra instantiation requires `hydra-core` and `omegaconf`." + ) from e + + cfg = OmegaConf.load(cfg_path) + return instantiate(cfg) + + +def _ensure_out_dir(out_dir: Path) -> None: + out_dir.mkdir(parents=True, exist_ok=True) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument( + "--embodiment", + type=str, + default="", + help='e.g. "eva_right_arm" or "eva_bimanual". Required unless using --data-config.', + ) + ap.add_argument("--out-dir", type=str, default="egomimic/scripts/visualization_process/data2") + ap.add_argument( + "--data-config", + type=str, + default="", + help=( + "Optional Hydra YAML path for dataset instantiation, e.g. " + "egomimic/hydra_configs/data/viz_data.yaml. If set, the dataset is " + "created via hydra `instantiate()` from --data-split/--dataset-name." 
+ ), + ) + ap.add_argument( + "--image-keys", + type=str, + nargs="+", + default=["observations.images.front_img_1"], + help="LeRobot image keys to embed (can pass multiple).", + ) + ap.add_argument( + "--model", + type=str, + default="facebook/dinov3-vitl16-pretrain-lvd1689m", + help="HuggingFace model id for DINO (e.g. facebook/dinov3-vitl16-pretrain-lvd1689m).", + ) + ap.add_argument("--batch-size", type=int, default=240) + ap.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu") + ap.add_argument("--num-frames", type=int, default=-1, help="Limit number of frames for debugging") + ap.add_argument( + "--every-k-datapoint", + type=int, + default=15, + help="Keep only every k-th datapoint (0,k,2k,...) to reduce compute. Set to 1 to keep all.", + ) + ap.add_argument( + "--debug-first-batch", + action="store_true", + help="Process/save only the first batch, then exit (useful for debugging).", + ) + + ap.add_argument( + "--embed-store", + type=str, + default="zarr", + choices=["zarr", "npy"], + help="Embedding storage format. zarr is chunked; npy is memmap.", + ) + ap.add_argument("--embed-dtype", type=str, default="float16", choices=["float16", "float32"]) + ap.add_argument("--chunk-size", type=int, default=8192, help="Write chunk size for zarr") + + args = ap.parse_args() + + out_dir = Path(args.out_dir) + _ensure_out_dir(out_dir) + + + # Dataset instantiation + # If --data-config is provided, treat it as a MultiDataModuleWrapper-style config + # and ONLY use its train_datasets (ignore valid_datasets entirely). + dataset_dict: Dict[str, torch.utils.data.Dataset] = {} + if args.data_config: + cfg = OmegaConf.load(args.data_config) + cfg_data = cfg.data if ("data" in cfg and "train_datasets" in cfg.data) else cfg + if "train_datasets" not in cfg_data: + raise KeyError( + "Expected 'train_datasets' in --data-config (or in data.train_datasets)." 
+ ) + for dataset_name, ds_cfg in cfg_data.train_datasets.items(): + dataset_dict[str(dataset_name)] = hydra.utils.instantiate(ds_cfg) + else: + # CLI-configured dataset; instantiation triggers S3 sync + local load. + if not args.embodiment: + raise ValueError("--embodiment is required when not using --data-config") + filters = {"task": "fold_clothes"} + ds = S3RLDBDataset(embodiment=args.embodiment, mode="total", filters=filters) + dataset_dict[str(args.embodiment)] = ds + + if not dataset_dict: + raise RuntimeError("No datasets were instantiated.") + + dataset_names = list(dataset_dict.keys()) + + # Compute effective per-dataset lengths + global offsets into the shared embedding array + per_dataset_n: Dict[str, int] = {} + per_dataset_offset: Dict[str, int] = {} + per_dataset_keep_indices: Dict[str, List[int]] = {} + running = 0 + k_stride = int(args.every_k_datapoint) + if k_stride <= 0: + k_stride = 1 + for dataset_name in dataset_names: + ds_i = dataset_dict[dataset_name] + n_i = len(ds_i) + if args.num_frames > 0: + n_i = min(n_i, args.num_frames) + if args.debug_first_batch: + n_i = min(n_i, args.batch_size) + keep_idx = list(range(0, n_i, k_stride)) + per_dataset_keep_indices[dataset_name] = keep_idx + per_dataset_offset[dataset_name] = running + per_dataset_n[dataset_name] = len(keep_idx) + running += len(keep_idx) + + n_total = running + print( + "[INFO] Using {} train datasets; total frames to process = {}".format( + len(dataset_names), n_total + ) + ) + + # Model (HPT DinoV3 stem + HF processor) + # If the CLI flag was removed, default to 1024 (common for ViT-L features). + dino_output_dim = getattr(args, "dino_output_dim", 1024) + processor, stem = _load_hpt_dinov3( + args.model, output_dim=dino_output_dim, device=args.device + ) + + # Probe embedding dim + first = dataset_dict[dataset_names[0]][0] + probe_key = args.image_keys[0] + if probe_key not in first: + raise KeyError( + f"Image key '{probe_key}' not found in sample. 
Available keys (truncated): {list(first.keys())[:30]}" + ) + probe_img = _image_to_torch_uint8_bchw(first[probe_key]) # uint8 BCHW + probe_list = _bchw_u8_to_list_hwc_u8(probe_img) + probe_emb = _embed_batch_dinov3(processor, stem, probe_list, args.device) + emb_dim = int(probe_emb.shape[-1]) + print(f"[INFO] Embedding dim: {emb_dim}") + + embed_dtype = np.float16 if args.embed_dtype == "float16" else np.float32 + + # Storage setup + embed_paths = {} + embed_writers = {} + for k in args.image_keys: + safe_name = k.replace("/", "_").replace(".", "_") + if args.embed_store == "npy": + path = out_dir / f"embeddings__{safe_name}.npy" + arr = np.memmap(path, mode="w+", dtype=embed_dtype, shape=(n_total, emb_dim)) + embed_paths[k] = path + embed_writers[k] = arr + else: + # zarr + try: + import zarr + except Exception as e: + raise RuntimeError( + "zarr is not installed but --embed-store=zarr was requested. " + "Either install zarr (pip install zarr numcodecs) or use --embed-store npy." + ) from e + path = out_dir / f"embeddings__{safe_name}.zarr" + root = zarr.open_group(str(path), mode="w") + # Chunk over first dim + chunks = (min(args.chunk_size, n_total), emb_dim) + root.create_dataset( + "embeddings", + shape=(n_total, emb_dim), + chunks=chunks, + dtype=embed_dtype, + overwrite=True, + ) + embed_paths[k] = path + embed_writers[k] = root["embeddings"] + + # Metadata rows (we’ll write parquet at the end; for huge datasets you can switch to incremental writing) + meta_rows: List[Dict[str, Any]] = [] + + engine = create_default_engine() + df = episode_table_to_df(engine) + # Cache episode-level DB metadata by episode_hash for fast per-frame lookup. + # We prefix these keys as "db.*" when writing per-frame metadata rows. 
+ episode_meta_by_hash: Dict[str, Dict[str, Any]] = {} + if "episode_hash" in df.columns: + df_unique = df.drop_duplicates(subset=["episode_hash"]) + for row in df_unique.to_dict(orient="records"): + ep_hash = row.get("episode_hash") + if ep_hash is None: + continue + # store sanitized scalars + episode_meta_by_hash[str(ep_hash)] = {k: _py_scalar(v) for k, v in row.items()} + + # Batch loop across train datasets, writing into one shared embeddings array per image key + bs = args.batch_size + processed = 0 + for dataset_name in dataset_names: + ds = dataset_dict[dataset_name] + keep_idx = per_dataset_keep_indices[dataset_name] + n_eff = len(keep_idx) + offset = per_dataset_offset[dataset_name] + + # Only load/process the kept indices (efficient: filters before model forward) + ds_for_loader = ds if (n_eff == len(ds) and keep_idx == list(range(len(ds)))) else Subset(ds, keep_idx) + loader = DataLoader( + ds_for_loader, + batch_size=bs, + shuffle=False, + num_workers=8, + collate_fn=lambda batch: batch, # keep list[dict] (no tensor stacking) + ) + + for batch_idx, batch_samples in enumerate(loader): + start = batch_idx * bs + end = start + len(batch_samples) + global_start = offset + start + global_end = offset + end + + # metadata + for i, sample in enumerate(batch_samples): + m = _flatten_metadata(sample) + m["dataset_name"] = dataset_name + m["dataset_offset"] = offset + # Index within the Subset (i.e., after every-k subsample), then map back + # to the original dataset index. + subset_pos = batch_idx * bs + i # == start + i + orig_ds_idx = keep_idx[subset_pos] if subset_pos < len(keep_idx) else subset_pos + m["dataset_local_index"] = int(orig_ds_idx) + m["embedding_global_index"] = int(global_start + i) + + # Per-sample index_map lookup (batch may span multiple episodes). 
+ try: + idx_map_name, _ = ds.index_map[int(orig_ds_idx)] + if isinstance(idx_map_name, MultiRLDBDataset): + raise ValueError("idx_map_name is a MultiRLDBDataset, which is not supported") + ep_hash = str(idx_map_name) + m["episode_hash"] = ep_hash + + # Attach episode-level DB metadata (same for all frames in an episode) + db_row = episode_meta_by_hash.get(ep_hash) + if db_row: + for k, v in db_row.items(): + if k == "episode_hash": + continue + m[str(k)] = v + except Exception: + pass + + meta_rows.append(m) + + # embeddings per image key + for key in args.image_keys: + imgs_bchw = [] + for sample in batch_samples: + if key not in sample: + raise KeyError( + f"Missing image key '{key}' in sample. Keys: {list(sample.keys())[:30]}" + ) + imgs_bchw.append(_image_to_torch_uint8_bchw(sample[key])) + img_bchw = torch.cat(imgs_bchw, dim=0) # uint8 BCHW on CPU + images_hwc = _bchw_u8_to_list_hwc_u8(img_bchw) # list[np.uint8 HWC] + emb_t = _embed_batch_dinov3(processor, stem, images_hwc, args.device) + emb = emb_t.detach().cpu().numpy().astype(embed_dtype, copy=False) + + writer = embed_writers[key] + writer[global_start:global_end, :] = emb + + processed = global_end + if (processed // bs) % 10 == 0: + print(f"[INFO] Processed {processed}/{n_total}") + + if args.debug_first_batch: + print("[DEBUG] Exiting after first batch (--debug-first-batch).") + break + + if args.debug_first_batch: + break + + # Finalize memmaps + if args.embed_store == "npy": + for k, arr in embed_writers.items(): + if isinstance(arr, np.memmap): + arr.flush() + + # Write metadata + meta_df = pd.DataFrame(meta_rows) + meta_path = out_dir / "metadata.parquet" + meta_df.to_parquet(meta_path, index=False) + + # Small manifest + manifest = { + "n_frames": n_total, + "embedding_dim": emb_dim, + "image_keys": list(args.image_keys), + "embed_store": args.embed_store, + "embed_dtype": args.embed_dtype, + "every_k_datapoint": int(args.every_k_datapoint), + "embeddings": {k: str(p) for k, p in 
embed_paths.items()}, + "metadata_parquet": str(meta_path), + "datasets": { + name: { + "n_frames": int(per_dataset_n[name]), + "offset": int(per_dataset_offset[name]), + } + for name in dataset_names + }, + } + (out_dir / "manifest.json").write_text(json.dumps(manifest, indent=2)) + + print(f"[DONE] Wrote metadata: {meta_path}") + for k, p in embed_paths.items(): + print(f"[DONE] Wrote embeddings for {k}: {p}") + print(f"[DONE] Wrote manifest: {out_dir / 'manifest.json'}") + + +if __name__ == "__main__": + main() + + diff --git a/egomimic/scripts/visualization_process/process_image.sbatch b/egomimic/scripts/visualization_process/process_image.sbatch new file mode 100644 index 00000000..c740205b --- /dev/null +++ b/egomimic/scripts/visualization_process/process_image.sbatch @@ -0,0 +1,12 @@ +#!/bin/bash +#SBATCH --partition=hoffman-lab +#SBATCH --account=hoffman-lab +#SBATCH --nodes=1 +#SBATCH --gres=gpu:a40:1 +#SBATCH --cpus-per-task=12 +#SBATCH --output=logs/process_image/%j.out +cd /nethome/paphiwetsa3/flash/projects/EgoVerse + +source .venv/bin/activate + +srun --cpus-per-task=12 python egomimic/scripts/visualization_process/process_image.py --data-config /nethome/paphiwetsa3/flash/projects/EgoVerse/egomimic/hydra_configs/data/clothe_eva_aria_mecka.yaml --out-dir egomimic/scripts/visualization_process/fold_clothes_aria_eva_mecka_all diff --git a/egomimic/scripts/visualization_process/visualization.py b/egomimic/scripts/visualization_process/visualization.py new file mode 100644 index 00000000..d1fcb490 --- /dev/null +++ b/egomimic/scripts/visualization_process/visualization.py @@ -0,0 +1,339 @@ +""" +Gigantic 2D t-SNE scatter plot colored by a chosen metadata column. 
+ +Reads: +- manifest.json (for zarr + metadata paths) +- metadata.parquet (label column is configurable; defaults to lab-like columns) +- embeddings zarr group (expects dataset 'tsne_2d' by default) + +Writes: +- a large PNG scatter plot to the data directory +""" + +import argparse +import json +from pathlib import Path +import re + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import zarr + + +mpl.rcParams["font.family"] = "monospace" +mpl.rcParams["font.monospace"] = [ + "SF Mono", + "Menlo", + "Monaco", + "Source Code Pro", + "IBM Plex Mono", + "DejaVu Sans Mono", + "Liberation Mono", +] + + +def _pick_label_column(df: pd.DataFrame, label_col: str) -> str: + """ + Resolve which metadata column to use as labels/colors. + - If label_col is provided, require it to exist. + - Else, fall back to common "lab" column names. + """ + if label_col: + if label_col not in df.columns: + raise KeyError( + "Requested --label-col '{}' not found in metadata. Available columns (truncated): {}".format( + label_col, list(df.columns)[:50] + ) + ) + return label_col + + for c in ("lab", "db.lab", "metadata.lab"): + if c in df.columns: + return c + raise KeyError( + "Could not infer a default label column. Tried: lab, db.lab, metadata.lab. " + "Pass --label-col to choose a column explicitly. Available columns (truncated): {}".format( + list(df.columns)[:50] + ) + ) + + +def _load_omit_configs(*, omit_configs_json: str, omit_configs_file: str) -> list[dict]: + """ + Loads omit configs as a list of dicts. + + Semantics: + - Each dict is a conjunction (AND) of column==value matches. + - The list is a disjunction (OR) across dicts. + - Any row matching ANY omit dict is removed from the plot. 
+ """ + + omit_configs: list[dict] = [] + + if omit_configs_json: + parsed = json.loads(omit_configs_json) + if not isinstance(parsed, list) or not all(isinstance(x, dict) for x in parsed): + raise TypeError("--omit-configs-json must be a JSON list of dicts") + omit_configs.extend(parsed) + + if omit_configs_file: + p = Path(omit_configs_file) + parsed = json.loads(p.read_text()) + if not isinstance(parsed, list) or not all(isinstance(x, dict) for x in parsed): + raise TypeError("--omit-configs-file must point to a JSON file containing a list of dicts") + omit_configs.extend(parsed) + + # normalize any weird entries (e.g. empty dicts) + omit_configs = [d for d in omit_configs if len(d) > 0] + return omit_configs + + +def _apply_omit_configs( + meta_df: pd.DataFrame, y: np.ndarray, *, omit_configs: list[dict] +) -> tuple[pd.DataFrame, np.ndarray]: + if not omit_configs: + return meta_df, y + + for i, cfg in enumerate(omit_configs): + missing = [k for k in cfg.keys() if k not in meta_df.columns] + if missing: + raise KeyError( + "omit_configs[{}] refers to missing columns: {}. 
Available columns (truncated): {}".format( + i, missing, list(meta_df.columns)[:50] + ) + ) + + omit_mask = np.zeros(len(meta_df), dtype=bool) + for cfg in omit_configs: + m = pd.Series(True, index=meta_df.index) + for k, v in cfg.items(): + col = meta_df[k] + if v is None: + m = m & col.isna() + elif isinstance(v, str): + m = m & (col.astype(str) == v) + else: + m = m & (col == v) + omit_mask |= m.to_numpy(dtype=bool) + + keep_mask = ~omit_mask + kept = int(keep_mask.sum()) + removed = int(omit_mask.sum()) + print( + "[INFO] omit_configs removed {} / {} rows (kept {})".format( + removed, len(meta_df), kept + ) + ) + meta_df = meta_df.loc[keep_mask].reset_index(drop=True) + y = y[keep_mask] + return meta_df, y + + +def _apply_sample_every_k( + meta_df: pd.DataFrame, y: np.ndarray, *, sample_every_k: int +) -> tuple[pd.DataFrame, np.ndarray]: + if sample_every_k <= 1: + return meta_df, y + meta_df = meta_df.iloc[::sample_every_k].reset_index(drop=True) + y = y[::sample_every_k] + print( + "[INFO] sample_every_k={} kept {} / {} rows".format( + sample_every_k, len(meta_df), len(y) * sample_every_k + ) + ) + return meta_df, y + + +def _safe_filename(s: str, *, max_len: int = 120) -> str: + s = s.strip() + # Replace whitespace with underscores + s = re.sub(r"\s+", "_", s) + # Keep only common safe characters + s = re.sub(r"[^A-Za-z0-9._-]+", "_", s) + # Collapse repeats and trim + s = re.sub(r"_+", "_", s).strip("._-") + if not s: + s = "plot" + if len(s) > max_len: + s = s[:max_len].rstrip("._-") + return s + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument( + "--manifest", + type=str, + default="egomimic/scripts/visualization_process/fold_clothes_aria_eva_all_labs/manifest.json", + ) + ap.add_argument("--image-key", type=str, default="", help="Defaults to first manifest image key.") + ap.add_argument( + "--reduce-method", + type=str, + default="tsne", + choices=("tsne", "umap", "pca"), + help="Which 2D reduction result to visualize (selects _2d by 
default).", + ) + ap.add_argument( + "--reduce-name", + dest="reduce_name", + type=str, + default=None, + help="Dataset name inside the zarr group to visualize (overrides --reduce-method).", + ) + # Backwards-compatible alias (tsne-name historically meant "which 2D coords dataset to plot") + ap.add_argument( + "--tsne-name", + dest="reduce_name", + type=str, + default=None, + help="(Deprecated) Same as --reduce-name.", + ) + ap.add_argument( + "--label-col", + type=str, + default="robot_name", + help=( + "Metadata column to color points by (e.g. 'lab', 'db.operator', 'task', 'episode_hash'). " + "If omitted, tries lab-like columns: lab, db.lab, metadata.lab." + ), + ) + ap.add_argument("--out", type=str, default="", help="Output png path (defaults next to manifest).") + ap.add_argument("--figsize", type=float, nargs=2, default=(12, 12), help="Figure size in inches (W H).") + ap.add_argument("--dpi", type=int, default=400) + ap.add_argument("--point-size", type=float, default=40.0) + ap.add_argument("--alpha", type=float, default=0.2) + ap.add_argument( + "--title", + type=str, + default="", + help="If provided, overrides the default plot title.", + ) + ap.add_argument( + "--omit-configs-json", + type=str, + default="", + help=( + "JSON list of dicts specifying metadata rows to OMIT. " + "Example: '[{\"robot_name\":\"eva_bimanual\"}, {\"lab\":\"song\",\"operator\":\"rl2\"}]'. " + "Each dict is an AND across keys; the list is OR across dicts." + ), + ) + ap.add_argument( + "--omit-configs-file", + type=str, + default="", + help="Path to a JSON file containing a list of dicts (same format as --omit-configs-json).", + ) + ap.add_argument( + "--sample-every-k", + type=int, + default=1, + help="Keep every k-th datapoint (applied after omit filters). 
Use 1 to disable.", + ) + args = ap.parse_args() + + manifest_path = Path(args.manifest) + manifest = json.loads(manifest_path.read_text()) + + if args.image_key: + image_key = args.image_key + else: + image_key = manifest["image_keys"][0] + + zarr_path = Path(manifest["embeddings"][image_key]) + meta_path = Path(manifest["metadata_parquet"]) + + meta_df = pd.read_parquet(meta_path) + label_col = _pick_label_column(meta_df, args.label_col) + + root = zarr.open_group(str(zarr_path), mode="r") + reduce_name = args.reduce_name if args.reduce_name else f"{args.reduce_method}_2d" + if reduce_name not in root: + raise KeyError( + "Could not find '{}' in zarr group. Available arrays: {}".format( + reduce_name, list(root.array_keys()) + ) + ) + y = np.asarray(root[reduce_name][:]) # (N,2) + if y.ndim != 2 or y.shape[1] != 2: + raise RuntimeError("Unexpected 2D reduction shape for '{}': {}".format(reduce_name, y.shape)) + + if len(meta_df) != y.shape[0]: + raise RuntimeError( + "Row mismatch: metadata has {} rows but '{}' has {} rows".format( + len(meta_df), reduce_name, y.shape[0] + ) + ) + + omit_configs = _load_omit_configs( + omit_configs_json=args.omit_configs_json, + omit_configs_file=args.omit_configs_file, + ) + meta_df, y = _apply_omit_configs(meta_df, y, omit_configs=omit_configs) + meta_df, y = _apply_sample_every_k(meta_df, y, sample_every_k=args.sample_every_k) + + labels = meta_df[label_col].astype(str).fillna("unknown").to_numpy() + uniq_labels, label_codes = np.unique(labels, return_inverse=True) + + # Build a categorical colormap with enough distinct colors + cmap = plt.get_cmap("tab20", max(1, len(uniq_labels))) + + fig, ax = plt.subplots(figsize=tuple(args.figsize), dpi=args.dpi) + ax.scatter( + y[:, 0], + y[:, 1], + c=label_codes, + cmap=cmap, + s=args.point_size, + alpha=args.alpha, + linewidths=0, + rasterized=True, + ) + + if args.title: + title = args.title + else: + title = "t-SNE of embeddings (colored by {}: {})".format("label", label_col) + # 
Title at the very top (above legend + axes) + fig.suptitle(title, y=0.99, fontsize=24) + ax.grid(False) + + # Legend (label key): place at top, horizontal layout (figure-level for tighter spacing) + handles = [ + plt.Line2D([0], [0], marker="o", linestyle="", color=cmap(i), markersize=6) + for i in range(len(uniq_labels)) + ] + ncol = min(max(1, len(uniq_labels)), 10) + fig.legend( + handles, + uniq_labels.tolist(), + loc="upper center", + bbox_to_anchor=(0.5, 0.96), + frameon=False, + fontsize=16, + ncol=ncol, + borderaxespad=0.0, + columnspacing=1.0, + ) + + # Reserve minimal top space for suptitle + legend + fig.tight_layout(rect=(0.0, 0.0, 1.0, 0.97)) + + if args.out: + out_path = Path(args.out) + else: + if args.title: + out_path = manifest_path.parent / f"{_safe_filename(args.title)}.png" + else: + safe_label = label_col.replace("/", "_").replace(".", "_") + out_path = manifest_path.parent / f"tsne_by_{safe_label}.png" + out_path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(out_path, bbox_inches="tight") + print("[DONE] wrote", out_path) + + +if __name__ == "__main__": + main() diff --git a/egomimic/scripts/visualization_process/visualization_clothes_embodiment.py b/egomimic/scripts/visualization_process/visualization_clothes_embodiment.py new file mode 100644 index 00000000..4b0c6776 --- /dev/null +++ b/egomimic/scripts/visualization_process/visualization_clothes_embodiment.py @@ -0,0 +1,514 @@ +""" +Gigantic 2D t-SNE scatter plot colored by a chosen metadata column. + +Reads: +- manifest.json (for zarr + metadata paths) +- metadata.parquet (label column is configurable; defaults to lab-like columns) +- embeddings zarr group (expects dataset 'tsne_2d' by default) + +Writes: +- a large PNG scatter plot to the data directory + +Plot config notes: +- `plot_background_color`: figure/axes background (e.g. "#ecdbc7"). Empty/None disables. +- `plot_background_alpha`: optional float in [0,1] (defaults to 1.0). 
+""" + +import argparse +import json +from pathlib import Path +import re + +import matplotlib as mpl +import matplotlib.pyplot as plt +from matplotlib.colors import to_rgba +import numpy as np +import pandas as pd +import zarr + + +mpl.rcParams["font.family"] = "monospace" +mpl.rcParams["font.monospace"] = [ + "SF Mono", + "Menlo", + "Monaco", + "Source Code Pro", + "IBM Plex Mono", + "DejaVu Sans Mono", + "Liberation Mono", +] + + +def _pick_label_column(df: pd.DataFrame, label_col: str) -> str: + """ + Resolve which metadata column to use as labels/colors. + - If label_col is provided, require it to exist. + - Else, fall back to common "lab" column names. + """ + if label_col: + if label_col not in df.columns: + raise KeyError( + "Requested --label-col '{}' not found in metadata. Available columns (truncated): {}".format( + label_col, list(df.columns)[:50] + ) + ) + return label_col + + for c in ("lab", "db.lab", "metadata.lab"): + if c in df.columns: + return c + raise KeyError( + "Could not infer a default label column. Tried: lab, db.lab, metadata.lab. " + "Pass --label-col to choose a column explicitly. Available columns (truncated): {}".format( + list(df.columns)[:50] + ) + ) + + +def _load_omit_configs(*, omit_configs_json: str, omit_configs_file: str) -> list[dict]: + """ + Loads omit configs as a list of dicts. + + Semantics: + - Each dict is a conjunction (AND) of column==value matches. + - The list is a disjunction (OR) across dicts. + - Any row matching ANY omit dict is removed from the plot. 
+ """ + + omit_configs: list[dict] = [] + + if omit_configs_json: + parsed = json.loads(omit_configs_json) + if not isinstance(parsed, list) or not all(isinstance(x, dict) for x in parsed): + raise TypeError("--omit-configs-json must be a JSON list of dicts") + omit_configs.extend(parsed) + + if omit_configs_file: + p = Path(omit_configs_file) + parsed = json.loads(p.read_text()) + if not isinstance(parsed, list) or not all(isinstance(x, dict) for x in parsed): + raise TypeError("--omit-configs-file must point to a JSON file containing a list of dicts") + omit_configs.extend(parsed) + + # normalize any weird entries (e.g. empty dicts) + omit_configs = [d for d in omit_configs if len(d) > 0] + return omit_configs + + +def _apply_omit_configs( + meta_df: pd.DataFrame, y: np.ndarray, *, omit_configs: list[dict] +) -> tuple[pd.DataFrame, np.ndarray]: + if not omit_configs: + return meta_df, y + + for i, cfg in enumerate(omit_configs): + missing = [k for k in cfg.keys() if k not in meta_df.columns] + if missing: + raise KeyError( + "omit_configs[{}] refers to missing columns: {}. 
Available columns (truncated): {}".format( + i, missing, list(meta_df.columns)[:50] + ) + ) + + omit_mask = np.zeros(len(meta_df), dtype=bool) + for cfg in omit_configs: + m = pd.Series(True, index=meta_df.index) + for k, v in cfg.items(): + col = meta_df[k] + if v is None: + m = m & col.isna() + elif isinstance(v, str): + m = m & (col.astype(str) == v) + else: + m = m & (col == v) + omit_mask |= m.to_numpy(dtype=bool) + + keep_mask = ~omit_mask + kept = int(keep_mask.sum()) + removed = int(omit_mask.sum()) + print( + "[INFO] omit_configs removed {} / {} rows (kept {})".format( + removed, len(meta_df), kept + ) + ) + meta_df = meta_df.loc[keep_mask].reset_index(drop=True) + y = y[keep_mask] + return meta_df, y + + +def _apply_sample_every_k( + meta_df: pd.DataFrame, y: np.ndarray, *, sample_every_k: int +) -> tuple[pd.DataFrame, np.ndarray]: + if sample_every_k <= 1: + return meta_df, y + before = len(meta_df) + meta_df = meta_df.iloc[::sample_every_k].reset_index(drop=True) + y = y[::sample_every_k] + print( + "[INFO] sample_every_k={} kept {} / {} rows".format( + sample_every_k, len(meta_df), before + ) + ) + return meta_df, y + + +def _load_plot_config(*, plot_config_json: str, plot_config_file: str) -> dict: + """ + Load a plotting config dict. 
+ + Supported schema (both forms accepted): + - {"label_col": "robot_name", + "label_col_name": [{"eva_bimanual": {"color": "#...", "legend_name": "Robot"}}, ...]} + - {"label_col": "robot_name", + "label_col_name": {"eva_bimanual": {"color": "#...", "legend_name": "Robot"}, ...}} + """ + cfg: dict = {} + if plot_config_json: + cfg = json.loads(plot_config_json) + if not isinstance(cfg, dict): + raise TypeError("--plot-config-json must be a JSON object (dict)") + return cfg + if plot_config_file: + p = Path(plot_config_file) + cfg = json.loads(p.read_text()) + if not isinstance(cfg, dict): + raise TypeError("--plot-config-file must point to a JSON file containing an object (dict)") + return cfg + return {} + + +def _normalize_label_styles(plot_cfg: dict) -> tuple[list[str], dict[str, dict]]: + """ + Returns (ordered_label_values, label_value->style_dict). + """ + label_col_name = plot_cfg.get("label_col_name", None) + if not label_col_name: + return [], {} + + if isinstance(label_col_name, dict): + ordered = list(label_col_name.keys()) + styles = label_col_name + elif isinstance(label_col_name, list): + ordered = [] + styles = {} + for entry in label_col_name: + if not isinstance(entry, dict) or len(entry) != 1: + raise TypeError( + "plot_config['label_col_name'] entries must be dicts with a single key, got: {}".format( + entry + ) + ) + (k, v), = entry.items() + ordered.append(str(k)) + styles[str(k)] = v if isinstance(v, dict) else {} + else: + raise TypeError( + "plot_config['label_col_name'] must be a dict or list, got: {}".format(type(label_col_name)) + ) + return ordered, {str(k): (v if isinstance(v, dict) else {}) for k, v in styles.items()} + + +def _build_colors_and_legend( + labels: np.ndarray, + *, + ordered_styles: list[str], + style_map: dict[str, dict], +) -> tuple[np.ndarray, list[plt.Line2D], list[str]]: + """ + Returns (per_point_rgba Nx4, legend_handles, legend_names). 
+ + - Labels listed in ordered_styles get their provided colors (if any) and legend names (if any). + - Remaining labels get colors from tab20. + - Legend order: ordered_styles first (if present in data), then remaining in first-seen order. + """ + labels = labels.astype(str) + present = set(labels.tolist()) + ordered_present = [v for v in ordered_styles if v in present] + + # Stable "first seen" order for labels not in ordered_styles + remainder = [] + seen = set(ordered_present) + for v in labels.tolist(): + if v in present and v not in seen: + seen.add(v) + remainder.append(v) + + # Assign colors + label_to_rgba: dict[str, tuple[float, float, float, float]] = {} + for v in ordered_present: + style = style_map.get(v, {}) + if "color" in style and style["color"]: + label_to_rgba[v] = to_rgba(style["color"]) + else: + # fallback color if not provided + label_to_rgba[v] = to_rgba("#4a4e69") + + if remainder: + cmap = plt.get_cmap("tab20", max(1, len(remainder))) + for i, v in enumerate(remainder): + label_to_rgba[v] = cmap(i) + + point_colors = np.asarray([label_to_rgba[v] for v in labels], dtype=float) + + # Legend labels (names) + legend_order = ordered_present + remainder + legend_names = [] + handles = [] + for v in legend_order: + style = style_map.get(v, {}) + legend_names.append(str(style.get("legend_name", v))) + handles.append(plt.Line2D([0], [0], marker="o", linestyle="", color=label_to_rgba[v], markersize=12)) + + return point_colors, handles, legend_names + + +def _safe_filename(s: str, *, max_len: int = 120) -> str: + s = s.strip() + # Replace whitespace with underscores + s = re.sub(r"\s+", "_", s) + # Keep only common safe characters + s = re.sub(r"[^A-Za-z0-9._-]+", "_", s) + # Collapse repeats and trim + s = re.sub(r"_+", "_", s).strip("._-") + if not s: + s = "plot" + if len(s) > max_len: + s = s[:max_len].rstrip("._-") + return s + + +def _apply_plot_background(*, fig: plt.Figure, ax: plt.Axes, plot_cfg: dict) -> None: + """ + Apply a plot 
def _apply_plot_background(*, fig: plt.Figure, ax: plt.Axes, plot_cfg: dict) -> None:
    """
    Apply a plot background (figure + axes facecolor) from plot_cfg.

    Reads 'plot_background_color' (any matplotlib color spec) and the optional
    'plot_background_alpha' (clipped to [0, 1]; defaults to 1.0 on bad input).
    No-op when no color is configured.
    """
    bg = plot_cfg.get("plot_background_color", None)
    if bg is None:
        return
    bg = str(bg).strip()
    if not bg:
        return

    alpha = plot_cfg.get("plot_background_alpha", 1.0)
    try:
        alpha = float(alpha)
    # Narrowed from a bare `except Exception`: only conversion failures should
    # fall back to fully opaque; anything else is a real bug worth surfacing.
    except (TypeError, ValueError):
        alpha = 1.0
    alpha = float(np.clip(alpha, 0.0, 1.0))

    rgba = to_rgba(bg, alpha=alpha)
    fig.patch.set_facecolor(rgba)
    ax.set_facecolor(rgba)


def main():
    """
    CLI entry point: scatter-plot a precomputed 2D reduction (t-SNE/UMAP/PCA)
    of embeddings listed in a manifest, colored by a metadata column.

    Reads 2D coordinates from the zarr group referenced by the manifest and
    row metadata from the manifest's parquet file, applies optional omit
    filters and subsampling, then writes a PNG next to the manifest (or to
    --out).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--manifest",
        type=str,
        default="egomimic/scripts/visualization_process/fold_clothes_aria_eva_all_labs/manifest.json",
    )
    ap.add_argument("--image-key", type=str, default="", help="Defaults to first manifest image key.")
    ap.add_argument(
        "--reduce-method",
        type=str,
        default="tsne",
        choices=("tsne", "umap", "pca"),
        help="Which 2D reduction result to visualize (selects _2d by default).",
    )
    ap.add_argument(
        "--reduce-name",
        dest="reduce_name",
        type=str,
        default=None,
        help="Dataset name inside the zarr group to visualize (overrides --reduce-method).",
    )
    # Backwards-compatible alias (tsne-name historically meant "which 2D coords dataset to plot")
    ap.add_argument(
        "--tsne-name",
        dest="reduce_name",
        type=str,
        default=None,
        help="(Deprecated) Same as --reduce-name.",
    )
    ap.add_argument(
        "--label-col",
        type=str,
        default="robot_name",
        help=(
            "Metadata column to color points by (e.g. 'lab', 'db.operator', 'task', 'episode_hash'). "
            "If omitted, tries lab-like columns: lab, db.lab, metadata.lab."
        ),
    )
    ap.add_argument("--out", type=str, default="", help="Output png path (defaults next to manifest).")
    ap.add_argument("--figsize", type=float, nargs=2, default=(12, 12), help="Figure size in inches (W H).")
    ap.add_argument("--dpi", type=int, default=400)
    ap.add_argument("--point-size", type=float, default=40.0)
    ap.add_argument("--alpha", type=float, default=0.2)
    ap.add_argument(
        "--title",
        type=str,
        default="",
        help="If provided, overrides the default plot title.",
    )
    ap.add_argument(
        "--omit-configs-json",
        type=str,
        default="",
        help=(
            "JSON list of dicts specifying metadata rows to OMIT. "
            "Example: '[{\"robot_name\":\"eva_bimanual\"}, {\"lab\":\"song\",\"operator\":\"rl2\"}]'. "
            "Each dict is an AND across keys; the list is OR across dicts."
        ),
    )
    ap.add_argument(
        "--omit-configs-file",
        type=str,
        default="",
        help="Path to a JSON file containing a list of dicts (same format as --omit-configs-json).",
    )
    ap.add_argument(
        "--plot-config-json",
        type=str,
        default="",
        help=(
            "JSON object configuring label styles (colors/legend names). "
            "If provided, overrides the in-script default mapping."
        ),
    )
    ap.add_argument(
        "--plot-config-file",
        type=str,
        default="",
        help="Path to a JSON file containing a plotting config object (same as --plot-config-json).",
    )
    ap.add_argument(
        "--sample-every-k",
        type=int,
        default=1,
        help="Keep every k-th datapoint (applied after omit filters). Use 1 to disable.",
    )
    args = ap.parse_args()

    # In-script defaults for the known embodiments; any user-supplied config
    # overrides these key-by-key via dict union below.
    default_plot_config = {
        "label_col": args.label_col,
        "plot_background_color": "#FFFFFF",
        "label_col_name": [
            {"eva_bimanual": {"color": "#009e73", "legend_name": "Robot"}},
            {"aria_bimanual": {"color": "#2462a3", "legend_name": "EgoVerse-A"}},
            {"mecka_bimanual": {"color": "#e5a423", "legend_name": "EgoVerse-I"}},
        ],
    }

    plot_cfg = default_plot_config | _load_plot_config(
        plot_config_json=args.plot_config_json,
        plot_config_file=args.plot_config_file,
    )
    label_col = plot_cfg.get("label_col", args.label_col)

    manifest_path = Path(args.manifest)
    manifest = json.loads(manifest_path.read_text())

    image_key = args.image_key if args.image_key else manifest["image_keys"][0]

    zarr_path = Path(manifest["embeddings"][image_key])
    meta_path = Path(manifest["metadata_parquet"])

    meta_df = pd.read_parquet(meta_path)
    label_col = _pick_label_column(meta_df, label_col)

    root = zarr.open_group(str(zarr_path), mode="r")
    reduce_name = args.reduce_name if args.reduce_name else f"{args.reduce_method}_2d"
    if reduce_name not in root:
        raise KeyError(
            "Could not find '{}' in zarr group. Available arrays: {}".format(
                reduce_name, list(root.array_keys())
            )
        )
    y = np.asarray(root[reduce_name][:])  # expected (N, 2)
    if y.ndim != 2 or y.shape[1] != 2:
        raise RuntimeError("Unexpected 2D reduction shape for '{}': {}".format(reduce_name, y.shape))

    if len(meta_df) != y.shape[0]:
        raise RuntimeError(
            "Row mismatch: metadata has {} rows but '{}' has {} rows".format(
                len(meta_df), reduce_name, y.shape[0]
            )
        )

    omit_configs = _load_omit_configs(
        omit_configs_json=args.omit_configs_json,
        omit_configs_file=args.omit_configs_file,
    )
    meta_df, y = _apply_omit_configs(meta_df, y, omit_configs=omit_configs)
    meta_df, y = _apply_sample_every_k(meta_df, y, sample_every_k=args.sample_every_k)

    # BUGFIX: fillna must run BEFORE astype(str) — astype(str) turns NaN into
    # the literal string "nan", so the original fillna("unknown") never fired.
    labels = meta_df[label_col].fillna("unknown").astype(str).to_numpy()
    ordered_styles, style_map = _normalize_label_styles(plot_cfg)
    point_colors, legend_handles, legend_names = _build_colors_and_legend(
        labels, ordered_styles=ordered_styles, style_map=style_map
    )

    fig, ax = plt.subplots(figsize=tuple(args.figsize), dpi=args.dpi)
    _apply_plot_background(fig=fig, ax=ax, plot_cfg=plot_cfg)
    ax.scatter(
        y[:, 0],
        y[:, 1],
        c=point_colors,
        s=args.point_size,
        alpha=args.alpha,
        linewidths=0,
        rasterized=True,
    )

    if args.title:
        title = args.title
    else:
        title = "t-SNE of embeddings (colored by {}: {})".format("label", label_col)
    # Title at the very top (above legend + axes)
    fig.suptitle(title, y=0.99, fontsize=24)
    ax.grid(False)

    # Legend (label key): place at top, horizontal layout (figure-level for tighter spacing)
    ncol = min(max(1, len(legend_names)), 10)
    fig.legend(
        legend_handles,
        legend_names,
        loc="upper center",
        bbox_to_anchor=(0.5, 0.96),
        frameon=False,
        fontsize=24,
        ncol=ncol,
        borderaxespad=0.0,
        columnspacing=1.0,
    )

    # Reserve minimal top space for suptitle + legend
    fig.tight_layout(rect=(0.0, 0.0, 1.0, 0.97))

    if args.out:
        out_path = Path(args.out)
    else:
        if args.title:
            out_path = manifest_path.parent / f"{_safe_filename(args.title)}.png"
        else:
            safe_label = label_col.replace("/", "_").replace(".", "_")
            out_path = manifest_path.parent / f"tsne_by_{safe_label}.png"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_path, bbox_inches="tight", facecolor=fig.get_facecolor())
    print("[DONE] wrote", out_path)


if __name__ == "__main__":
    main()