Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ Set `git config --global submodule.recurse true` if you want `git pull` to autom
Set your wandb project in ``egomimic/hydra_configs/logger/wandb.yaml``

## Quick Start

### AWS Configure
```
aws configure
<fill in the credentials Simar sent you>
./egomimic/utils/aws/setup_secret.sh
```
Running `setup_secret.sh` sets up your current environment so that it can download data from Cloudflare.

### Processing your own data for training
![Data Streams](./assets/train_data.png)
See [``data_processing.md``](./data_processing.md)
Expand Down
182 changes: 92 additions & 90 deletions egomimic/hydra_configs/data/eva_bc_zarr.yaml
Original file line number Diff line number Diff line change
@@ -1,100 +1,102 @@
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper
train_datasets:
eva_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset
datasets:
single_episode:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.ZarrDataset
Episode_path: /coc/flash7/scratch/egoverseDebugDatasets/eva/1767495035712.zarr
key_map:
observations.images.front_img_1:
key_type: camera_keys
zarr_key: images.front_1
observations.images.right_wrist_img:
key_type: camera_keys
zarr_key: images.right_wrist
observations.images.left_wrist_img:
key_type: camera_keys
zarr_key: images.left_wrist
right.obs_ee_pose:
key_type: proprio_keys
zarr_key: right.obs_ee_pose
right.obs_gripper:
key_type: proprio_keys
zarr_key: right.gripper
left.obs_ee_pose:
key_type: proprio_keys
zarr_key: left.obs_ee_pose
left.obs_gripper:
key_type: proprio_keys
zarr_key: left.gripper
right.gripper:
key_type: action_keys
zarr_key: right.gripper
horizon: 45
left.gripper:
key_type: action_keys
zarr_key: left.gripper
horizon: 45
right.cmd_ee_pose:
key_type: action_keys
zarr_key: right.cmd_ee_pose
horizon: 45
left.cmd_ee_pose:
key_type: action_keys
zarr_key: left.cmd_ee_pose
horizon: 45
transform_list:
_target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: /coc/flash7/scratch/egoverseDebugDatasets/egoverseS3DatasetTest/
key_map:
observations.images.front_img_1:
key_type: camera_keys
zarr_key: images.front_1
observations.images.right_wrist_img:
key_type: camera_keys
zarr_key: images.right_wrist
observations.images.left_wrist_img:
key_type: camera_keys
zarr_key: images.left_wrist
right.obs_ee_pose:
key_type: proprio_keys
zarr_key: right.obs_ee_pose
right.obs_gripper:
key_type: proprio_keys
zarr_key: right.gripper
left.obs_ee_pose:
key_type: proprio_keys
zarr_key: left.obs_ee_pose
left.obs_gripper:
key_type: proprio_keys
zarr_key: left.gripper
right.gripper:
key_type: action_keys
zarr_key: right.gripper
horizon: 45
left.gripper:
key_type: action_keys
zarr_key: left.gripper
horizon: 45
right.cmd_ee_pose:
key_type: action_keys
zarr_key: right.cmd_ee_pose
horizon: 45
left.cmd_ee_pose:
key_type: action_keys
zarr_key: left.cmd_ee_pose
horizon: 45
transform_list:
_target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list
filters:
episode_hash: "2025-12-26-18-07-46-296000"
mode: total

valid_datasets:
eva_bimanual:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset
datasets:
single_episode:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.ZarrDataset
Episode_path: /coc/flash7/scratch/egoverseDebugDatasets/eva/1767495035712.zarr
key_map:
observations.images.front_img_1 :
key_type: camera_keys
zarr_key: images.front_1
observations.images.right_wrist_img:
key_type: camera_keys
zarr_key: images.right_wrist
observations.images.left_wrist_img:
key_type: camera_keys
zarr_key: images.left_wrist
right.obs_ee_pose:
key_type: proprio_keys
zarr_key: right.obs_ee_pose
right.obs_gripper:
key_type: proprio_keys
zarr_key: right.gripper
left.obs_ee_pose:
key_type: proprio_keys
zarr_key: left.obs_ee_pose
left.obs_gripper:
key_type: proprio_keys
zarr_key: left.gripper
right.gripper:
key_type: action_keys
zarr_key: right.gripper
horizon: 45
left.gripper:
key_type: action_keys
zarr_key: left.gripper
horizon: 45
right.cmd_ee_pose:
key_type: action_keys
zarr_key: right.cmd_ee_pose
horizon: 45
left.cmd_ee_pose:
key_type: action_keys
zarr_key: left.cmd_ee_pose
horizon: 45
transform_list:
_target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list
_target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver
resolver:
_target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver
folder_path: /coc/flash7/scratch/egoverseDebugDatasets/egoverseS3DatasetTest/
key_map:
observations.images.front_img_1:
key_type: camera_keys
zarr_key: images.front_1
observations.images.right_wrist_img:
key_type: camera_keys
zarr_key: images.right_wrist
observations.images.left_wrist_img:
key_type: camera_keys
zarr_key: images.left_wrist
right.obs_ee_pose:
key_type: proprio_keys
zarr_key: right.obs_ee_pose
right.obs_gripper:
key_type: proprio_keys
zarr_key: right.gripper
left.obs_ee_pose:
key_type: proprio_keys
zarr_key: left.obs_ee_pose
left.obs_gripper:
key_type: proprio_keys
zarr_key: left.gripper
right.gripper:
key_type: action_keys
zarr_key: right.gripper
horizon: 45
left.gripper:
key_type: action_keys
zarr_key: left.gripper
horizon: 45
right.cmd_ee_pose:
key_type: action_keys
zarr_key: right.cmd_ee_pose
horizon: 45
left.cmd_ee_pose:
key_type: action_keys
zarr_key: left.cmd_ee_pose
horizon: 45
transform_list:
_target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list
filters:
episode_hash: "2025-12-26-18-07-46-296000"
mode: total

train_dataloader_params:
Expand Down
37 changes: 18 additions & 19 deletions egomimic/rldb/zarr/zarr_dataset_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import json
import logging
import os
import random
import subprocess
import tempfile
Expand All @@ -35,6 +36,7 @@
import zarr

# from action_chunk_transforms import Transform
from egomimic.utils.aws.aws_data_utils import load_env
from egomimic.utils.aws.aws_sql import (
create_default_engine,
episode_table_to_df,
Expand Down Expand Up @@ -145,7 +147,7 @@ def __init__(
self,
folder_path: Path,
bucket_name: str = "rldb",
main_prefix: str = "processed_v2",
main_prefix: str = "processed_v3",
key_map: dict | None = None,
transform_list: list | None = None,
):
Expand Down Expand Up @@ -214,11 +216,14 @@ def _get_filtered_paths(filters: dict | None = None) -> list[tuple[str, str]]:
(df[list(filters)] == series).all(axis=1),
["zarr_processed_path", "episode_hash"],
]
skipped = df[df["zarr_processed_path"].isnull()]["episode_hash"].tolist()
before_len = len(output)

output = output[
output["zarr_processed_path"].fillna("").astype(str).str.strip() != ""
]
logger.info(
f"Skipped {len(skipped)} episodes with null zarr_processed_path: {skipped}"
f"Skipped {before_len - len(output)} episodes with null zarr_processed_path: {output}"
)
output = output[~output["episode_hash"].isin(skipped)]

paths = list(output.itertuples(index=False, name=None))
logger.info(f"Paths: {paths}")
Expand Down Expand Up @@ -274,7 +279,15 @@ def _sync_s3_to_local(
try:
batch_path.write_text("\n".join(lines) + "\n")

cmd = ["s5cmd", "run", str(batch_path)]
load_env()
rl2_endpoint_url = os.environ.get("R2_ENDPOINT_URL")
access_key_id = os.environ["R2_ACCESS_KEY_ID"]
secret_access_key = os.environ["R2_SECRET_ACCESS_KEY"]
os.environ["AWS_ACCESS_KEY_ID"] = access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = secret_access_key
os.environ["AWS_DEFAULT_REGION"] = "auto"
os.environ["AWS_REGION"] = "auto"
cmd = ["s5cmd", "--endpoint-url", rl2_endpoint_url, "run", str(batch_path)]
logger.info("Running s5cmd batch (%d lines): %s", len(lines), " ".join(cmd))
subprocess.run(cmd, check=True)

Expand Down Expand Up @@ -831,17 +844,3 @@ def __len__(self) -> int:
def __repr__(self) -> str:
"""String representation of the episode."""
return f"ZarrEpisode(path={self._path}, frames={len(self)})"


if __name__ == "__main__":
import hydra
from omegaconf import OmegaConf

dataset_cfg_path = "/nethome/paphiwetsa3/flash/projects/EgoVerse/egomimic/hydra_configs/data/test_multi_zarr.yaml"
# Using Hydra to load the dataset config
dataset_cfg = OmegaConf.load(dataset_cfg_path)
datamodule = hydra.utils.instantiate(dataset_cfg)
dl = datamodule.train_dataloader()
batch = next(iter(dl))

breakpoint()
Loading