diff --git a/README.md b/README.md index 5ed65489..63c29b10 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,15 @@ Set `git config --global submodule.recurse true` if you want `git pull` to autom Set your wandb project in ``egomimic/hydra_configs/logger/wandb.yaml`` ## Quick Start + +### AWS Configure +``` +aws configure + +./egomimic/utils/aws/setup_secret.sh +``` +`setup_secret.sh` will allow your current env to download data from cloudflare. + ### Processing your own data for training ![Data Streams](./assets/train_data.png) See [``data_processing.md``](./data_processing.md) diff --git a/egomimic/hydra_configs/data/eva_bc_zarr.yaml b/egomimic/hydra_configs/data/eva_bc_zarr.yaml index 3f858d4a..153c7363 100644 --- a/egomimic/hydra_configs/data/eva_bc_zarr.yaml +++ b/egomimic/hydra_configs/data/eva_bc_zarr.yaml @@ -1,100 +1,102 @@ _target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper train_datasets: eva_bimanual: - _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset - datasets: - single_episode: - _target_: egomimic.rldb.zarr.zarr_dataset_multi.ZarrDataset - Episode_path: /coc/flash7/scratch/egoverseDebugDatasets/eva/1767495035712.zarr - key_map: - observations.images.front_img_1: - key_type: camera_keys - zarr_key: images.front_1 - observations.images.right_wrist_img: - key_type: camera_keys - zarr_key: images.right_wrist - observations.images.left_wrist_img: - key_type: camera_keys - zarr_key: images.left_wrist - right.obs_ee_pose: - key_type: proprio_keys - zarr_key: right.obs_ee_pose - right.obs_gripper: - key_type: proprio_keys - zarr_key: right.gripper - left.obs_ee_pose: - key_type: proprio_keys - zarr_key: left.obs_ee_pose - left.obs_gripper: - key_type: proprio_keys - zarr_key: left.gripper - right.gripper: - key_type: action_keys - zarr_key: right.gripper - horizon: 45 - left.gripper: - key_type: action_keys - zarr_key: left.gripper - horizon: 45 - right.cmd_ee_pose: - key_type: action_keys - zarr_key: right.cmd_ee_pose - horizon: 
45 - left.cmd_ee_pose: - key_type: action_keys - zarr_key: left.cmd_ee_pose - horizon: 45 - transform_list: - _target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list + _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver + resolver: + _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver + folder_path: /coc/flash7/scratch/egoverseDebugDatasets/egoverseS3DatasetTest/ + key_map: + observations.images.front_img_1: + key_type: camera_keys + zarr_key: images.front_1 + observations.images.right_wrist_img: + key_type: camera_keys + zarr_key: images.right_wrist + observations.images.left_wrist_img: + key_type: camera_keys + zarr_key: images.left_wrist + right.obs_ee_pose: + key_type: proprio_keys + zarr_key: right.obs_ee_pose + right.obs_gripper: + key_type: proprio_keys + zarr_key: right.gripper + left.obs_ee_pose: + key_type: proprio_keys + zarr_key: left.obs_ee_pose + left.obs_gripper: + key_type: proprio_keys + zarr_key: left.gripper + right.gripper: + key_type: action_keys + zarr_key: right.gripper + horizon: 45 + left.gripper: + key_type: action_keys + zarr_key: left.gripper + horizon: 45 + right.cmd_ee_pose: + key_type: action_keys + zarr_key: right.cmd_ee_pose + horizon: 45 + left.cmd_ee_pose: + key_type: action_keys + zarr_key: left.cmd_ee_pose + horizon: 45 + transform_list: + _target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list + filters: + episode_hash: "2025-12-26-18-07-46-296000" mode: total valid_datasets: eva_bimanual: - _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset - datasets: - single_episode: - _target_: egomimic.rldb.zarr.zarr_dataset_multi.ZarrDataset - Episode_path: /coc/flash7/scratch/egoverseDebugDatasets/eva/1767495035712.zarr - key_map: - observations.images.front_img_1 : - key_type: camera_keys - zarr_key: images.front_1 - observations.images.right_wrist_img: - key_type: camera_keys - zarr_key: images.right_wrist - 
observations.images.left_wrist_img: - key_type: camera_keys - zarr_key: images.left_wrist - right.obs_ee_pose: - key_type: proprio_keys - zarr_key: right.obs_ee_pose - right.obs_gripper: - key_type: proprio_keys - zarr_key: right.gripper - left.obs_ee_pose: - key_type: proprio_keys - zarr_key: left.obs_ee_pose - left.obs_gripper: - key_type: proprio_keys - zarr_key: left.gripper - right.gripper: - key_type: action_keys - zarr_key: right.gripper - horizon: 45 - left.gripper: - key_type: action_keys - zarr_key: left.gripper - horizon: 45 - right.cmd_ee_pose: - key_type: action_keys - zarr_key: right.cmd_ee_pose - horizon: 45 - left.cmd_ee_pose: - key_type: action_keys - zarr_key: left.cmd_ee_pose - horizon: 45 - transform_list: - _target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list + _target_: egomimic.rldb.zarr.zarr_dataset_multi.MultiDataset._from_resolver + resolver: + _target_: egomimic.rldb.zarr.zarr_dataset_multi.S3EpisodeResolver + folder_path: /coc/flash7/scratch/egoverseDebugDatasets/egoverseS3DatasetTest/ + key_map: + observations.images.front_img_1: + key_type: camera_keys + zarr_key: images.front_1 + observations.images.right_wrist_img: + key_type: camera_keys + zarr_key: images.right_wrist + observations.images.left_wrist_img: + key_type: camera_keys + zarr_key: images.left_wrist + right.obs_ee_pose: + key_type: proprio_keys + zarr_key: right.obs_ee_pose + right.obs_gripper: + key_type: proprio_keys + zarr_key: right.gripper + left.obs_ee_pose: + key_type: proprio_keys + zarr_key: left.obs_ee_pose + left.obs_gripper: + key_type: proprio_keys + zarr_key: left.gripper + right.gripper: + key_type: action_keys + zarr_key: right.gripper + horizon: 45 + left.gripper: + key_type: action_keys + zarr_key: left.gripper + horizon: 45 + right.cmd_ee_pose: + key_type: action_keys + zarr_key: right.cmd_ee_pose + horizon: 45 + left.cmd_ee_pose: + key_type: action_keys + zarr_key: left.cmd_ee_pose + horizon: 45 + transform_list: + 
_target_: egomimic.rldb.zarr.action_chunk_transforms.build_eva_bimanual_transform_list + filters: + episode_hash: "2025-12-26-18-07-46-296000" mode: total train_dataloader_params: diff --git a/egomimic/rldb/zarr/zarr_dataset_multi.py b/egomimic/rldb/zarr/zarr_dataset_multi.py index 24dfb50f..280dcdf0 100644 --- a/egomimic/rldb/zarr/zarr_dataset_multi.py +++ b/egomimic/rldb/zarr/zarr_dataset_multi.py @@ -22,6 +22,7 @@ import json import logging +import os import random import subprocess import tempfile @@ -35,6 +36,7 @@ import zarr # from action_chunk_transforms import Transform +from egomimic.utils.aws.aws_data_utils import load_env from egomimic.utils.aws.aws_sql import ( create_default_engine, episode_table_to_df, @@ -145,7 +147,7 @@ def __init__( self, folder_path: Path, bucket_name: str = "rldb", - main_prefix: str = "processed_v2", + main_prefix: str = "processed_v3", key_map: dict | None = None, transform_list: list | None = None, ): @@ -214,11 +216,14 @@ def _get_filtered_paths(filters: dict | None = None) -> list[tuple[str, str]]: (df[list(filters)] == series).all(axis=1), ["zarr_processed_path", "episode_hash"], ] - skipped = df[df["zarr_processed_path"].isnull()]["episode_hash"].tolist() + before_len = len(output) + + output = output[ + output["zarr_processed_path"].fillna("").astype(str).str.strip() != "" + ] logger.info( - f"Skipped {len(skipped)} episodes with null zarr_processed_path: {skipped}" + f"Skipped {before_len - len(output)} episodes with null/empty zarr_processed_path" ) - output = output[~output["episode_hash"].isin(skipped)] paths = list(output.itertuples(index=False, name=None)) logger.info(f"Paths: {paths}") @@ -274,7 +279,15 @@ def _sync_s3_to_local( try: batch_path.write_text("\n".join(lines) + "\n") - cmd = ["s5cmd", "run", str(batch_path)] + load_env() + rl2_endpoint_url = os.environ["R2_ENDPOINT_URL"] + access_key_id = os.environ["R2_ACCESS_KEY_ID"] + secret_access_key = os.environ["R2_SECRET_ACCESS_KEY"] + 
os.environ["AWS_ACCESS_KEY_ID"] = access_key_id + os.environ["AWS_SECRET_ACCESS_KEY"] = secret_access_key + os.environ["AWS_DEFAULT_REGION"] = "auto" + os.environ["AWS_REGION"] = "auto" + cmd = ["s5cmd", "--endpoint-url", rl2_endpoint_url, "run", str(batch_path)] logger.info("Running s5cmd batch (%d lines): %s", len(lines), " ".join(cmd)) subprocess.run(cmd, check=True) @@ -831,17 +844,3 @@ def __len__(self) -> int: def __repr__(self) -> str: """String representation of the episode.""" return f"ZarrEpisode(path={self._path}, frames={len(self)})" - - -if __name__ == "__main__": - import hydra - from omegaconf import OmegaConf - - dataset_cfg_path = "/nethome/paphiwetsa3/flash/projects/EgoVerse/egomimic/hydra_configs/data/test_multi_zarr.yaml" - # Using Hydra to load the dataset config - dataset_cfg = OmegaConf.load(dataset_cfg_path) - datamodule = hydra.utils.instantiate(dataset_cfg) - dl = datamodule.train_dataloader() - batch = next(iter(dl)) - - breakpoint() diff --git a/egomimic/scripts/zarr_data_viz.ipynb b/egomimic/scripts/zarr_data_viz.ipynb index 3d8e17e4..0e075069 100644 --- a/egomimic/scripts/zarr_data_viz.ipynb +++ b/egomimic/scripts/zarr_data_viz.ipynb @@ -39,11 +39,23 @@ " draw_actions,\n", " nds,\n", ")\n", + "from egomimic.rldb.zarr.zarr_dataset_multi import S3EpisodeResolver\n", + "from egomimic.rldb.zarr.action_chunk_transforms import build_aria_bimanual_transform_list\n", "\n", "# Ensure mediapy can find an ffmpeg executable in this environment\n", "mpy.set_ffmpeg(imageio_ffmpeg.get_ffmpeg_exe())" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc9edba1", + "metadata": {}, + "outputs": [], + "source": [ + "TEMP_DIR = \"/coc/flash7/scratch/egoverseDebugDatasets/egoverseS3DatasetTest\"" + ] + }, { "cell_type": "code", "execution_count": null, @@ -52,7 +64,7 @@ "outputs": [], "source": [ "# Point this at a single episode directory, e.g. 
/path/to/episode_hash.zarr\n", - "EPISODE_PATH = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/1767495035712.zarr\")\n", + "# EPISODE_PATH = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/1767495035712.zarr\")\n", "\n", "key_map = {\n", " \"images.front_1\": {\"zarr_key\": \"images.front_1\"},\n", @@ -80,36 +92,25 @@ " stride=ACTION_STRIDE,\n", " # left_extra_batch_key={\"left_extrinsics_pose\": left_extrinsics_pose},\n", " # right_extra_batch_key={\"right_extrinsics_pose\": right_extrinsics_pose},\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7fbf37", - "metadata": {}, - "outputs": [], - "source": [ + ")\n", + "\n", + "\n", "# Build a MultiDataset with exactly one ZarrDataset inside\n", - "single_ds = ZarrDataset(Episode_path=EPISODE_PATH, key_map=key_map, transform_list=transform_list)\n", + "# single_ds = ZarrDataset(Episode_path=EPISODE_PATH, key_map=key_map, transform_list=transform_list)\n", "# single_ds = ZarrDataset(Episode_path=EPISODE_PATH, key_map=key_map)\n", - "multi_ds = MultiDataset(datasets={\"single_episode\": single_ds}, mode=\"total\")\n", "\n", - "print(\"len(single_ds):\", len(single_ds))\n", - "print(\"len(multi_ds):\", len(multi_ds))\n", + "# multi_ds = MultiDataset(datasets={\"single_episode\": single_ds}, mode=\"total\")\n", + "resolver = S3EpisodeResolver(\n", + " TEMP_DIR, key_map=key_map, transform_list=transform_list\n", + ")\n", + "filters = {\n", + " \"episode_hash\": \"2025-12-26-18-07-46-296000\"\n", + "}\n", + "multi_ds = MultiDataset._from_resolver(\n", + " resolver, filters=filters, sync_from_s3=True, mode=\"total\"\n", + ")\n", "\n", - "loader = torch.utils.data.DataLoader(multi_ds, batch_size=1, shuffle=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86338e96", - "metadata": {}, - "outputs": [], - "source": [ - "batch = next(iter(loader))\n", - "nds(batch)" + "loader = torch.utils.data.DataLoader(multi_ds, batch_size=1, shuffle=False)" ] }, { @@ 
-164,16 +165,9 @@ " vis, type=\"xyz\", color=\"Reds\",\n", " actions=right_xyz, extrinsics=None, intrinsics=intrinsics, arm=\"right\"\n", " )\n", - " return vis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "89be6d5b", - "metadata": {}, - "outputs": [], - "source": [ + " return vis\n", + "\n", + "\n", "def viz_batch_ypr(batch, image_key, action_key, intrinsics_key, axis_len_m=0.04):\n", " img = batch[image_key][0].detach().cpu()\n", " if img.shape[0] in (1, 3):\n", @@ -295,7 +289,7 @@ "for i, batch in enumerate(loader):\n", " vis = viz_batch(batch, image_key=image_key, action_key=action_key, intrinsics_key=\"base\")\n", " images.append(vis)\n", - " if i > 100:\n", + " if i > 10:\n", " break\n", "\n", "mpy.show_video(images, fps=30)" @@ -313,30 +307,13 @@ { "cell_type": "code", "execution_count": null, - "id": "38100d31", - "metadata": {}, - "outputs": [], - "source": [ - "from egomimic.utils.aws.aws_sql import timestamp_ms_to_episode_hash\n", - "\n", - "timestamp_ms_to_episode_hash(1764285211791)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8693d01c", + "id": "b7384468", "metadata": {}, "outputs": [], "source": [ - "# Aria-style chunking example: horizon=30 contiguous frames, sample anchors every 3 -> 10 points, then interpolate to 100.\n", - "\n", - "# EPISODE_PATH = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/scale/697a9070da7b91acaf3f2d88_episode_000000.zarr\") # Scale\n", - "# intrinsics_key = \"scale\"\n", - "\n", - "EPISODE_PATH = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/scale/2026-02-24-01-49-24-166324/697a9070da7b91acaf3f2d88_episode_000002.zarr\") # Aria\n", - "intrinsics_key = \"scale\"\n", + "temp_dir = \"/coc/flash7/scratch/egoverseDebugDatasets/egoverseS3DatasetTest\"\n", "\n", + "intrinsics_key = \"base\"\n", "\n", "key_map = {\n", " \"images.front_1\": {\"zarr_key\": \"images.front_1\"},\n", @@ -350,139 +327,23 @@ "ACTION_CHUNK_LENGTH = 100\n", "ACTION_STRIDE = 3\n", "\n", - 
"transform_list = build_aria_bimanual_transform_list(\n", - " chunk_length=ACTION_CHUNK_LENGTH,\n", - " stride=ACTION_STRIDE,\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b4c03ca", - "metadata": {}, - "outputs": [], - "source": [ - "# Full MultiDataset via LocalEpisodeResolver (mirrors test_multi_zarr.yaml)\n", - "from egomimic.rldb.zarr.action_chunk_transforms import (\n", - " build_aria_bimanual_transform_list,\n", - ")\n", - "from egomimic.rldb.zarr.zarr_dataset_multi import LocalEpisodeResolver, MultiDataset\n", - "\n", - "SCALE_FOLDER = Path(\"/coc/flash7/scratch/egoverseDebugDatasets/scale/2026-02-24-01-49-24-166324\")\n", - "\n", - "key_map = {\n", - " \"images.front_1\": {\"zarr_key\": \"images.front_1\"},\n", - " \"right.obs_ee_pose\": {\"zarr_key\": \"right.obs_ee_pose\"},\n", - " \"left.obs_ee_pose\": {\"zarr_key\": \"left.obs_ee_pose\"},\n", - " \"right.action_ee_pose\": {\"zarr_key\": \"right.obs_ee_pose\", \"horizon\": 30},\n", - " \"left.action_ee_pose\": {\"zarr_key\": \"left.obs_ee_pose\", \"horizon\": 30},\n", - " \"obs_head_pose\": {\"zarr_key\": \"obs_head_pose\"},\n", - "}\n", - "\n", - "transform_list = build_aria_bimanual_transform_list(\n", - " stride=1,\n", - ")\n", - "\n", - "resolver = LocalEpisodeResolver(\n", - " folder_path=SCALE_FOLDER,\n", + "resolver = S3EpisodeResolver(\n", + " temp_dir,\n", " key_map=key_map,\n", - " transform_list=transform_list,\n", + " transform_list=build_aria_bimanual_transform_list(\n", + " chunk_length=ACTION_CHUNK_LENGTH,\n", + " stride=ACTION_STRIDE,\n", + " )\n", ")\n", "\n", - "multi_ds = MultiDataset._from_resolver(resolver, mode=\"total\")\n", - "print(f\"MultiDataset total frames: {len(multi_ds)}\")\n", - "print(f\"Underlying episodes: {list(multi_ds.datasets.keys())}\")\n", + "filters = {\"episode_hash\": \"2026-01-20-20-59-43-376000\"} #aria\n", + "# filters = {\"episode_hash\": \"692ee048ef7557106e6c4b8d\"} # mecka\n", "\n", - "loader = 
torch.utils.data.DataLoader(multi_ds, batch_size=1, shuffle=False)\n", - "batch = next(iter(loader))\n", - "print(\"Batch keys:\", list(batch.keys()))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1da784ea", - "metadata": {}, - "outputs": [], - "source": [ - "# Build a MultiDataset with exactly one ZarrDataset inside\n", - "single_ds = ZarrDataset(Episode_path=EPISODE_PATH, key_map=key_map, transform_list=transform_list)\n", - "#single_ds = ZarrDataset(Episode_path=EPISODE_PATH, key_map=key_map)\n", - "multi_ds = MultiDataset(datasets={\"single_episode\": single_ds}, mode=\"total\")\n", - "\n", - "print(\"len(single_ds):\", len(single_ds))\n", - "print(\"len(multi_ds):\", len(multi_ds))\n", - "\n", - "loader = torch.utils.data.DataLoader(multi_ds, batch_size=1, shuffle=False)\n", - "# batch = next(iter(loader))\n", - "\n", - "# print(\"Batch keys:\", list(batch.keys()))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "507a1fd6", - "metadata": {}, - "outputs": [], - "source": [ - "batch = next(iter(loader))\n", - "nds(batch)\n", - "print(\"Batch keys:\", list(batch.keys()))\n", - "print(batch[\"right.action_ee_pose\"][0, 0])\n", - "print(batch[\"left.action_ee_pose\"][0, 0])\n", - "print(batch[\"obs_head_pose\"][0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0e94799c", - "metadata": {}, - "outputs": [], - "source": [ - "left_hand_pose = []\n", - "right_hand_pose = []\n", - "head_pose = []\n", - "for i, batch in enumerate(loader):\n", - " left_hand_pose.append(batch[\"left.action_ee_pose\"][0, 0])\n", - " right_hand_pose.append(batch[\"right.action_ee_pose\"][0, 0])\n", - " head_pose.append(batch[\"obs_head_pose\"][0])\n", - " \n", - " if i > 400:\n", - " break\n", - "left_hand_pose = np.array(left_hand_pose)\n", - "right_hand_pose = np.array(right_hand_pose)\n", - "head_pose = np.array(head_pose)\n", - "\n", - "# chunk the pose to actions(N, 100, 3)\n", - "left_hand_pose_actions = 
[]\n", - "right_hand_pose_actions = []\n", - "head_pose_actions = []\n", - "for i in range(left_hand_pose.shape[0] - 100):\n", - " action_left_hand = left_hand_pose[i:i+100, :]\n", - " action_right_hand = right_hand_pose[i:i+100, :]\n", - " action_head = head_pose[i:i+100, :]\n", - " left_hand_pose_actions.append(action_left_hand)\n", - " right_hand_pose_actions.append(action_right_hand)\n", - " head_pose_actions.append(action_head)\n", - "left_hand_pose_actions = np.array(left_hand_pose_actions)\n", - "right_hand_pose_actions = np.array(right_hand_pose_actions)\n", - "head_pose_actions = np.array(head_pose_actions)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6517f061", - "metadata": {}, - "outputs": [], - "source": [ - "from egomimic.utils.egomimicUtils import render_3d_traj_frames\n", + "cloudflare_ds = MultiDataset._from_resolver(\n", + " resolver, filters=filters, sync_from_s3=True, mode=\"total\"\n", + ")\n", "\n", - "frames = render_3d_traj_frames([left_hand_pose_actions, right_hand_pose_actions, head_pose_actions], labels=[\"left hand\", \"right hand\", \"head\"], stride=10)\n", - "mpy.show_video(frames, fps=30)\n" + "loader = torch.utils.data.DataLoader(cloudflare_ds, batch_size=1, shuffle=False)" ] }, { @@ -507,7 +368,7 @@ " # for k, v in batch.items():\n", " # print(f\"{k}: {tuple(v.shape)}\")\n", " \n", - " if i > 200:\n", + " if i > 10:\n", " break\n", "\n", "mpy.show_video(ims, fps=30)\n" @@ -528,22 +389,12 @@ "for i, batch in enumerate(loader):\n", " vis_ypr = viz_batch_ypr(batch, image_key=image_key, action_key=action_key, intrinsics_key=\"base\")\n", " ims_ypr.append(vis_ypr)\n", - " if i > 200:\n", + " if i > 20:\n", " break\n", "\n", "mpy.show_video(ims_ypr, fps=30)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "36f120b8", - "metadata": {}, - "outputs": [], - "source": [ - "batch[\"actions_cartesian\"][0, 0]\n" - ] - }, { "cell_type": "markdown", "id": "efecaba7", @@ -563,8 +414,6 @@ "\n", "from 
egomimic.rldb.zarr.action_chunk_transforms import _xyzwxyz_to_matrix\n", "\n", - "EPISODE_PATH_KP = EPISODE_PATH\n", - "\n", "key_map_kp = {\n", " \"images.front_1\": {\"zarr_key\": \"images.front_1\"},\n", " \"left.obs_keypoints\": {\"zarr_key\": \"left.obs_keypoints\"},\n", @@ -572,10 +421,18 @@ " \"obs_head_pose\": {\"zarr_key\": \"obs_head_pose\"},\n", "}\n", "\n", - "single_ds_kp = ZarrDataset(Episode_path=EPISODE_PATH_KP, key_map=key_map_kp)\n", - "multi_ds_kp = MultiDataset(datasets={\"single_episode\": single_ds_kp}, mode=\"total\")\n", - "loader_kp = torch.utils.data.DataLoader(multi_ds_kp, batch_size=1, shuffle=False)\n", - "print(f\"Keypoint dataset: {len(single_ds_kp)} frames\")" + "filters = {\"episode_hash\": \"2026-01-20-20-59-43-376000\"}\n", + "\n", + "resolver = S3EpisodeResolver(\n", + " temp_dir,\n", + " key_map=key_map\n", + ")\n", + "\n", + "cloudflare_ds = MultiDataset._from_resolver(\n", + " resolver, filters=filters, sync_from_s3=True, mode=\"total\"\n", + ")\n", + "\n", + "loader_kp = torch.utils.data.DataLoader(cloudflare_ds, batch_size=1, shuffle=False)" ] }, { @@ -694,16 +551,24 @@ "for i, batch_kp in enumerate(loader_kp):\n", " vis = viz_keypoints(batch_kp)\n", " ims_kp.append(vis)\n", - " if i > 200:\n", + " if i > 10:\n", " break\n", "\n", "mpy.show_video(ims_kp, fps=30)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f4fbaec", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "emimic2 (3.11.14)", + "display_name": "emimic (3.11.14)", "language": "python", "name": "python3" }, diff --git a/egomimic/trainHydra.py b/egomimic/trainHydra.py index 2ecdad32..f4ccde0c 100644 --- a/egomimic/trainHydra.py +++ b/egomimic/trainHydra.py @@ -11,6 +11,7 @@ from egomimic.rldb.zarr.utils import DataSchematic, set_global_seed from egomimic.scripts.evaluation.eval import Eval +from egomimic.utils.aws.aws_data_utils import load_env from egomimic.utils.instantiators import 
instantiate_callbacks, instantiate_loggers from egomimic.utils.logging_utils import log_hyperparameters from egomimic.utils.pylogger import RankedLogger @@ -39,6 +40,7 @@ def train(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]: else: raise ValueError("Seed must be provided in cfg for reproducibility!") + load_env() # log.info(f"Instantiating data schematic <{cfg.data_schematic._target_}>") data_schematic: DataSchematic = hydra.utils.instantiate(cfg.data_schematic) diff --git a/egomimic/utils/aws/aws_data_utils.py b/egomimic/utils/aws/aws_data_utils.py index e8063aa4..e1318c82 100644 --- a/egomimic/utils/aws/aws_data_utils.py +++ b/egomimic/utils/aws/aws_data_utils.py @@ -7,6 +7,20 @@ from boto3.s3.transfer import TransferConfig +def load_env(path="~/.egoverse_env"): + p = Path(path).expanduser() + if not p.exists(): + raise ValueError( + f"Env file {p} does not exist, run ./egomimic/utils/aws/setup_secret.sh" + ) + for line in p.read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + os.environ.setdefault(k, v.strip().strip("'").strip('"')) + + def s3_sync_to_local(bucket: str, key_prefix: str, local_dir: str | Path) -> None: """ Rough equivalent of: aws s3 sync s3://bucket/key_prefix/ local_dir/ diff --git a/egomimic/utils/aws/aws_sql.py b/egomimic/utils/aws/aws_sql.py index b9059997..bf51f094 100644 --- a/egomimic/utils/aws/aws_sql.py +++ b/egomimic/utils/aws/aws_sql.py @@ -1,4 +1,5 @@ import json +import logging import os from dataclasses import asdict, dataclass from datetime import datetime, timezone @@ -16,6 +17,10 @@ ) from sqlalchemy.exc import IntegrityError +logger = logging.getLogger(__name__) +YELLOW = "\033[33m" +RESET = "\033[0m" + @dataclass class TableRow: @@ -46,7 +51,12 @@ def create_default_engine(): SECRETS_ARN = os.environ.get("SECRETS_ARN") if SECRETS_ARN: secrets = boto3.client("secretsmanager") - sec = 
secrets.get_secret_value(SecretId=SECRETS_ARN)["SecretString"] + try: + sec = secrets.get_secret_value(SecretId=SECRETS_ARN)["SecretString"] + except Exception as e: + raise RuntimeError( + f"Failed to retrieve secrets from {SECRETS_ARN}. Did you run ./egomimic/utils/aws/setup_secret.sh ?: {e}" + ) from e cfg = json.loads(sec) HOST = cfg.get("host", cfg.get("HOST")) DBNAME = cfg.get("dbname", cfg.get("DBNAME", "appdb")) @@ -54,7 +64,11 @@ def create_default_engine(): PASSWORD = cfg.get("password", cfg.get("PASSWORD")) PORT = cfg.get("port", 5432) else: - print("Using hardcoded DB Credentials (ok for local testing)") + logger.warning( + "%sUsing hardcoded DB Credentials. Run ./egomimic/utils/aws/setup_secret.sh for better security!%s", + YELLOW, + RESET, + ) # Fallback to hardcoded values for local testing HOST = "lowuse-pg-east2.cdc8824mase4.us-east-2.rds.amazonaws.com" DBNAME = "appdb"