Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions egomimic/hydra_configs/data/mecka_all.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Data config with one train and one valid S3RLDBDataset entry, both filtered
# to lab "mecka". `_target_` names the class to build (Hydra convention).
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper


# Training split. The entry name (`dataset1`) is the same key used under
# `train_dataloader_params` further down.
train_datasets:
dataset1:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
mode: train
# NOTE(review): the sibling data configs pass `local_files_only`, not
# `local_mode` — confirm this is an accepted S3RLDBDataset argument.
local_mode: true
# presumably the fraction of matching data to sample — TODO confirm
sample_percent: 0.1
filters:
lab: "mecka"
# NOTE(review): indentation is not visible here; the sibling configs list
# `embodiment` before `filters` as a dataset argument — confirm whether it is
# meant to be a filter key in this file.
embodiment: "mecka_bimanual"

# Validation split: same settings as the training entry except `mode: valid`.
valid_datasets:
dataset1:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
mode: valid
local_mode: true
sample_percent: 0.1
filters:
lab: "mecka"
embodiment: "mecka_bimanual"

# Loader settings for the training split, keyed by dataset entry name.
train_dataloader_params:
dataset1:
batch_size: 32
num_workers: 8

# Loader settings for the validation split, keyed by dataset entry name.
valid_dataloader_params:
dataset1:
batch_size: 32
num_workers: 8
50 changes: 50 additions & 0 deletions egomimic/hydra_configs/data/mecka_cotrain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Co-training data config. Two dataset entries are defined per split, but the
# `dataset1` entry (embodiment "mecka_bimanual", task "potting_soil") is
# commented out, so only `dataset2` (embodiment "aria_right_arm") is active.
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper

# Training split.
train_datasets:
# dataset1:
# _target_: egomimic.rldb.utils.S3RLDBDataset
# bucket_name: "rldb"
# embodiment: "mecka_bimanual"
# mode: train
# filters:
# task: "potting_soil"
# local_files_only: True
# Active entry: lab "song", task "object in container".
dataset2:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
embodiment: "aria_right_arm"
mode: train
filters:
task: "object in container"
lab: "song"
local_files_only: True

# Validation split: mirrors the training entries with `mode: valid`.
valid_datasets:
# dataset1:
# _target_: egomimic.rldb.utils.S3RLDBDataset
# bucket_name: "rldb"
# embodiment: "mecka_bimanual"
# mode: valid
# filters:
# task: "potting_soil"
# local_files_only: True
# Active entry: same filters as the training `dataset2`.
dataset2:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
embodiment: "aria_right_arm"
mode: valid
filters:
task: "object in container"
lab: "song"
local_files_only: True


# Loader settings are keyed by dataset entry name. The only active entry in
# `train_datasets` / `valid_datasets` is `dataset2`, so the settings must be
# keyed `dataset2` to apply to it. The `dataset1` settings are kept commented
# out, matching the commented-out `dataset1` dataset entry; uncomment both
# together when re-enabling it.
train_dataloader_params:
  # dataset1:
  #   batch_size: 32
  #   num_workers: 10
  dataset2:
    batch_size: 32
    num_workers: 10

valid_dataloader_params:
  # dataset1:
  #   batch_size: 32
  #   num_workers: 10
  dataset2:
    batch_size: 32
    num_workers: 10
36 changes: 36 additions & 0 deletions egomimic/hydra_configs/data/mecka_human_fold_clothes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Data config with one train and one valid S3RLDBDataset entry for embodiment
# "mecka_bimanual", filtered to lab "mecka" and task "fold_clothes".
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper

# Training split.
train_datasets:
dataset1:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
# NOTE(review): `main_prefix` is set only on the training entry; the validation
# entry below omits it — confirm both splits are meant to read the same prefix.
main_prefix: "mecka"
mode: train
embodiment: "mecka_bimanual"
# Cluster-specific absolute path; presumably a local download/cache root —
# TODO confirm.
temp_root: "/coc/flash7/scratch/egoverseS3Dataset"
filters:
lab: "mecka"
task: "fold_clothes"
local_files_only: True

# Validation split: same filters and `temp_root` as training, `mode: valid`.
valid_datasets:
dataset1:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
mode: valid
embodiment: "mecka_bimanual"
temp_root: "/coc/flash7/scratch/egoverseS3Dataset"
filters:
lab: "mecka"
task: "fold_clothes"
local_files_only: True

# Loader settings for the training split, keyed by dataset entry name.
train_dataloader_params:
dataset1:
batch_size: 32
num_workers: 10

# Loader settings for the validation split, keyed by dataset entry name.
valid_dataloader_params:
dataset1:
batch_size: 32
num_workers: 10
27 changes: 20 additions & 7 deletions egomimic/hydra_configs/data/mecka_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,41 @@ _target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper

train_datasets:
dataset1:
_target_: rldb.utils.RLDBDataset
repo_id: "mecka_test"
_target_: rldb.utils.S3RLDBDataset
bucket_name: "rldb"
mode: train
local_mode: True
embodiment: "mecka_bimanual"
root: "/coc/flash7/acheluva3/EgoVerse/mecka_demo"
filters:
task: "fold_clothes"
local_files_only: True


valid_datasets:
dataset1:
_target_: rldb.utils.RLDBDataset
repo_id: "mecka_test"
_target_: rldb.utils.S3RLDBDataset
bucket_name: "rldb"
mode: valid
local_mode: True
embodiment: "mecka_bimanual"
root: "/coc/flash7/acheluva3/EgoVerse/mecka_demo"
filters:
task: "fold_clothes"
local_files_only: True



train_dataloader_params:
dataset1:
batch_size: 32
num_workers: 10
# dataset2:
# batch_size: 32
# num_workers: 10

valid_dataloader_params:
dataset1:
batch_size: 32
num_workers: 10
num_workers: 10
# dataset2:
# batch_size: 32
# num_workers: 10
44 changes: 44 additions & 0 deletions egomimic/hydra_configs/data/obj_cont_eva.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Data config with one train and one valid S3RLDBDataset entry for embodiment
# "eva_right_arm", filtered to lab "rl2" and task "object in container".
_target_: egomimic.pl_utils.pl_data_utils.MultiDataModuleWrapper
# Passed to the wrapper alongside the dataset maps; its effect is defined by
# MultiDataModuleWrapper, which is not visible here.
use_tokenizer: false

# Training split.
train_datasets:
dataset1:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
# Cluster-specific absolute path; presumably a local download/cache root —
# TODO confirm.
temp_root: "/coc/cedarp-dxu345-0/datasets/egoverse"
mode: train
embodiment: "eva_right_arm"
filters:
lab: "rl2"
task: "object in container"
robot_name: "eva_right_arm"
# Optional `objects` filter, currently disabled.
# objects:
# - "green bowl"
# - "caprisun"
local_files_only: True

# Validation split: same settings as the training entry except `mode: valid`.
valid_datasets:
dataset1:
_target_: egomimic.rldb.utils.S3RLDBDataset
bucket_name: "rldb"
temp_root: "/coc/cedarp-dxu345-0/datasets/egoverse"
mode: valid
embodiment: "eva_right_arm"
filters:
lab: "rl2"
task: "object in container"
robot_name: "eva_right_arm"
# Optional `objects` filter, currently disabled.
# objects:
# - "green bowl"
# - "caprisun"
local_files_only: True

# Loader settings for the training split, keyed by dataset entry name.
train_dataloader_params:
dataset1:
batch_size: 32
num_workers: 8

# Loader settings for the validation split, keyed by dataset entry name.
valid_dataloader_params:
dataset1:
batch_size: 32
num_workers: 8
6 changes: 3 additions & 3 deletions egomimic/hydra_configs/hydra/launcher/submitit_pace.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher

# Slurm configuration
name: ${hydra.job.name} # Default job name
partition: "gpu-h200" # Slurm partition
# partition: "gpu-h200" # Slurm partition
account: "gts-dxu345-rl2" # Slurm account
cpus_per_task: 8 # Number of CPUs per task (max 4:1 CPU:GPU ratio)
cpus_per_task: 4 # Number of CPUs per task (max 4:1 CPU:GPU ratio)
nodes: ${launch_params.nodes} # Number of nodes
tasks_per_node: ${launch_params.gpus_per_node} # Use variable for tasks per node
gres: "gpu:h200:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}" # GPU type and count (h100 for H100 GPUs)
qos: "short" # Slurm QoS
qos: "inferno" # Slurm QoS
mem_per_gpu: 250G
timeout_min: 2880 # Timeout in minutes (48 hours)
# exclude: "protocol, puma" # Nodes to exclude
Expand Down
23 changes: 15 additions & 8 deletions egomimic/hydra_configs/hydra/launcher/submitit_skynet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,23 @@ defaults:
_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher

name: ${hydra.job.name}
partition: "rl2-lab"
account: "rl2-lab"
partition: "hoffman-lab"
account: "hoffman-lab"
# Override nodes via: hydra.launcher.nodes=<num_nodes>
nodes: 1
cpus_per_task: 15
# Override via: hydra.launcher.tasks_per_node=<num_gpus>
tasks_per_node: 4

nodes: ${launch_params.nodes}
tasks_per_node: ${launch_params.gpus_per_node}
gres: "gpu:a40:${eval:'${launch_params.gpus_per_node} * ${launch_params.nodes}'}"
mem_per_gpu: 250G

qos: "short"
timeout_min: 2880
timeout_min: 2880 # 48 hours

# env:
# NCCL_TIMEOUT: 7200
# NCCL_DEBUG: INFO
# NCCL_ASYNC_ERROR_HANDLING: "1"

additional_parameters:
requeue: true
gpus-per-node: "a40:1"
requeue: true
2 changes: 1 addition & 1 deletion egomimic/hydra_configs/logger/wandb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ wandb:
offline: False
id: "${name}_${description}_${now:%Y-%m-%d_%H-%M-%S}" # pass correct id to resume experiment!
anonymous: null # enable anonymous logging
project: "everse_flagship"
project: "everse_mecka_hpt_ossification"
log_model: False # upload lightning ckpts
prefix: "" # a string to put at the beginning of metric keys
entity: "rl2-group" # set to name of your wandb team
Expand Down
Loading