diff --git a/conda_environment.yaml b/conda_environment.yaml index 36eb619..fcf40dd 100644 --- a/conda_environment.yaml +++ b/conda_environment.yaml @@ -17,6 +17,7 @@ dependencies: - -e git+https://github.com/ARISE-Initiative/robomimic@main#egg=robomimic - diffusers - zarr + - h5py - einops - tqdm - pybullet diff --git a/eval.py b/eval.py index a99f7b4..0781d5a 100644 --- a/eval.py +++ b/eval.py @@ -55,7 +55,6 @@ def eval_main(cfg): # run policy in environment success_count = 0 for i in range(cfg.num_episodes): - runner.reset() rewards, info = runner.run(agent, cfg.max_steps) assert "success" in info, "info['success'] not returned in info from runner" print(f"info: {info}") diff --git a/imitation/config/policy/robomimic_eef.yaml b/imitation/config/policy/robomimic_eef.yaml deleted file mode 100644 index ef331d3..0000000 --- a/imitation/config/policy/robomimic_eef.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_target_: imitation.policy.robomimic_lowdim_policy.RobomimicPretrainedWrapper -obs_dim: ${task.obs_dim} -action_dim: ${task.action_dim} -# only works with fixed horizons, pred_horizon = obs_horizon -algo_name: "bc_rnn" -obs_type: "low_dim" -task_name: ${task.task_name} -dataset_type: ${task.dataset_type} - -dataset: ${task.dataset} -ckpt_path: ./weights/lift_ph_low_dim_epoch_1000_succ_100.pth -lr: 0.0001 \ No newline at end of file diff --git a/imitation/dataset/robomimic_eef_dataset.py b/imitation/dataset/robomimic_eef_dataset.py index 2fd5aa7..d07af12 100644 --- a/imitation/dataset/robomimic_eef_dataset.py +++ b/imitation/dataset/robomimic_eef_dataset.py @@ -36,8 +36,8 @@ def __init__(self, self.indices = [] self.data_at_indices = [] # if indices file exists, load it - index_file = dataset_path.replace(".hdf5", f"_indices_{obs_horizon}_{action_horizon}_{pred_horizon}.npy") - data_at_indices_file = dataset_path.replace(".hdf5", f"_data_at_indices_{obs_horizon}_{action_horizon}_{pred_horizon}.npy") + index_file = 
dataset_path.replace(".hdf5", f"_eef_indices_{obs_horizon}_{action_horizon}_{pred_horizon}.npy") + data_at_indices_file = dataset_path.replace(".hdf5", f"_eef_data_at_indices_{obs_horizon}_{action_horizon}_{pred_horizon}.npy") if os.path.exists(index_file): self.indices = np.load(index_file) self.data_at_indices = np.load(data_at_indices_file, allow_pickle=True) diff --git a/imitation/dataset/robomimic_lowdim_dataset.py b/imitation/dataset/robomimic_lowdim_dataset.py index ce0355a..bea294f 100644 --- a/imitation/dataset/robomimic_lowdim_dataset.py +++ b/imitation/dataset/robomimic_lowdim_dataset.py @@ -86,9 +86,10 @@ def create_sample_indices(self): |------------ pred_horizon -------------| ''' idx_global = 0 + n_latency_steps = 0 for key in tqdm(self.dataset_keys): episode_length = len(self.dataset_root[f"data/{key}/obs/{self.obs_keys[0]}"]) - for idx in range(episode_length - self.pred_horizon): + for idx in range(episode_length - self.pred_horizon - n_latency_steps): if idx - self.obs_horizon < 0: continue self.indices.append(idx_global + idx) @@ -100,7 +101,7 @@ def create_sample_indices(self): data_obs_keys.append(obs) data_action_keys = [] for action_key in self.action_keys: - action = self.dataset_root[f"data/{key}/obs/{action_key}"][idx:idx+self.pred_horizon, :] + action = self.dataset_root[f"data/{key}/obs/{action_key}"][idx + n_latency_steps:idx + n_latency_steps + self.pred_horizon, :] if "quat" in action_key: action = self.rotation_transformer.forward(action) data_action_keys.append(action) diff --git a/imitation/env/robomimic_lowdim_wrapper.py b/imitation/env/robomimic_lowdim_wrapper.py index 7fe8179..14c0cf1 100644 --- a/imitation/env/robomimic_lowdim_wrapper.py +++ b/imitation/env/robomimic_lowdim_wrapper.py @@ -86,11 +86,12 @@ def _robosuite_obs_to_robomimic_obs(self, obs): j = i*39 # 7 - sin of joint angles robot_joint_pos = obs[j:j + 7] - # 7 - sin of joint angles + # 7 - sin of joint angles # robot_joint_sin = obs[j + 7:j + 14] # 7 - cos of joint 
angles # robot_joint_cos = obs[j + 14:j + 21] # 7 - joint velocities + # robot_joint_vel = obs[j + 21:j + 28] eef_pose = obs[j + 28:j + 31] eef_quat = obs[j + 31:j + 35] eef_6d = self.rotation_transformer.forward(eef_quat) diff --git a/imitation/env_runner/robomimic_lowdim_runner.py b/imitation/env_runner/robomimic_lowdim_runner.py index 6b88b7c..348d362 100644 --- a/imitation/env_runner/robomimic_lowdim_runner.py +++ b/imitation/env_runner/robomimic_lowdim_runner.py @@ -59,8 +59,9 @@ def reset(self) -> None: self.obs_deque = collections.deque( [self.obs] * self.obs_horizon, maxlen=self.obs_horizon) - def run(self, agent: BaseAgent, n_steps: int) -> Dict: + def run(self, agent: BaseAgent, n_steps: int = 100) -> Dict: log.info(f"Running agent {agent.__class__.__name__} for {n_steps} steps") + self.reset() if self.output_video: self.start_video() done = False @@ -87,12 +88,12 @@ def run(self, agent: BaseAgent, n_steps: int) -> Dict: if self.output_video: self.end_video() return rewards, info + obs, reward, done, info = self.env.step(action) self.obs_deque.append(obs) if self.render: self.env.render() - # time.sleep(1/self.fps) # TODO properly fix the rendering speed or not if self.output_video: # We need to directly grab full observations so we can get image data diff --git a/imitation/policy/diffusion_policy.py b/imitation/policy/diffusion_policy.py index e673f61..c6ce74e 100644 --- a/imitation/policy/diffusion_policy.py +++ b/imitation/policy/diffusion_policy.py @@ -7,6 +7,7 @@ from tqdm.auto import tqdm from diffusers.optimization import get_scheduler +from diffusers.training_utils import EMAModel from diffusers.schedulers.scheduling_ddpm import DDPMScheduler import wandb @@ -159,13 +160,7 @@ def get_action(self, obs_seq): # only take action_horizon number of actions action = action_pred[:self.action_horizon,:] # (action_horizon, action_dim) - return action # TODO limit this in runner - - def validate(self, dataset=None, model_path=None): - ''' - Calculate 
validation loss for noise prediction model in the given dataset - ''' - return None + return action def train(self, dataset=None, @@ -188,10 +183,9 @@ def train(self, # accelerates training and improves stability # holds a copy of the model weights - # TODO use EMA - # ema = EMAModel( - # model=noise_pred_net, - # power=0.75) + ema = EMAModel( + parameters=self.noise_pred_net.parameters(), + power=0.75) # Standard ADAM optimizer # Note that EMA parameters are not optimized @@ -262,9 +256,8 @@ def train(self, # this is different from standard pytorch behavior lr_scheduler.step() - # TODO use EMA # update Exponential Moving Average of the model weights - # ema.step(noise_pred_net) + ema.step(self.noise_pred_net.parameters()) # logging @@ -273,10 +266,10 @@ def train(self, tepoch.set_postfix(loss=loss_cpu) tglobal.set_postfix(loss=np.mean(epoch_loss)) wandb.log({'epoch_loss': np.mean(epoch_loss)}) + # Weights of the EMA model are used for inference + ema_noise_pred_net = self.noise_pred_net + ema.copy_to(ema_noise_pred_net.parameters()) # save model checkpoint - torch.save(self.noise_pred_net.state_dict(), model_path) + torch.save(ema_noise_pred_net.state_dict(), model_path) - # Weights of the EMA model - # is used for inference - # ema_noise_pred_net = ema.averaged_model - self.ema_noise_pred_net = self.noise_pred_net \ No newline at end of file + \ No newline at end of file