Skip to content

Debug/pipeline#25

Open
muzi2018 wants to merge 35 commits into Ericonaldo:main from
muzi2018:debug/pipeline
Open

Debug/pipeline#25
muzi2018 wants to merge 35 commits into Ericonaldo:main from
muzi2018:debug/pipeline

Conversation

@muzi2018
Copy link

@muzi2018 muzi2018 commented Sep 5, 2025

No description provided.

Ericonaldo and others added 30 commits August 13, 2025 11:37
  maxEpisodeLength: 200 #200
    approaching: 5
reward:
  base_height_target: 0.55
  only_positive_rewards: False
  scales:
    approaching: 5.0
    lifting: 1.0
    pick_up: 5.0 # 0.5
    acc_penalty: -0.001
    command_penalty: -1.0
    command_reward: 0.25
    standpick: 0.25 # not found in reward_vec_task.py
    action_rate: -0.001
    ee_orn: 0.05 # 0.01
    base_dir: 0.25
    rad_penalty: 0.0
    base_ang_pen: 0.0
    base_approaching: 0.01 # 0.05
    grasp_base_height: 0.5 # not found in reward_vec_task.py
    gripper_rate: -0.1 # -0.1
policy 4
reward:
  base_height_target: 0.55
  only_positive_rewards: False
  scales:
    approaching: 5.0
    lifting: 1.0
    pick_up: 5.0 # 0.5
    acc_penalty: -0.001
    command_penalty: -1.0
    command_reward: 0.25
    standpick: 0.25 # not found in reward_vec_task.py
    action_rate: -0.001
    ee_orn: 0.05 # 0.01
    base_dir: 0.25
    rad_penalty: 0.0
    base_ang_pen: 0.0
    base_approaching: 0.01 # 0.05
    grasp_base_height: 0.5 # not found in reward_vec_task.py
    gripper_rate: -0.1 # -0.1
policy 4
reward:
  base_height_target: 0.55
  only_positive_rewards: False
  scales:
    approaching: 10.0
    lifting: 1.0
    pick_up: 5.0 # 0.5
    acc_penalty: -0.001
    command_penalty: -1.0
    command_reward: 0.25
    standpick: 0.25 # not found in reward_vec_task.py
    action_rate: -0.001
    ee_orn: 0.05 # 0.01
    base_dir: 0.25
    rad_penalty: 0.0
    base_ang_pen: 0.0
    base_approaching: 0.01 # 0.05
    grasp_base_height: 0.5 # not found in reward_vec_task.py
    gripper_rate: -0.1 # -0.1
reward:
  base_height_target: 0.55
  only_positive_rewards: False
  scales:
    approaching: 5.0
    lifting: 1.0
    pick_up: 5.0 # 0.5
    acc_penalty: -0.001
    command_penalty: -1.0
    command_reward: 0.25
    standpick: 0.25 # not found in reward_vec_task.py #
    action_rate: -0.001
    ee_orn: 0.1 # 0.01
    base_dir: 0.25
    rad_penalty: 0.0
    base_ang_pen: 0.0
    base_approaching: 0.01 # 0.05
    grasp_base_height: 0.5 # not found in reward_vec_task.py
    gripper_rate: -0.1 # -0.1
reward:
  base_height_target: 0.55
  only_positive_rewards: False
  scales:
    approaching: 5.0
    lifting: 1.0
    pick_up: 5.0 # 0.5
    acc_penalty: -0.001
    command_penalty: -1.0
    command_reward: 0.25
    standpick: 0.25 # not found in reward_vec_task.py #
    action_rate: -0.001
    ee_orn: 0.1 # 0.01
    base_dir: 0.25
    rad_penalty: 0.0
    base_ang_pen: 0.0
    base_approaching: 0.01 # 0.05
    grasp_base_height: 0.5 # not found in reward_vec_task.py
    gripper_rate: -0.1 # -0.1
    def _reward_base_dir(self, obj_pos):
        """Reward the cosine alignment between a base-frame reference axis
        (rotated into the world by the base yaw quaternion) and the
        direction from the robot base to the object.

        Args:
            obj_pos: world-frame object positions, one row per env
                (presumably shape (num_envs, 3) — TODO confirm).

        Returns:
            Tuple ``(rew, rew)``: the per-env cosine-similarity reward,
            duplicated (zero wherever the object is closer than 1 cm).
        """
        # NOTE(review): the reference axis here is [0, 0, 1] (z). A yaw-only
        # quaternion leaves the z axis unchanged, and the x/y components of
        # the object direction are zeroed below, so this reduces to the sign
        # of the vertical offset. If horizontal heading alignment was
        # intended, the axis should probably be [1, 0, 0] with the z
        # component zeroed instead — confirm with the author.
        ref_axis = torch.tensor([0., 0., 1.], device=self.device).repeat(self.num_envs, 1)
        heading_world = quat_apply(self.base_yaw_quat, ref_axis)

        # Vector from base to object with horizontal components suppressed.
        to_obj = obj_pos - self._robot_root_states[:, :3]
        to_obj[:, :2] = 0.
        dist = torch.norm(to_obj, dim=-1)

        # Normalize only where the distance is numerically safe (>= 1 cm).
        valid = dist >= 0.01
        unit_to_obj = to_obj[valid] / dist[valid].unsqueeze(-1)

        reward = torch.zeros(self.num_envs, device=self.device, dtype=torch.float)
        reward[valid] = F.cosine_similarity(heading_world[valid], unit_to_obj)

        return reward, reward
    def _reward_base_dir(self, obj_pos):
        """Reward the cosine alignment between a base-frame reference axis
        (rotated into the world frame by the base yaw quaternion) and the
        unit direction from the robot base to the object.

        Args:
            obj_pos: world-frame object positions, one row per env
                (presumably shape (num_envs, 3) — TODO confirm).

        Returns:
            Tuple ``(rew, rew)``: the per-env reward, duplicated; zero for
            envs where the object is closer than 1 cm to the base.
        """
        # NOTE(review): the reference axis is [0, 0, 1] (z). A yaw-only
        # quaternion leaves the z axis unchanged, and obj_dir's x/y
        # components are zeroed below, so this reduces to the sign of the
        # vertical offset. If horizontal heading alignment was intended, the
        # axis should probably be [1, 0, 0] with z zeroed — confirm.
        base_x_dir = torch.tensor([0., 0., 1.], device=self.device).repeat(self.num_envs, 1)
        base_x_dir_world = quat_apply(self.base_yaw_quat, base_x_dir)
        # Base-to-object vector with horizontal components suppressed.
        obj_dir = obj_pos - self._robot_root_states[:, :3]
        obj_dir[:,:2] = 0.
        obj_dist = torch.norm(obj_dir, dim=-1)

        # Only normalize where the distance is numerically safe (>= 1 cm).
        safe_dis = obj_dist >= 0.01
        obj_dir_unit = obj_dir[safe_dis] / obj_dist[safe_dis].unsqueeze(-1)
        rew = torch.zeros(self.num_envs, device=self.device, dtype=torch.float)
        # rew[safe_dis] = torch.abs(torch.abs(torch.sum(base_x_dir_world[safe_dis] * obj_dir_unit, dim=-1)) - 1)
        rew[safe_dis] = F.cosine_similarity(base_x_dir_world[safe_dis], obj_dir_unit)

        return rew, rew
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants