2 changes: 2 additions & 0 deletions gym/envs/mini_cheetah/mini_cheetah.py
@@ -6,6 +6,8 @@
class MiniCheetah(LeggedRobot):
def __init__(self, gym, sim, cfg, sim_params, sim_device, headless):
super().__init__(gym, sim, cfg, sim_params, sim_device, headless)
self.se_base_height = torch.zeros_like(self.base_height)
self.se_base_lin_vel = torch.zeros_like(self.base_lin_vel)

def _reward_lin_vel_z(self):
"""Penalize z axis base linear velocity with squared exp"""
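The two buffers added in __init__ above (se_base_height, se_base_lin_vel) are zero-initialized placeholders: the state_estimator block added in mini_cheetah_osc_config.py below lists exactly these names under write_to, and the policy observes them in place of the privileged base_height / base_lin_vel. A minimal sketch of that write path, assuming the runner holds a dict of predictions keyed by the same names (the helper below is illustrative, not the repository's API):

import torch

def write_estimates(env, write_to, predictions):
    # Copy each estimator output into the env buffer named in `write_to`,
    # e.g. env.se_base_height and env.se_base_lin_vel.
    for name in write_to:
        getattr(env, name).copy_(predictions[name])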
21 changes: 11 additions & 10 deletions gym/envs/mini_cheetah/mini_cheetah_config.py
@@ -36,21 +36,21 @@ class init_state(LeggedRobotCfg.init_state):
# * initialization for random range setup
dof_pos_range = {
"haa": [-0.01, 0.01],
"hfe": [-0.785398, -0.785398],
"kfe": [1.596976, 1.596976],
"hfe": [-0.885398, -0.585398],
"kfe": [1.396976, 1.796976],
}
dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]}
dof_vel_range = {"haa": [0.0, 0.0], "hfe": [-0.2, 0.2], "kfe": [-0.2, 0.2]}
root_pos_range = [
[0.0, 0.0], # x
[0.0, 0.0], # y
[0.35, 0.35], # z
[0.0, 0.0], # roll
[0.0, 0.0], # pitch
[0.0, 0.0], # yaw
[-0.5, 0.5], # roll
[-0.5, 0.5], # pitch
[-0.5, 0.5], # yaw
]
root_vel_range = [
[-0.5, 2.0], # x
[0.0, 0.0], # y
[-1.0, 1.0], # x
[-0.5, 0.5], # y
[-0.05, 0.05], # z
[0.0, 0.0], # roll
[0.0, 0.0], # pitch
@@ -67,9 +67,10 @@ class control(LeggedRobotCfg.control):
class commands:
# * time before commands are changed [s]
resampling_time = 3.0
var = 1.0

class ranges:
lin_vel_x = [-2.0, 3.0] # min max [m/s]
lin_vel_x = [-1.0, 0.0, 1.0, 3.0]
lin_vel_y = 1.0 # max [m/s]
yaw_vel = 3 # max [rad/s]
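lin_vel_x has changed from a [min, max] interval to a discrete set of candidate forward speeds, and the new commands.var acts as the standard deviation of Gaussian noise layered on top; the _resample_commands override in mini_cheetah_ref.py below consumes both. A standalone sketch of the resulting forward-speed sampling, assuming those semantics:

import torch

lin_vel_x = [-1.0, 0.0, 1.0, 3.0]  # candidate forward speeds [m/s]
var = 1.0                          # std-dev of the Gaussian perturbation

def sample_forward_commands(num_envs, device="cpu"):
    candidates = torch.tensor(lin_vel_x, device=device)
    idx = torch.randint(0, len(candidates), (num_envs, 1), device=device)
    # pick a nominal speed per env, then jitter it
    return candidates[idx] + torch.randn(num_envs, 1, device=device) * var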

@@ -82,7 +83,7 @@ class push_robots:
class domain_rand:
randomize_friction = True
friction_range = [0.5, 1.0]
randomize_base_mass = False
randomize_base_mass = True
added_mass_range = [-1.0, 1.0]
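With randomize_base_mass enabled, each robot's trunk mass is perturbed within added_mass_range when the actors are created. In legged_gym-style environments this is typically done in _process_rigid_body_props; a rough sketch of that convention, not necessarily this repository's exact code:

import numpy as np

def _process_rigid_body_props(self, props, env_id):
    # props[0] is assumed to be the base/trunk rigid body
    if self.cfg.domain_rand.randomize_base_mass:
        low, high = self.cfg.domain_rand.added_mass_range
        props[0].mass += np.random.uniform(low, high)  # here: up to +/-1 kg
    return props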

class asset(LeggedRobotCfg.asset):
7 changes: 0 additions & 7 deletions gym/envs/mini_cheetah/mini_cheetah_osc.py
@@ -266,13 +266,6 @@ def _compute_grf(self, grf_norm=True):
else:
return grf

def _switch(self):
c_vel = torch.linalg.norm(self.commands, dim=1)
return torch.exp(
-torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1))
/ self.cfg.reward_settings.switch_scale
)

def _reward_cursorial(self):
# penalize the abad joints being away from 0
return -torch.mean(
26 changes: 24 additions & 2 deletions gym/envs/mini_cheetah/mini_cheetah_osc_config.py
@@ -74,7 +74,7 @@ class osc:
coupling = 1 # 0.02
osc_bool = False
grf_bool = False
randomize_osc_params = False
randomize_osc_params = True
grf_threshold = 0.1 # 20. # Normalized to body weight
omega_range = [1.0, 4.0] # [0.0, 10.]
coupling_range = [
@@ -170,6 +170,8 @@ class policy:
smooth_exploration = False

obs = [
"se_base_height",
"se_base_lin_vel",
"base_ang_vel",
"projected_gravity",
"commands",
@@ -226,6 +228,26 @@ class weights:
class termination_weight:
termination = 15.0 / 100.0

class state_estimator:
class network:
hidden_dims = [128, 128]
activation = "elu"
dropouts = None

obs = [
"base_ang_vel",
"projected_gravity",
"dof_pos_obs",
"dof_vel",
"oscillator_obs",
]
targets = ["base_height", "base_lin_vel"]
write_to = ["se_base_height", "se_base_lin_vel"]
batch_size = 2**15
max_gradient_steps = 10
normalize_obs = True
learning_rate = 1e-4
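The new state_estimator block configures a small supervised network that regresses base_height and base_lin_vel from the listed proprioceptive observations; its predictions are written into the se_* buffers named in write_to and fed to the policy. A minimal PyTorch sketch consistent with these settings (class and function names are illustrative, not the repository's API):

import torch
import torch.nn as nn

class StateEstimator(nn.Module):
    """MLP regressor from proprioceptive obs to (base_height, base_lin_vel)."""

    def __init__(self, num_obs, num_targets, hidden_dims=(128, 128)):
        super().__init__()
        layers, in_dim = [], num_obs
        for h in hidden_dims:
            layers += [nn.Linear(in_dim, h), nn.ELU()]
            in_dim = h
        layers.append(nn.Linear(in_dim, num_targets))
        self.net = nn.Sequential(*layers)

    def forward(self, obs):
        return self.net(obs)

def update(estimator, optimizer, obs_batch, target_batch, max_gradient_steps=10):
    # Plain supervised regression against the simulator's ground-truth targets.
    for _ in range(max_gradient_steps):
        loss = nn.functional.mse_loss(estimator(obs_batch), target_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return loss.item()

With the settings above, optimizer would plausibly be torch.optim.Adam(estimator.parameters(), lr=1e-4), batches of 2**15 transitions would be drawn from recent rollouts, and observations would be normalized before the forward pass; the diff confirms only the field names, not these mechanics.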

class algorithm(MiniCheetahRunnerCfg.algorithm):
# training params
value_loss_coef = 1.0
@@ -245,6 +267,6 @@ class algorithm(MiniCheetahRunnerCfg.algorithm):
class runner(MiniCheetahRunnerCfg.runner):
run_name = ""
experiment_name = "FullSend"
max_iterations = 500 # number of policy updates
max_iterations = 1000 # number of policy updates
algorithm_class_name = "PPO2"
num_steps_per_env = 32
106 changes: 88 additions & 18 deletions gym/envs/mini_cheetah/mini_cheetah_ref.py
@@ -12,7 +12,7 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless):
LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR
)
self.leg_ref = 3 * to_torch(pd.read_csv(csv_path).to_numpy(), device=sim_device)
self.omega = 2 * torch.pi * cfg.control.gait_freq
# self.omega = 2 * torch.pi * cfg.control.gait_freq
super().__init__(gym, sim, cfg, sim_params, sim_device, headless)

def _init_buffers(self):
@@ -23,39 +23,109 @@ def _init_buffers(self):
self.phase_obs = torch.zeros(
self.num_envs, 2, dtype=torch.float, device=self.device
)
self.omega = torch.ones(self.num_envs, 1, device=self.device)

# self.previous_base_lin_vel = torch.zeros_like(self.base_lin_vel)
# self.base_lin_acc = torch.zeros_like(self.base_lin_vel)

self.grf = self._compute_grf()
self.se_grf = self._compute_grf()

self.se_base_height = torch.zeros_like(self.base_height)
self.se_base_lin_vel = torch.zeros_like(self.base_lin_vel)

def _reset_system(self, env_ids):
super()._reset_system(env_ids)
self._reset_gait_frequencies(env_ids)
self.phase[env_ids] = torch_rand_float(
0, torch.pi, shape=self.phase[env_ids].shape, device=self.device
)
self.se_base_lin_vel[env_ids] = self.base_lin_vel[env_ids]

def _reset_gait_frequencies(self, env_ids):
self.omega[env_ids, 0] = torch_rand_float(
self.cfg.control.gait_freq[0],
self.cfg.control.gait_freq[1],
(len(env_ids), 1),
device=self.device,
).squeeze(1)

def _resample_commands(self, env_ids):
"""Randommly select commands of some environments

Args:
env_ids (List[int]): Environment ids for which new commands are needed
"""
if len(env_ids) == 0:
return
super()._resample_commands(env_ids)
possible_commands = torch.tensor(
self.command_ranges["lin_vel_x"], device=self.device
)
self.commands[env_ids, 0:1] = possible_commands[
torch.randint(
0, len(possible_commands), (len(env_ids), 1), device=self.device
)
]
# add some gaussian noise to the commands
self.commands[env_ids, 0:1] += (
torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var
)

# if 0 in self.cfg.commands.ranges.lin_vel_x:
# * with 20% chance, reset to 0 commands except for forward
self.commands[env_ids, 1:] *= (
torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1)
< 0.8
).unsqueeze(1)
# * with 20% chance, reset to 0 commands except for rotation
self.commands[env_ids, :2] *= (
torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1)
< 0.8
).unsqueeze(1)
# * with 10% chance, reset to 0
# self.commands[env_ids, :] *= (
# torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1)
# < 0.9
# ).unsqueeze(1)

def _post_physx_step(self):
super()._post_physx_step()
self.phase = (
self.phase + self.dt * self.omega / self.cfg.control.decimation
).fmod(2 * torch.pi)
self.phase += self.dt * 2 * torch.pi * self.omega / self.cfg.control.decimation
self.phase = self.phase.fmod(2 * torch.pi)  # Tensor.fmod is out-of-place; reassign so the wrap takes effect
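With omega now a per-env gait frequency sampled in _reset_gait_frequencies from cfg.control.gait_freq, the phase advances by dt * 2*pi * omega / decimation per call and wraps to [0, 2*pi); the division by decimation suggests the update runs once per physics substep. A tiny standalone check of the update, with illustrative timestep values:

import torch

dt, decimation = 0.02, 4       # illustrative control timestep and decimation
omega = torch.tensor([[2.0]])  # 2 Hz gait -> one full cycle every 0.5 s
phase = torch.zeros(1, 1)

for _ in range(200):           # 200 physics substeps = 1 s at dt / decimation
    phase += dt * 2 * torch.pi * omega / decimation
    phase = phase.fmod(2 * torch.pi)  # reassignment keeps the wrap effective

print(phase)  # ~0 (mod 2*pi): two full gait cycles completed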

def _pre_decimation_step(self):
super()._pre_decimation_step()
# self.previous_base_lin_vel = self.base_lin_vel.clone()

def _post_decimation_step(self):
super()._post_decimation_step()
self.phase_obs = torch.cat(
(torch.sin(self.phase), torch.cos(self.phase)), dim=1
)

def _resample_commands(self, env_ids):
super()._resample_commands(env_ids)
# * with 10% chance, reset to 0 commands
rand_ids = torch_rand_float(
0, 1, (len(env_ids), 1), device=self.device
).squeeze(1)
self.commands[env_ids, :3] *= (rand_ids < 0.9).unsqueeze(1)
self.grf = self._compute_grf()
# self.base_lin_acc = (self.base_lin_vel - self.previous_base_lin_vel) / self.dt

def _switch(self):
c_vel = torch.linalg.norm(self.commands, dim=1)
return torch.exp(
-torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) / 0.1
-torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1))
/ self.cfg.reward_settings.switch_scale
)
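_switch is a soft standstill gate: it stays near 1 while the command norm is below roughly 0.1 and decays smoothly as the command grows, with reward_settings.switch_scale setting how quickly. Several reward terms in this file were previously multiplied by switch or (1 - switch); the diff comments those factors out but keeps _switch itself. A quick numeric check, with an assumed switch_scale of 0.5:

import torch

def switch(cmd_norm, switch_scale=0.5):
    excess = torch.clamp(cmd_norm - 0.1, min=0.0)
    return torch.exp(-excess.square() / switch_scale)

print(switch(torch.tensor([0.0, 0.1, 0.5, 2.0])))
# -> ~[1.00, 1.00, 0.73, 0.00]: near 1 at standstill, near 0 once commanded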

def _reward_trot(self):
off_phase = torch.fmod(self.phase + torch.pi, 2 * torch.pi)
phases = torch.cat((self.phase, off_phase, off_phase, self.phase), dim=1)
# grf = self._compute_grf()
return (self.grf * torch.sin(phases)).mean(dim=1) # * (1 - self._switch())
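The trot reward pairs legs diagonally: the first and fourth entries of phases share self.phase while the second and third run pi out of phase, so (grf * sin(phases)).mean() is largest when a diagonal pair loads the ground during the half-cycle where sin is positive. A small sanity check with idealized contact forces (the actual foot ordering is an assumption):

import torch

phase = torch.tensor([[0.5 * torch.pi]])           # sin(phase) = 1 here
off_phase = torch.fmod(phase + torch.pi, 2 * torch.pi)
phases = torch.cat((phase, off_phase, off_phase, phase), dim=1)

grf_trot = torch.tensor([[1.0, 0.0, 0.0, 1.0]])    # diagonal pair in contact
grf_pace = torch.tensor([[1.0, 1.0, 0.0, 0.0]])    # lateral pair in contact
print((grf_trot * torch.sin(phases)).mean(dim=1))  # tensor([0.5])  rewarded
print((grf_pace * torch.sin(phases)).mean(dim=1))  # tensor([0.])   not rewarded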

def _compute_grf(self, grf_norm=True):
grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1)
if grf_norm:
return torch.clamp_max(grf / 80.0, 1.0)
else:
return grf

def _reward_swing_grf(self):
"""Reward non-zero grf during swing (0 to pi)"""
in_contact = torch.gt(
@@ -64,7 +134,7 @@ def _reward_swing_grf(self):
)
ph_off = torch.lt(self.phase, torch.pi)
rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1)
return -torch.sum(rew.float(), dim=1) * (1 - self._switch())
return -torch.sum(rew.float(), dim=1) # * (1 - self._switch())

def _reward_stance_grf(self):
"""Reward non-zero grf during stance (pi to 2pi)"""
@@ -75,7 +145,7 @@ def _reward_stance_grf(self):
ph_off = torch.gt(self.phase, torch.pi) # should this be in swing?
rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1)

return torch.sum(rew.float(), dim=1) * (1 - self._switch())
return torch.sum(rew.float(), dim=1) # * (1 - self._switch())

def _reward_reference_traj(self):
"""REWARDS EACH LEG INDIVIDUALLY BASED ON ITS POSITION IN THE CYCLE"""
@@ -84,7 +154,7 @@ def _reward_reference_traj(self):
error /= self.scales["dof_pos"]
reward = (self._sqrdexp(error) - torch.abs(error) * 0.2).mean(dim=1)
# * only when commanded velocity is higher
return reward * (1 - self._switch())
return reward # * (1 - self._switch())

def _get_ref(self):
leg_frame = torch.zeros_like(self.torques)
@@ -112,10 +182,10 @@ def _reward_stand_still(self):
rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1)
rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1)
rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1)
return (rew_vel + rew_pos - rew_base_vel) * self._switch()
return rew_vel + rew_pos - rew_base_vel # * self._switch()

def _reward_tracking_lin_vel(self):
"""Tracking linear velocity commands (xy axes)"""
# just use lin_vel?
reward = super()._reward_tracking_lin_vel()
return reward * (1 - self._switch())
return reward # * (1 - self._switch())