def _reward_lin_vel_z(self):
    # Penalize vertical (z-axis) base linear velocity
    return torch.square(self.base_lin_vel[:, 2])

def _reward_ang_vel_xy(self):
    # Penalize roll/pitch (xy-axes) base angular velocity
    return torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1)

def _reward_orientation(self):
    # Penalize non-flat base orientation
    return torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1)

def _reward_base_height(self):
    # Penalize base height deviation from the target height
    base_height = self.root_states[:, 2]
    return torch.square(base_height - self.cfg.rewards.base_height_target)

def _reward_torques(self):
    # Penalize joint torques
    return torch.sum(torch.square(self.torques), dim=1)

def _reward_dof_vel(self):
    # Penalize joint velocities
    return torch.sum(torch.square(self.dof_vel), dim=1)

def _reward_dof_acc(self):
    # Penalize joint accelerations (finite difference of joint velocities)
    return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1)

def _reward_action_rate(self):
    # Penalize changes in actions between consecutive steps
    return torch.sum(torch.square(self.last_actions - self.actions), dim=1)

def _reward_collision(self):
    # Penalize collisions on selected (penalised) bodies
    return torch.sum(1.*(torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1)
def _reward_termination(self):
    # Terminal penalty: resets that were not caused by an episode time-out
    return self.reset_buf * ~self.time_out_buf

def _reward_dof_pos_limits(self):
    # Penalize joint positions that exceed the position limits
    out_of_limits = -(self.dof_pos - self.dof_pos_limits[:, 0]).clip(max=0.)  # lower limit
    out_of_limits += (self.dof_pos - self.dof_pos_limits[:, 1]).clip(min=0.)  # upper limit
    return torch.sum(out_of_limits, dim=1)

def _reward_dof_vel_limits(self):
    # Penalize joint velocities above the soft velocity limit
    # (clipped to 1 per joint to avoid huge penalties)
    return torch.sum((torch.abs(self.dof_vel) - self.dof_vel_limits*self.cfg.rewards.soft_dof_vel_limit).clip(min=0., max=1.), dim=1)

def _reward_torque_limits(self):
    # Penalize torques above the soft torque limit
    return torch.sum((torch.abs(self.torques) - self.torque_limits*self.cfg.rewards.soft_torque_limit).clip(min=0.), dim=1)
def _reward_tracking_lin_vel(self):
    # Reward tracking of commanded linear velocity (xy axes)
    lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
    return torch.exp(-lin_vel_error/self.cfg.rewards.tracking_sigma)

def _reward_tracking_ang_vel(self):
    # Reward tracking of commanded angular velocity (yaw)
    ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
    return torch.exp(-ang_vel_error/self.cfg.rewards.tracking_sigma)
def _reward_feet_air_time(self):
    # Reward long swing phases (feet air time), evaluated at the first contact after a swing
    contact = self.contact_forces[:, self.feet_indices, 2] > 1.
    contact_filt = torch.logical_or(contact, self.last_contacts)  # filter unreliable contact reporting
    self.last_contacts = contact
    first_contact = (self.feet_air_time > 0.) * contact_filt
    self.feet_air_time += self.dt
    rew_airTime = torch.sum((self.feet_air_time - 0.5) * first_contact, dim=1)  # reward only on first contact with the ground
    rew_airTime *= torch.norm(self.commands[:, :2], dim=1) > 0.1  # no reward for zero command
    self.feet_air_time *= ~contact_filt
    return rew_airTime
def _reward_stumble(self):
    # Penalize feet hitting vertical surfaces (lateral contact force much larger than normal force)
    return torch.any(torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) >
                     5 * torch.abs(self.contact_forces[:, self.feet_indices, 2]), dim=1)

def _reward_stand_still(self):
    # Penalize motion away from the default pose when the command is (near) zero
    return torch.sum(torch.abs(self.dof_pos - self.default_dof_pos), dim=1) * (torch.norm(self.commands[:, :2], dim=1) < 0.1)
def _reward_feet_contact_forces(self):
    # Penalize foot contact forces above the configured maximum
    return torch.sum((torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force).clip(min=0.), dim=1)
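
# How these per-term functions are typically used: each one returns a per-environment
# tensor, which is multiplied by a scalar scale (positive for rewards, negative for
# penalties) and summed into the total step reward. The sketch below is a minimal,
# self-contained illustration of that pattern; the names `combine_rewards`,
# `reward_terms`, `reward_scales` and the dummy values are assumptions for this
# example, not part of the class above.
import torch

def combine_rewards(reward_terms, reward_scales):
    # Sum scale * term over all reward names; each term is a (num_envs,) tensor.
    total = torch.zeros_like(next(iter(reward_terms.values())))
    for name, value in reward_terms.items():
        total += reward_scales[name] * value
    return total

# Example with 4 environments and three of the terms defined above (dummy values).
example_terms = {
    "tracking_lin_vel": torch.tensor([0.90, 0.75, 0.60, 0.95]),
    "lin_vel_z":        torch.tensor([0.01, 0.04, 0.00, 0.02]),
    "torques":          torch.tensor([120.0, 90.0, 150.0, 80.0]),
}
example_scales = {"tracking_lin_vel": 1.0, "lin_vel_z": -2.0, "torques": -1e-4}
print(combine_rewards(example_terms, example_scales))  # one total reward per environment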