nips/round2_course.py (revision f9c9f2)

def checkpoints_0(self):
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    if not prev_state_desc:
        return 0
    reward = 2
    pelvis = state_desc["body_pos"]["pelvis"][1]
    # Penalty for the pelvis dropping below 0.70 m (falling posture)
    reward -= max(0, 0.70 - pelvis) * 20
    penalty = 0
    # Small penalty for too much activation (cost of transport)
    penalty += np.sum(np.array(self.osim_model.get_activations()) ** 2) * 0.001
    # Big penalty for not matching the target velocity vector on the X,Z
    # projection; no penalty for the vertical axis
    penalty += abs(state_desc["body_vel"]["pelvis"][0] - state_desc["target_vel"][0]) * 2
    penalty += abs(state_desc["body_vel"]["pelvis"][2] - state_desc["target_vel"][2]) * 2
    reward -= penalty
    return reward * 0.5
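
# --- Illustrative sketch (not part of the original file) ---
# The same shaping as checkpoints_0, rewritten as a standalone pure function
# and evaluated on invented sample values, to show the size of each term.
def _shaped_reward_sketch(pelvis_y, pelvis_vel, target_vel, activations,
                          base=2.0, scale=0.5):
    reward = base
    # Height penalty: only active once the pelvis drops below 0.70 m
    reward -= max(0.0, 0.70 - pelvis_y) * 20
    # Small activation (effort) penalty
    penalty = sum(a * a for a in activations) * 0.001
    # Velocity-matching penalty on the X,Z ground-plane projection
    penalty += abs(pelvis_vel[0] - target_vel[0]) * 2
    penalty += abs(pelvis_vel[2] - target_vel[2]) * 2
    return (reward - penalty) * scale

# Hypothetical inputs: upright pelvis, slight velocity mismatch, 22 muscles
# at activation 0.3, so penalty = 0.00198 + 0.1 + 0.2 = 0.30198 and
# _shaped_reward_sketch(0.92, [1.2, 0.0, 0.1], [1.25, 0.0, 0.0], [0.3] * 22)
# returns (2 - 0.30198) * 0.5 = 0.84901.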
# follows checkpoints_0
def checkpoints_1(self):
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    if not prev_state_desc:
        return 0
    # Variant: adds the pelvis forward (X) velocity as a bonus to the base reward
    reward = 2 + state_desc["body_vel"]["pelvis"][0]
    pelvis = state_desc["body_pos"]["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 20
    penalty = 0
    # Small penalty for too much activation (cost of transport)
    penalty += np.sum(np.array(self.osim_model.get_activations()) ** 2) * 0.001
    # Big penalty for not matching the target velocity vector on the X,Z
    # projection; no penalty for the vertical axis
    penalty += abs(state_desc["body_vel"]["pelvis"][0] - state_desc["target_vel"][0]) * 2
    penalty += abs(state_desc["body_vel"]["pelvis"][2] - state_desc["target_vel"][2]) * 2
    reward -= penalty
    return reward * 0.5
# follows checkpoints_1
def checkpoints_2(self):
    # Identical shaping to checkpoints_0
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    if not prev_state_desc:
        return 0
    reward = 2
    pelvis = state_desc["body_pos"]["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 20
    penalty = 0
    # Small penalty for too much activation (cost of transport)
    penalty += np.sum(np.array(self.osim_model.get_activations()) ** 2) * 0.001
    # Big penalty for not matching the target velocity vector on the X,Z
    # projection; no penalty for the vertical axis
    penalty += abs(state_desc["body_vel"]["pelvis"][0] - state_desc["target_vel"][0]) * 2
    penalty += abs(state_desc["body_vel"]["pelvis"][2] - state_desc["target_vel"][2]) * 2
    reward -= penalty
    return reward * 0.5
# follows checkpoints_2
def checkpoints_3(self):
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    if not prev_state_desc:
        return 0
    # Variant: base reward of 3 instead of 2
    reward = 3
    pelvis = state_desc["body_pos"]["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 20
    penalty = 0
    # Small penalty for too much activation (cost of transport)
    penalty += np.sum(np.array(self.osim_model.get_activations()) ** 2) * 0.001
    # Big penalty for not matching the target velocity vector on the X,Z
    # projection; no penalty for the vertical axis
    penalty += abs(state_desc["body_vel"]["pelvis"][0] - state_desc["target_vel"][0]) * 2
    penalty += abs(state_desc["body_vel"]["pelvis"][2] - state_desc["target_vel"][2]) * 2
    reward -= penalty
    return reward * 0.5
# follows checkpoints_3
def checkpoints_4(self):
    # Identical shaping to checkpoints_0
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    if not prev_state_desc:
        return 0
    reward = 2
    pelvis = state_desc["body_pos"]["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 20
    penalty = 0
    # Small penalty for too much activation (cost of transport)
    penalty += np.sum(np.array(self.osim_model.get_activations()) ** 2) * 0.001
    # Big penalty for not matching the target velocity vector on the X,Z
    # projection; no penalty for the vertical axis
    penalty += abs(state_desc["body_vel"]["pelvis"][0] - state_desc["target_vel"][0]) * 2
    penalty += abs(state_desc["body_vel"]["pelvis"][2] - state_desc["target_vel"][2]) * 2
    reward -= penalty
    return reward * 0.5
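
# --- Illustrative sketch (not part of the original file) ---
# The fragment does not show how these variants are selected. One plausible
# wiring is a lookup keyed by the current course segment; `checkpoint_idx`
# below is an assumed attribute, not from the original file.
def checkpoint_reward(self):
    variants = [self.checkpoints_0, self.checkpoints_1, self.checkpoints_2,
                self.checkpoints_3, self.checkpoints_4]
    idx = min(self.checkpoint_idx, len(variants) - 1)  # assumed attribute
    return variants[idx]()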