|
a |
|
b/nips/course.py |
|
|
1 |
|
|
|
2 |
def course_0(self):
    """Return the scaled shaping reward for course stage 0.

    The reward is built from the current state description only:

    * a bonus of ``min(3.0, pelvis_vel[0]) * 4 + 2``;
    * a penalty of 40 per unit by which ``pelvis_pos[0]`` exceeds
      ``head_pos[0]`` (no tolerance margin at this stage);
    * a penalty of 100 per unit by which ``pelvis_pos[1]`` is below 0.70.

    The total is multiplied by 0.05. Returns 0 when no previous state
    description is available.

    NOTE(review): index 0 is presumably the forward axis and index 1 the
    vertical axis -- confirm against the environment's state layout.
    """
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    # The previous state is only used as a "has a step happened yet" guard.
    if not prev_state_desc:
        return 0

    body_pos = state_desc["body_pos"]

    # Velocity bonus, capped so that values above 3.0 earn nothing extra.
    reward = min(3.0, state_desc["body_vel"]["pelvis"][0]) * 4 + 2

    # Penalise the pelvis being ahead of the head along axis 0.
    lean_back = max(0, body_pos["pelvis"][0] - body_pos["head"][0] - 0.0)
    reward -= lean_back * 40

    # Penalise a pelvis position below 0.70 along axis 1.
    pelvis = body_pos["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 100

    return reward * 0.05
|
|
17 |
|
|
|
18 |
|
|
|
19 |
def course_1(self):
    """Return the scaled shaping reward for course stage 1.

    The reward is built from the current state description only:

    * a bonus of ``min(3.0, pelvis_vel[0]) * 2 + 2`` plus the axis-0
      velocities of ``pros_foot_r`` and ``toes_l``;
    * a penalty of 40 per unit by which ``pelvis_pos[0]`` exceeds
      ``head_pos[0]`` by more than 0.2;
    * a penalty of 100 per unit by which ``pelvis_pos[1]`` is below 0.70.

    The total is multiplied by 0.05. Returns 0 when no previous state
    description is available.

    NOTE(review): index 0 is presumably the forward axis and index 1 the
    vertical axis -- confirm against the environment's state layout.
    """
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    # The previous state is only used as a "has a step happened yet" guard.
    if not prev_state_desc:
        return 0

    body_vel = state_desc["body_vel"]
    body_pos = state_desc["body_pos"]

    # Capped pelvis velocity bonus plus the velocity of both feet.
    reward = (
        min(3.0, body_vel["pelvis"][0]) * 2 + 2
        + body_vel["pros_foot_r"][0] + body_vel["toes_l"][0]
    )

    # Penalise the pelvis leading the head by more than the 0.2 margin.
    lean_back = max(0, body_pos["pelvis"][0] - body_pos["head"][0] - 0.2)
    reward -= lean_back * 40

    # Penalise a pelvis position below 0.70 along axis 1.
    pelvis = body_pos["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 100

    return reward * 0.05
|
|
35 |
|
|
|
36 |
|
|
|
37 |
def course_2_3(self):
    """Return the scaled shaping reward for course stages 2 and 3.

    The reward is built from the current state description only:

    * a bonus of ``min(3.0, pelvis_vel[0]) * 2 + 2`` plus the axis-0
      velocities of ``pros_foot_r`` and ``toes_l``;
    * a penalty of 40 per unit by which ``pros_foot_r`` is more than 0.9
      ahead of ``toes_l`` along axis 0;
    * a penalty of 40 per unit by which ``pelvis_pos[0]`` exceeds
      ``head_pos[0]`` by more than 0.2;
    * a penalty of 100 per unit by which ``pelvis_pos[1]`` is below 0.70.

    The total is multiplied by 0.05. Returns 0 when no previous state
    description is available.

    NOTE(review): index 0 is presumably the forward axis and index 1 the
    vertical axis -- confirm against the environment's state layout.
    """
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    # The previous state is only used as a "has a step happened yet" guard.
    if not prev_state_desc:
        return 0

    body_vel = state_desc["body_vel"]
    body_pos = state_desc["body_pos"]

    # Capped pelvis velocity bonus plus the velocity of both feet.
    reward = (
        min(3.0, body_vel["pelvis"][0]) * 2 + 2
        + body_vel["pros_foot_r"][0] + body_vel["toes_l"][0]
    )

    # Penalise the right (prosthetic) foot getting more than 0.9 ahead of
    # the left toes; the reverse ordering is not penalised.
    front_foot = body_pos["pros_foot_r"][0]
    back_foot = body_pos["toes_l"][0]
    dist = max(0.0, front_foot - back_foot - 0.9)
    reward -= dist * 40

    # Penalise the pelvis leading the head by more than the 0.2 margin.
    lean_back = max(0, body_pos["pelvis"][0] - body_pos["head"][0] - 0.2)
    reward -= lean_back * 40

    # Penalise a pelvis position below 0.70 along axis 1.
    pelvis = body_pos["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 100

    return reward * 0.05
|
|
58 |
|
|
|
59 |
|
|
|
60 |
def course_4(self):
    """Return the scaled shaping reward for course stage 4.

    The reward is built from the current state description only:

    * a velocity term ``9.0 - (pelvis_vel[0] - 3.0) ** 2`` (largest when
      the pelvis velocity equals 3.0), weighted by 0.5, plus 1 and the
      axis-0 velocities of ``pros_foot_r`` and ``toes_l``;
    * a penalty of 40 per unit by which ``pros_foot_r`` is more than 0.9
      ahead of ``toes_l`` along axis 0;
    * a penalty of 40 per unit by which ``pelvis_pos[0]`` exceeds
      ``head_pos[0]`` by more than 0.2;
    * a penalty of 100 per unit by which ``pelvis_pos[1]`` is below 0.70.

    The total is multiplied by 0.05. Returns 0 when no previous state
    description is available.

    NOTE(review): index 0 is presumably the forward axis and index 1 the
    vertical axis -- confirm against the environment's state layout.
    """
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    # The previous state is only used as a "has a step happened yet" guard.
    if not prev_state_desc:
        return 0

    body_vel = state_desc["body_vel"]
    body_pos = state_desc["body_pos"]

    # Quadratic term peaking at a pelvis velocity of 3.0; unlike the capped
    # bonus of the earlier stages it also falls off above that value.
    super_reward = 9.0 - (body_vel["pelvis"][0] - 3.0) ** 2

    reward = (
        super_reward * 0.5 + 1
        + body_vel["pros_foot_r"][0] + body_vel["toes_l"][0]
    )

    # Penalise the right (prosthetic) foot getting more than 0.9 ahead of
    # the left toes; the reverse ordering is not penalised.
    front_foot = body_pos["pros_foot_r"][0]
    back_foot = body_pos["toes_l"][0]
    dist = max(0.0, front_foot - back_foot - 0.9)
    reward -= dist * 40

    # Penalise the pelvis leading the head by more than the 0.2 margin.
    lean_back = max(0, body_pos["pelvis"][0] - body_pos["head"][0] - 0.2)
    reward -= lean_back * 40

    # Penalise a pelvis position below 0.70 along axis 1.
    pelvis = body_pos["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 100

    return reward * 0.05
|
|
82 |
|
|
|
83 |
|
|
|
84 |
def course_5(self):
    """Return the scaled shaping reward for course stage 5.

    The reward is built from the current state description only:

    * a base of -1 when ``pelvis_vel[0]`` is below 1.0, otherwise
      ``9.0 - (pelvis_vel[0] - 3.0) ** 2`` (largest at a velocity of 3.0);
    * a penalty of 40 per unit by which ``pros_foot_r`` is more than 0.9
      ahead of ``toes_l`` along axis 0;
    * a penalty of 40 per unit by which ``pelvis_pos[0]`` exceeds
      ``head_pos[0]`` by more than 0.2;
    * a penalty of 100 per unit by which ``pelvis_pos[1]`` is below 0.7;
    * a penalty of 100 per unit by which ``abs(pelvis_pos[2])`` exceeds 0.6.

    The total is multiplied by 0.05. Returns 0 when no previous state
    description is available.

    NOTE(review): indices 0/1/2 are presumably the forward, vertical and
    lateral axes -- confirm against the environment's state layout.
    """
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    # The previous state is only used as a "has a step happened yet" guard.
    if not prev_state_desc:
        return 0

    body_pos = state_desc["body_pos"]

    pelvis_vx = state_desc["body_vel"]["pelvis"][0]
    if pelvis_vx < 1.0:
        # Flat negative base below the 1.0 velocity threshold.
        reward = -1
    else:
        reward = 9.0 - (pelvis_vx - 3.0) ** 2

    # Penalise the right (prosthetic) foot getting more than 0.9 ahead of
    # the left toes; the reverse ordering is not penalised.
    front_foot = body_pos["pros_foot_r"][0]
    back_foot = body_pos["toes_l"][0]
    dist = max(0.0, front_foot - back_foot - 0.9)
    reward -= dist * 40

    # Penalise the pelvis leading the head by more than the 0.2 margin.
    lean_back = max(0, body_pos["pelvis"][0] - body_pos["head"][0] - 0.2)
    reward -= lean_back * 40

    # Penalise a pelvis position below 0.7 along axis 1.
    pelvis = body_pos["pelvis"][1]
    reward -= max(0, 0.7 - pelvis) * 100

    # Penalise drifting more than 0.6 from zero along axis 2, either side.
    pelvis_z = abs(body_pos["pelvis"][2])
    reward -= max(0, pelvis_z - 0.6) * 100

    return reward * 0.05