Diff of /nips/course.py [000000] .. [f9c9f2]

Switch to side-by-side view

--- a
+++ b/nips/course.py
@@ -0,0 +1,110 @@
+
+def course_0(self):
+    """Return the scaled reward for the current step.
+
+    The score is built from the current state description only:
+
+    * a speed term from the pelvis velocity component at index 0, capped
+      at 3.0, weighted by 4 and offset by 2;
+    * a "lean back" penalty when the pelvis position at index 0 exceeds
+      the head position at index 0;
+    * a penalty when the pelvis position at index 1 falls below 0.70.
+
+    The sum is multiplied by 0.05.  When there is no previous state
+    description the function returns 0 without computing anything.
+    """
+    current = self.get_state_desc()
+    if not self.get_prev_state_desc():
+        return 0
+
+    # Index 0 is presumably the forward axis -- confirm against the env.
+    speed_term = min(3.0, current["body_vel"]["pelvis"][0]) * 4 + 2
+
+    positions = current["body_pos"]
+
+    # No slack is granted here: any pelvis-ahead-of-head offset is penalised.
+    lean_tolerance = 0.0
+    lean_back = max(0, positions["pelvis"][0] - positions["head"][0] - lean_tolerance)
+
+    # Index 1 is presumably the vertical axis -- confirm against the env.
+    pelvis_drop = max(0, 0.70 - positions["pelvis"][1])
+
+    total = speed_term - lean_back * 40 - pelvis_drop * 100
+    return total * 0.05
+
+
+def course_1(self):
+    """Return the scaled reward for the current step.
+
+    Compared with a pure pelvis-speed score, this variant halves the
+    weight of the capped pelvis velocity (index 0) and adds the index-0
+    velocities of ``pros_foot_r`` and ``toes_l``.  The pelvis may be up to
+    0.2 ahead of the head (index 0) before the lean penalty applies, and
+    a pelvis position at index 1 below 0.70 is penalised.
+
+    The sum is multiplied by 0.05.  Returns 0 when there is no previous
+    state description.
+    """
+    current = self.get_state_desc()
+    if not self.get_prev_state_desc():
+        return 0
+
+    velocities = current["body_vel"]
+    # Capped pelvis speed plus constant bonus, then both foot velocities.
+    motion_term = (
+        min(3.0, velocities["pelvis"][0]) * 2 + 2
+        + velocities["pros_foot_r"][0] + velocities["toes_l"][0]
+    )
+
+    positions = current["body_pos"]
+
+    # Allow the pelvis to lead the head by 0.2 before penalising.
+    lean_tolerance = 0.2
+    lean_back = max(0, positions["pelvis"][0] - positions["head"][0] - lean_tolerance)
+
+    # Index 1 is presumably the vertical axis -- confirm against the env.
+    pelvis_drop = max(0, 0.70 - positions["pelvis"][1])
+
+    total = motion_term - lean_back * 40 - pelvis_drop * 100
+    return total * 0.05
+
+
+def course_2_3(self):
+    """Return the scaled reward for the current step.
+
+    The positive part is the capped pelvis velocity (index 0, cap 3.0,
+    weight 2) plus 2 plus the index-0 velocities of ``pros_foot_r`` and
+    ``toes_l``.  Three penalties are then subtracted, in this order:
+
+    * stride: ``pros_foot_r`` more than 0.9 ahead of ``toes_l`` (index 0);
+    * lean back: pelvis more than 0.2 ahead of the head (index 0);
+    * low pelvis: pelvis position at index 1 below 0.70.
+
+    The sum is multiplied by 0.05.  Returns 0 when there is no previous
+    state description.
+    """
+    current = self.get_state_desc()
+    if not self.get_prev_state_desc():
+        return 0
+
+    velocities = current["body_vel"]
+    motion_term = (
+        min(3.0, velocities["pelvis"][0]) * 2 + 2
+        + velocities["pros_foot_r"][0] + velocities["toes_l"][0]
+    )
+
+    positions = current["body_pos"]
+
+    # NOTE(review): one-sided -- only penalised when pros_foot_r leads toes_l.
+    max_stride = 0.9
+    overstride = max(0.0, positions["pros_foot_r"][0] - positions["toes_l"][0] - max_stride)
+
+    # Allow the pelvis to lead the head by 0.2 before penalising.
+    lean_tolerance = 0.2
+    lean_back = max(0, positions["pelvis"][0] - positions["head"][0] - lean_tolerance)
+
+    # Index 1 is presumably the vertical axis -- confirm against the env.
+    pelvis_drop = max(0, 0.70 - positions["pelvis"][1])
+
+    total = motion_term - overstride * 40 - lean_back * 40 - pelvis_drop * 100
+    return total * 0.05
+
+
+def course_4(self):
+    """Return the scaled reward for the current step.
+
+    Instead of a capped linear speed term, the pelvis velocity at index 0
+    is scored with ``9.0 - (v - 3.0) ** 2``, which peaks at ``v == 3.0``.
+    Half of that value, plus 1, plus the index-0 velocities of
+    ``pros_foot_r`` and ``toes_l`` forms the positive part.  Three
+    penalties are then subtracted, in this order:
+
+    * stride: ``pros_foot_r`` more than 0.9 ahead of ``toes_l`` (index 0);
+    * lean back: pelvis more than 0.2 ahead of the head (index 0);
+    * low pelvis: pelvis position at index 1 below 0.70.
+
+    The sum is multiplied by 0.05.  Returns 0 when there is no previous
+    state description.
+    """
+    current = self.get_state_desc()
+    if not self.get_prev_state_desc():
+        return 0
+
+    velocities = current["body_vel"]
+
+    # Quadratic score centred on a pelvis velocity of 3.0.
+    target_speed_score = 9.0 - (velocities["pelvis"][0] - 3.0) ** 2
+    motion_term = (
+        target_speed_score * 0.5 + 1
+        + velocities["pros_foot_r"][0] + velocities["toes_l"][0]
+    )
+
+    positions = current["body_pos"]
+
+    # NOTE(review): one-sided -- only penalised when pros_foot_r leads toes_l.
+    max_stride = 0.9
+    overstride = max(0.0, positions["pros_foot_r"][0] - positions["toes_l"][0] - max_stride)
+
+    # Allow the pelvis to lead the head by 0.2 before penalising.
+    lean_tolerance = 0.2
+    lean_back = max(0, positions["pelvis"][0] - positions["head"][0] - lean_tolerance)
+
+    # Index 1 is presumably the vertical axis -- confirm against the env.
+    pelvis_drop = max(0, 0.70 - positions["pelvis"][1])
+
+    total = motion_term - overstride * 40 - lean_back * 40 - pelvis_drop * 100
+    return total * 0.05
+
+
+def course_5(self):
+    """Return the scaled reward for the current step.
+
+    The base score depends only on the pelvis velocity at index 0: a flat
+    -1 when it is below 1.0, otherwise ``9.0 - (v - 3.0) ** 2`` (peaking at
+    ``v == 3.0``).  Four penalties are then subtracted, in this order:
+
+    * stride: ``pros_foot_r`` more than 0.9 ahead of ``toes_l`` (index 0);
+    * lean back: pelvis more than 0.2 ahead of the head (index 0);
+    * low pelvis: pelvis position at index 1 below 0.7;
+    * drift: absolute pelvis position at index 2 above 0.6.
+
+    The sum is multiplied by 0.05.  Returns 0 when there is no previous
+    state description.
+    """
+    current = self.get_state_desc()
+    if not self.get_prev_state_desc():
+        return 0
+
+    speed = current["body_vel"]["pelvis"][0]
+    # Moving slower than 1.0 earns a fixed negative score.
+    base = -1 if speed < 1.0 else 9.0 - (speed - 3.0) ** 2
+
+    positions = current["body_pos"]
+
+    # NOTE(review): one-sided -- only penalised when pros_foot_r leads toes_l.
+    max_stride = 0.9
+    overstride = max(0.0, positions["pros_foot_r"][0] - positions["toes_l"][0] - max_stride)
+
+    # Allow the pelvis to lead the head by 0.2 before penalising.
+    lean_tolerance = 0.2
+    lean_back = max(0, positions["pelvis"][0] - positions["head"][0] - lean_tolerance)
+
+    # Index 1 is presumably the vertical axis -- confirm against the env.
+    pelvis_drop = max(0, 0.7 - positions["pelvis"][1])
+
+    # Index 2 is presumably the sideways axis -- confirm against the env.
+    side_drift = max(0, abs(positions["pelvis"][2]) - 0.6)
+
+    total = base - overstride * 40 - lean_back * 40 - pelvis_drop * 100 - side_drift * 100
+    return total * 0.05