# Diff of /nips/course.py, revisions [000000] .. [f9c9f2] (unified view)
1
2
def course_0(self):
    """Reward for course 0: forward pelvis velocity, with penalties for
    leaning back and for a low pelvis.

    Returns 0 until a previous state description is available.
    """
    state_desc = self.get_state_desc()
    prev_state_desc = self.get_prev_state_desc()
    if not prev_state_desc:
        # No previous frame recorded yet -- nothing to shape a reward from.
        return 0

    # Base reward: forward pelvis speed, capped at 3.0, scaled and offset.
    reward = min(3.0, state_desc["body_vel"]["pelvis"][0]) * 4 + 2

    # Penalize the pelvis trailing behind the head (leaning back).
    # FIX: dropped the dead `- 0.0` tolerance the original subtracted here.
    lean_back = max(0, state_desc["body_pos"]["pelvis"][0] - state_desc["body_pos"]["head"][0])
    reward -= lean_back * 40

    # Penalize a pelvis lower than 0.70 m (crouching / falling).
    pelvis = state_desc["body_pos"]["pelvis"][1]
    reward -= max(0, 0.70 - pelvis) * 100

    # Scale down to keep per-step rewards small.
    return reward * 0.05
17
18
19
def course_1(self):
    """Reward for course 1: capped forward pelvis speed plus the speed of
    both feet, minus lean-back and low-pelvis penalties.

    Returns 0 until a previous state description is available.
    """
    desc = self.get_state_desc()
    if not self.get_prev_state_desc():
        # First step of an episode: no previous frame yet.
        return 0

    vel = desc["body_vel"]
    pos = desc["body_pos"]

    # Base term: pelvis speed (capped at 3.0) plus both feet's forward speed.
    reward = min(3.0, vel["pelvis"][0]) * 2 + 2 \
             + vel["pros_foot_r"][0] + vel["toes_l"][0]

    # Lean-back penalty: pelvis more than 0.2 m behind the head.
    reward -= max(0, pos["pelvis"][0] - pos["head"][0] - 0.2) * 40

    # Low-pelvis penalty: pelvis height below 0.70 m.
    reward -= max(0, 0.70 - pos["pelvis"][1]) * 100

    # Scale down to keep per-step rewards small.
    return reward * 0.05
35
36
37
def course_2_3(self):
    """Reward for courses 2 and 3: like course 1, plus a penalty for an
    over-extended stride (front foot more than 0.9 m ahead of back foot).

    Returns 0 until a previous state description is available.
    """
    desc = self.get_state_desc()
    if not self.get_prev_state_desc():
        # First step of an episode: no previous frame yet.
        return 0

    vel = desc["body_vel"]
    pos = desc["body_pos"]

    # Base term: pelvis speed (capped at 3.0) plus both feet's forward speed.
    reward = min(3.0, vel["pelvis"][0]) * 2 + 2 \
             + vel["pros_foot_r"][0] + vel["toes_l"][0]

    # Stride penalty: prosthetic (front) foot too far ahead of the left toes.
    stride = max(0.0, pos["pros_foot_r"][0] - pos["toes_l"][0] - 0.9)
    reward -= stride * 40

    # Lean-back penalty: pelvis more than 0.2 m behind the head.
    reward -= max(0, pos["pelvis"][0] - pos["head"][0] - 0.2) * 40

    # Low-pelvis penalty: pelvis height below 0.70 m.
    reward -= max(0, 0.70 - pos["pelvis"][1]) * 100

    # Scale down to keep per-step rewards small.
    return reward * 0.05
58
59
60
def course_4(self):
    """Reward for course 4: quadratic speed target centered at 3 m/s plus
    foot speeds, minus stride, lean-back and low-pelvis penalties.

    Returns 0 until a previous state description is available.
    """
    desc = self.get_state_desc()
    if not self.get_prev_state_desc():
        # First step of an episode: no previous frame yet.
        return 0

    vel = desc["body_vel"]
    pos = desc["body_pos"]

    # Peaks at 9.0 when the pelvis moves forward at exactly 3 m/s.
    speed_term = 9.0 - (vel["pelvis"][0] - 3.0) ** 2

    reward = speed_term * 0.5 + 1 + vel["pros_foot_r"][0] + vel["toes_l"][0]

    # Stride penalty: prosthetic (front) foot too far ahead of the left toes.
    stride = max(0.0, pos["pros_foot_r"][0] - pos["toes_l"][0] - 0.9)
    reward -= stride * 40

    # Lean-back penalty: pelvis more than 0.2 m behind the head.
    reward -= max(0, pos["pelvis"][0] - pos["head"][0] - 0.2) * 40

    # Low-pelvis penalty: pelvis height below 0.70 m.
    reward -= max(0, 0.70 - pos["pelvis"][1]) * 100

    # Scale down to keep per-step rewards small.
    return reward * 0.05
82
83
84
def course_5(self):
    """Reward for course 5: flat penalty when crawling (below 1 m/s),
    otherwise a quadratic speed target at 3 m/s; also penalizes
    over-striding, leaning back, a low pelvis, and lateral pelvis drift.

    Returns 0 until a previous state description is available.
    """
    desc = self.get_state_desc()
    if not self.get_prev_state_desc():
        # First step of an episode: no previous frame yet.
        return 0

    vel = desc["body_vel"]
    pos = desc["body_pos"]

    # Speed term: discourage crawling, otherwise peak (9.0) at 3 m/s.
    vx = vel["pelvis"][0]
    reward = -1 if vx < 1.0 else 9.0 - (vx - 3.0) ** 2

    # Stride penalty: prosthetic (front) foot too far ahead of the left toes.
    reward -= max(0.0, pos["pros_foot_r"][0] - pos["toes_l"][0] - 0.9) * 40

    # Lean-back penalty: pelvis more than 0.2 m behind the head.
    reward -= max(0, pos["pelvis"][0] - pos["head"][0] - 0.2) * 40

    # Low-pelvis penalty: pelvis height below 0.7 m.
    reward -= max(0, 0.7 - pos["pelvis"][1]) * 100

    # Lateral-drift penalty: pelvis z more than 0.6 m off the center line.
    reward -= max(0, abs(pos["pelvis"][2]) - 0.6) * 100

    # Scale down to keep per-step rewards small.
    return reward * 0.05