a b/examples/under-construction/newenv.py
1
import os
2
from osim.env import OsimEnv
3
4
## Define an environment where we teach an agent to stand still
5
# We use a walker model and define the objective to keep the center of mass still
6
class StandingEnv(OsimEnv):
7
    model_path = os.path.join(os.path.dirname(__file__), '../osim/models/gait9dof18musc.osim')    
8
    time_limit = 300
9
    ninput = 99
10
    
11
    def is_done(self):
12
        # End the simulation if the pelvis is too low
13
        state_desc = self.get_state_desc()
14
        return state_desc["joint_pos"]["ground_pelvis"][2] < 0.7
15
16
    def get_observation(self):
17
        state_desc = self.get_state_desc()
18
19
        # Augmented environment from the L2R challenge
20
        res = []
21
22
        # Map some of the state variables to the observation vector
23
        for body_part in ["pelvis","head","torso","toes_l","toes_r","talus_l","talus_r"]:
24
            res = res + state_desc["body_pos_rot"][body_part][2:]
25
            res = res + state_desc["body_pos"][body_part][0:2]
26
            res = res + state_desc["body_vel_rot"][body_part][2:]
27
            res = res + state_desc["body_vel"][body_part][0:2]
28
            res = res + state_desc["body_acc_rot"][body_part][2:]
29
            res = res + state_desc["body_acc"][body_part][0:2]
30
31
        for joint in ["ankle_l","ankle_r","back","ground_pelvis","hip_l","hip_r","knee_l","knee_r"]:
32
            res = res + state_desc["joint_pos"][joint]
33
            res = res + state_desc["joint_vel"][joint]
34
            res = res + state_desc["joint_acc"][joint]
35
36
        res = res + state_desc["misc"]["mass_center_pos"] + state_desc["misc"]["mass_center_vel"] + state_desc["misc"]["mass_center_acc"]
37
38
        return res
39
40
    def get_observation_space_size(self):
41
        return 99
42
43
    def reward(self):
44
        # Get the current state and the last state
45
        state_desc = self.get_state_desc()
46
        prev_state_desc = self.get_prev_state_desc()
47
        if not prev_state_desc:
48
            return 0
49
50
        # Penalize movement of the pelvis
51
        res = -(prev_state_desc["misc"]["mass_center_pos"][0] - state_desc["misc"]["mass_center_pos"][0])**2\
52
              -(prev_state_desc["misc"]["mass_center_pos"][1] - state_desc["misc"]["mass_center_pos"][1])**2
53
54
        # Penalize very low position of the pelvis
55
        res += -(state_desc["joint_pos"]["ground_pelvis"][2] < 0.8)
56
        
57
        return res
58
59
env = StandingEnv(visualize=True)
60
61
observation = env.reset()
62
for i in range(200):
63
    observation, reward, done, info = env.step(env.action_space.sample())
64
    print("Reward %f" % reward)
65
    if done:
66
        env.reset()