Switch to unified view

a b/examples/under-construction/ue.py
1
import os
2
from osim.env import OsimEnv
3
import pprint
4
import numpy as np
5
from keras.models import load_model
6
7
class Arm3dEnv(OsimEnv):
8
    model_path = os.path.join(os.path.dirname(__file__), '../osim/models/ue_RL.osim')
9
    time_limit = 200
10
    current_objective = np.array([0,0,0])
11
    
12
    def is_done(self):
13
        # End the simulation if the pelvis is too low
14
        state_desc = self.get_state_desc()
15
        return False
16
17
    def get_observation(self):
18
        state_desc = self.get_state_desc()
19
20
        # Augmented environment from the L2R challenge
21
        res = []
22
23
        # Map some of the state variables to the observation vector
24
        for body_part in state_desc["body_pos_rot"].keys():
25
            res = res + state_desc["body_pos_rot"][body_part][2:]
26
            res = res + state_desc["body_pos"][body_part][0:2]
27
            res = res + state_desc["body_vel_rot"][body_part][2:]
28
            res = res + state_desc["body_vel"][body_part][0:2]
29
            res = res + state_desc["body_acc_rot"][body_part][2:]
30
            res = res + state_desc["body_acc"][body_part][0:2]
31
32
        for joint in state_desc["joint_pos"].keys():
33
            res = res + state_desc["joint_pos"][joint]
34
            res = res + state_desc["joint_vel"][joint]
35
            res = res + state_desc["joint_acc"][joint]
36
37
        res = res + state_desc["misc"]["mass_center_pos"] + state_desc["misc"]["mass_center_vel"] + state_desc["misc"]["mass_center_acc"]
38
        res += self.current_objective.tolist()
39
40
        res = np.array(res)
41
        res[np.isnan(res)] = 0
42
43
        return res
44
45
    def get_observation_space_size(self):
46
        return 168
47
48
    def reset_objective(self):
49
        self.current_objective = np.random.uniform(-0.5,0.5,3)
50
51
    def reset(self):
52
        print(self.reward())
53
        self.reset_objective()
54
        return super(Arm3dEnv, self).reset()
55
56
    def reward(self):
57
        # Get the current state and the last state
58
        prev_state_desc = self.get_prev_state_desc()
59
        if not prev_state_desc:
60
            return 0
61
        state_desc = self.get_state_desc()
62
63
        res = 0
64
65
        # # Penalize movement of the pelvis
66
        # res = -(prev_state_desc["misc"]["mass_center_pos"][0] - state_desc["misc"]["mass_center_pos"][0])**2\
67
        #       -(prev_state_desc["misc"]["mass_center_pos"][1] - state_desc["misc"]["mass_center_pos"][1])**2
68
69
        # # Penalize very low position of the pelvis
70
        # res += -(state_desc["joint_pos"]["ground_pelvis"][2] < 0.8)
71
        
72
        return -np.linalg.norm(np.array(state_desc["markers"]["Handle"]["pos"]) - self.current_objective)
73
74
env = Arm3dEnv(visualize=True, integrator_accuracy=1e-4)
75
76
if __name__ == '__main__':
77
    observation = env.reset()
78
79
    # returns a compiled model
80
    # identical to the previous one
81
    model = load_model('/home/lukasz/nnregression.h5')
82
    print(model.summary())
83
84
    for i in range(200):
85
        action = env.action_space.sample()
86
        observation, reward, done, info = env.step(action)
87
        if done:
88
            env.reset()