a b/cmaes/optim_par_L2M2019Ctrl_2D.py
1
from osim.env import L2M2019Env
2
from osim.control.osim_loco_reflex_song2019 import OsimReflexCtrl
3
from joblib import Parallel, delayed
4
5
import sys
6
import numpy as np
7
8
# Prefix shared by every output file written for this optimisation run.
trial_name = 'trial_190510_L2M2019CtrlEnv_2D_d0_'

# Initial controller parameter vector (37 values, all ones).
# To start from a previously saved vector instead, load it from disk, e.g.
#   params = np.loadtxt('./optim_data/cma/trial_181029_walk_3D_noStand_8_best.txt')
params = np.ones(37)

# CMA-ES population size.  For reference, the heuristic
# 4 + floor(3*ln(n)) evaluates to 14 for n = 37 parameters.
N_POP = 16
# Number of worker processes handed to joblib.
N_PROC = 2
# Timeout passed to joblib.Parallel: 10 * 60 = 600.
TIMEOUT = 10*60

# Initial pose passed to env.reset(); angles are converted from degrees
# to radians.
init_pose = np.array([
    1.5,             # forward speed
    .9,              # pelvis height
    10*np.pi/180,    # trunk lean
    # [right] leg
    -3*np.pi/180,    # hip abduct
    -30*np.pi/180,   # hip extend
    -10*np.pi/180,   # knee extend
    10*np.pi/180,    # ankle extend
    # [left] leg
    -3*np.pi/180,    # hip abduct
    5*np.pi/180,     # hip extend
    -40*np.pi/180,   # knee extend
    -0*np.pi/180,    # ankle extend
])
19
        
20
def f_ind(n_gen, i_worker, params):
    """Simulate one candidate parameter vector and return its total reward.

    Builds a 2D L2M2019 environment and a 2D reflex controller, resets the
    environment to the module-level ``init_pose`` and steps the simulation
    until the environment reports ``done``.

    Args:
        n_gen: Generation number; only used in the progress message.
        i_worker: Index of this candidate within the generation; only used
            in the progress message.
        params: Control parameter vector passed to
            ``OsimReflexCtrl.set_control_params``.

    Returns:
        The sum of the rewards returned by ``env.step`` over the episode.
        This is the raw reward (higher is better); ``CMATrainPar.f`` negates
        it to obtain the cost that is minimised.
    """
    flag_model = '2D'
    flag_ctrl_mode = '2D' # use 2D
    seed = None
    difficulty = 0
    sim_dt = 0.01  # simulation step [sec]
    sim_t = 20     # nominal simulation duration [sec]
    timestep_limit = int(round(sim_t/sim_dt))

    # Environment/controller construction is retried until it succeeds.
    # The triggering exception is reported so that a persistent failure is
    # visible instead of being silently swallowed.
    error_count = 0
    while True:
        try:
            locoCtrl = OsimReflexCtrl(mode=flag_ctrl_mode, dt=sim_dt)
            env = L2M2019Env(seed=seed, difficulty=difficulty, visualize=False)
            env.change_model(model=flag_model, difficulty=difficulty, seed=seed)
            obs_dict = env.reset(project=True, seed=seed, init_pose=init_pose, obs_as_dict=True)
            break
        except Exception as e_msg:
            error_count += 1
            print('\ninitialization error (x{})!!!'.format(error_count))
            print('\t{!r}'.format(e_msg))

    # 100 steps of headroom on top of the nominal step count.
    env.spec.timestep_limit = timestep_limit+100

    total_reward = 0
    t = 0
    while True:
        t += sim_dt

        locoCtrl.set_control_params(params)
        action = locoCtrl.update(obs_dict)
        obs_dict, reward, done, info = env.step(action, project=True, obs_as_dict=True)
        total_reward += reward

        if done:
            break

    print('\n    gen#={} sim#={}: score={} time={}sec #step={}'.format(n_gen, i_worker, total_reward, t, env.footstep['n']))

    # Raw reward; negation for minimisation is done by the caller.
    return total_reward
62
63
64
class CMATrainPar(object):
    """Objective wrapper that evaluates a whole CMA population in parallel.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, ):
        # Number of generations evaluated so far (incremented by ``f``).
        self.n_gen = 0
        # Highest total reward seen over all evaluated candidates.
        self.best_total_reward = -np.inf

    def f(self, v_params):
        """Evaluate every candidate of one generation and return their costs.

        Each candidate is simulated with ``f_ind`` through ``joblib.Parallel``
        using ``N_PROC`` jobs and the ``TIMEOUT`` limit.  If the parallel
        evaluation raises, the whole generation is evaluated again until it
        completes.  Whenever a candidate beats the best reward seen so far,
        that candidate's parameter vector is written to
        ``./optim_data/cma/<trial_name>best_w.txt``.

        Args:
            v_params: Iterable of candidate parameter vectors.

        Returns:
            A list with the negated total reward of each candidate, in the
            same order as ``v_params`` (a cost to be minimised).
        """
        self.n_gen += 1

        # Materialise once: the candidates are iterated again on every retry
        # and once more when pairing them with their rewards below.
        candidates = list(v_params)

        error_count = 0
        while True:
            try:
                v_total_reward = Parallel(n_jobs=N_PROC, timeout=TIMEOUT)(
                    delayed(f_ind)(self.n_gen, i, p)
                    for i, p in enumerate(candidates))
                break
            except Exception as e_msg:
                # Any exception lands here, not only timeouts; show which one
                # so that a persistent failure can be diagnosed.
                error_count += 1
                print('\ntimeout error (x{})!!!'.format(error_count))
                print('\t{!r}'.format(e_msg))

        for candidate, total_reward in zip(candidates, v_total_reward):
            if self.best_total_reward  < total_reward:
                filename = "./optim_data/cma/" + trial_name + "best_w.txt"
                print("\n")
                print("----")
                print("update the best score!!!!")
                print("\tprev = %.8f" % self.best_total_reward )
                print("\tcurr = %.8f" % total_reward)
                print("\tsave to [%s]" % filename)
                print("----")
                print("")
                self.best_total_reward  = total_reward
                # Save the candidate that produced this reward (not the
                # module-level initial ``params``).
                np.savetxt(filename, candidate)

        return [-r for r in v_total_reward]
98
99
if __name__ == '__main__':
    # Script entry point: configure the parallel CMA solver and run it on
    # the module-level starting vector.
    from cmaes.solver_cma import CMASolverPar

    problem = CMATrainPar()
    cma_solver = CMASolverPar(problem)

    # Solver options, applied in this order.
    solver_options = (
        ("popsize", N_POP),
        ("maxiter", 400),
        ("verb_filenameprefix", 'optim_data/cma/' + trial_name),
    )
    for option_name, option_value in solver_options:
        cma_solver.options.set(option_name, option_value)
    cma_solver.set_verbose(True)

    initial_guess = params
    initial_sigma = .01

    result = cma_solver.solve(initial_guess, initial_sigma)
    print(result)