examples/legacy/train.arm.py

# Derived from keras-rl
import opensim as osim
import numpy as np
import sys

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam, RMSprop

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

from osim.env.arm import ArmEnv

import argparse
import math

# Command line parameters
parser = argparse.ArgumentParser(description='Train or test neural net motor controller')
parser.add_argument('--train', dest='train', action='store_true', default=True)
parser.add_argument('--test', dest='train', action='store_false')
parser.add_argument('--steps', dest='steps', action='store', default=10000, type=int)
parser.add_argument('--visualize', dest='visualize', action='store_true', default=False)
parser.add_argument('--model', dest='model', action='store', default="example.h5f")
args = parser.parse_args()
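# Example invocations (a sketch; assumes the osim-rl and keras-rl packages are
# installed and the script is run from this directory, using only the flags
# defined above):
#   python train.arm.py --train --steps 10000 --model example.h5f
#   python train.arm.py --test --model example.h5f --visualize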
|
|
# Load the arm environment
env = ArmEnv(args.visualize)
env.reset()

nb_actions = env.action_space.shape[0]

# Total number of steps in training
nallsteps = args.steps
|
|
# Create networks for DDPG
# The actor is a small fully-connected network that maps observations to
# nb_actions outputs, squashed into [0, 1] by the final sigmoid.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))
print(actor.summary())
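# In DDPG the critic estimates Q(observation, action): it takes both the action
# and the observation as inputs and produces a single unbounded value (linear
# output), which is why it is built with the functional API below.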
|
|
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
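# The pieces below follow the standard keras-rl DDPG recipe: a replay buffer
# (SequentialMemory), Ornstein-Uhlenbeck noise added to the actor's output for
# exploration, a discount factor gamma, and soft target-network updates
# (target_model_update=1e-3 blends a small fraction of the online weights into
# the target networks at every step).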
|
|
# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
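# nb_steps_warmup_critic and nb_steps_warmup_actor delay updates until the
# replay buffer holds some experience, and delta_clip=1. is the delta of the
# Huber loss applied to the critic's temporal-difference error for stability.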
|
|
# Okay, now it's time to learn something! Visualization (enabled with the
# --visualize flag) is handled by the environment itself and slows down
# training quite a lot. You can always safely abort the training prematurely
# using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)
|
|
if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)