examples/legacy/train.arm.py

# Derived from keras-rl
import opensim as osim
import numpy as np
import sys

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

from osim.env.arm import ArmEnv

from keras.optimizers import RMSprop

import argparse
import math

# Command line parameters
parser = argparse.ArgumentParser(description='Train or test neural net motor controller')
parser.add_argument('--train', dest='train', action='store_true', default=True)
parser.add_argument('--test', dest='train', action='store_false', default=True)
parser.add_argument('--steps', dest='steps', action='store', default=10000, type=int)
parser.add_argument('--visualize', dest='visualize', action='store_true', default=False)
parser.add_argument('--model', dest='model', action='store', default="example.h5f")
args = parser.parse_args()
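
# Note: --train and --test share dest='train', so the script trains by default
# and switches to evaluating previously saved weights when --test is passed.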

# Load the arm environment
env = ArmEnv(args.visualize)
env.reset()

nb_actions = env.action_space.shape[0]

# Total number of steps in training
nallsteps = args.steps

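# DDPG learns two networks: an actor that outputs muscle activations and a
# critic that scores observation/action pairs.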
# Create networks for DDPG
# Next, we build a very simple model.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
# Sigmoid output keeps each muscle activation in [0, 1]
actor.add(Activation('sigmoid'))
print(actor.summary())
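
# The critic is built with the functional API because it takes two inputs:
# the action and the (flattened) observation.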
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
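
# DDPG is off-policy: transitions are stored in a replay buffer, and
# exploration comes from Ornstein-Uhlenbeck noise added to the actor's actions.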
# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is disabled here because
# it slows down training quite a lot. You can always safely abort the training
# prematurely using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
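
# Example invocations (flags as defined by the argument parser above):
#   python train.arm.py --train --steps 100000 --model example.h5f
#   python train.arm.py --test --model example.h5f --visualize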