--- /dev/null
+++ b/examples/train_arm.py
@@ -0,0 +1,99 @@
+# Derived from keras-rl
+import opensim as osim
+import numpy as np
+import argparse
+
+from keras.models import Sequential, Model
+from keras.layers import Dense, Activation, Flatten, Input, concatenate
+from keras.optimizers import Adam
+
+from rl.agents import DDPGAgent
+from rl.memory import SequentialMemory
+from rl.random import OrnsteinUhlenbeckProcess
+
+from osim.env.arm import Arm2DVecEnv
+
+# Command line parameters
+parser = argparse.ArgumentParser(description='Train or test neural net motor controller')
+parser.add_argument('--train', dest='train', action='store_true', default=True)
+parser.add_argument('--test', dest='train', action='store_false')
+parser.add_argument('--steps', dest='steps', action='store', default=10000, type=int)
+parser.add_argument('--visualize', dest='visualize', action='store_true', default=False)
+parser.add_argument('--model', dest='model', action='store', default="example.h5f")
+args = parser.parse_args()
+
+# Load the 2D arm reaching environment
+env = Arm2DVecEnv(args.visualize)
+env.reset()
+
+nb_actions = env.action_space.shape[0]
+
+# Total number of steps in training
+nallsteps = args.steps
+
+# Create networks for DDPG
+# First, the actor: a simple feed-forward network that maps observations to
+# muscle excitations in [0, 1] (hence the sigmoid output).
+actor = Sequential()
+actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
+actor.add(Dense(32))
+actor.add(Activation('relu'))
+actor.add(Dense(32))
+actor.add(Activation('relu'))
+actor.add(Dense(32))
+actor.add(Activation('relu'))
+actor.add(Dense(nb_actions))
+actor.add(Activation('sigmoid'))
+print(actor.summary())
+
+# Next, the critic: it scores (observation, action) pairs with a single linear output.
+action_input = Input(shape=(nb_actions,), name='action_input')
+observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
+flattened_observation = Flatten()(observation_input)
+x = concatenate([action_input, flattened_observation])
+x = Dense(64)(x)
+x = Activation('relu')(x)
+x = Dense(64)(x)
+x = Activation('relu')(x)
+x = Dense(64)(x)
+x = Activation('relu')(x)
+x = Dense(1)(x)
+x = Activation('linear')(x)
+critic = Model(inputs=[action_input, observation_input], outputs=x)
+print(critic.summary())
+
+# Set up the agent for training
+memory = SequentialMemory(limit=100000, window_length=1)
+random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=nb_actions)
+agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
+                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
+                  random_process=random_process, gamma=.99, target_model_update=1e-3,
+                  delta_clip=1.)
+agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
+
+# Okay, now it's time to learn something! Visualization is kept off during
+# training because it slows things down considerably. You can always safely
+# abort the training prematurely using Ctrl + C.
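+
+# Note: --train and --test both write to args.train, so exactly one of the two
+# branches below runs: training saves weights to args.model, testing loads them back.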
+if args.train:
+    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)
+    # After training is done, we save the final weights.
+    agent.save_weights(args.model, overwrite=True)
+
+if not args.train:
+    agent.load_weights(args.model)
+    # Finally, evaluate the trained controller for 5 episodes.
+    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
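+
+# Example invocations (step count and weights path are placeholders; adjust as needed):
+#   python examples/train_arm.py --train --steps 100000 --model example.h5f
+#   python examples/train_arm.py --test --visualize --model example.h5f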