--- a/rdpg/gym_rdpg.py
+++ b/rdpg/gym_rdpg.py
@@ -0,0 +1,64 @@
+from rdpg import *
+import opensim as osim
+from osim.http.client import Client
+from osim.env import *
+from history import History
+
+ENV_NAME = 'learning_to_run'
+PATH = 'models/'
+EPISODES = 100000
+TEST = 5
+
+def main():
+    env = RunEnv(visualize=False)
+    env.reset(difficulty=0)
+    agent = RDPG(env)
+
+    returns = []
+    rewards = []
+
+    for episode in xrange(EPISODES):
+        state = env.reset(difficulty=0)
+        reward_episode = []
+        print "episode:", episode
+        # Initialize the history with the first observed state
+        history = History(state)
+        # Train: roll out one episode, appending every transition to the history
+        for step in xrange(env.spec.timestep_limit):
+            action = agent.noise_action(history)
+            next_state, reward, done, _ = env.step(action)
+            history.append(next_state, action, reward)
+            reward_episode.append(reward)
+            if done:
+                break
+        # Store the episode history in the replay buffer; once the number of
+        # stored history sequences exceeds the threshold, training starts
+        agent.perceive(history)
+
+        # Testing:
+        # if episode % 1000 == 0 and episode > 50:
+        #     agent.save_model(PATH, episode)
+
+        #     total_return = 0
+        #     ave_reward = 0
+        #     for i in xrange(TEST):
+        #         state = env.reset()
+        #         reward_per_step = 0
+        #         for j in xrange(env.spec.timestep_limit):
+        #             action = agent.action(state)  # direct (noise-free) action for testing
+        #             state, reward, done, _ = env.step(action)
+        #             total_return += reward
+        #             if done:
+        #                 break
+        #             reward_per_step += (reward - reward_per_step) / (j + 1)
+        #         ave_reward += reward_per_step
+
+        #     ave_return = total_return / TEST
+        #     ave_reward = ave_reward / TEST
+        #     returns.append(ave_return)
+        #     rewards.append(ave_reward)
+
+        #     print 'episode:', episode, 'Evaluation Average Return:', ave_return, 'Evaluation Average Reward:', ave_reward
+
+if __name__ == '__main__':
+    main()
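
For reference, the training loop above leans on two interfaces defined elsewhere in this repo: the History container from history.py and the RDPG agent from rdpg.py. The calls in main() only pin down the History(state) constructor and the append(next_state, action, reward) method, so the following is just a minimal sketch of what history.py could provide; the field names and internal representation are assumptions, not the actual implementation:

import numpy as np

class History(object):
    """One episode stored as aligned state/action/reward sequences (sketch)."""

    def __init__(self, initial_state):
        # An episode starts with the initial observation and no transitions yet.
        self.states = [np.asarray(initial_state)]
        self.actions = []
        self.rewards = []

    def append(self, next_state, action, reward):
        # Record one transition: the action taken in the most recent state,
        # the reward received, and the state that resulted.
        self.states.append(np.asarray(next_state))
        self.actions.append(np.asarray(action))
        self.rewards.append(float(reward))

    def __len__(self):
        return len(self.actions)

Keeping a whole episode in one object is what lets agent.noise_action(history) condition a recurrent policy on the full sequence so far, and lets agent.perceive(history) push a complete trajectory into the replay buffer; as the comment in the loop notes, training only starts once enough history sequences have been stored.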
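
A small aside on the commented-out evaluation block: reward_per_step += (reward - reward_per_step)/(j+1) is the standard incremental-mean update, so after the test episode reward_per_step holds the average per-step reward without storing the individual rewards (note that the break before it skips the update on the terminal step, so the final reward is omitted from the mean). A standalone snippet with hypothetical values, checking the update against the arithmetic mean:

# Incremental mean m_k = m_{k-1} + (r_k - m_{k-1}) / k, checked against sum/len.
rewards = [0.5, 1.0, -0.2, 0.3]
m = 0.0
for k, r in enumerate(rewards):
    m += (r - m) / (k + 1)
assert abs(m - sum(rewards) / len(rewards)) < 1e-12
print(m)  # 0.4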