Diff of /rdpg/gym_rdpg.py [000000] .. [687a25]

from rdpg import *
import opensim as osim
from osim.http.client import Client  # imported but unused in this script
from osim.env import *
from history import History

ENV_NAME = 'learning_to_run'
PATH = 'models/'
EPISODES = 100000
TEST = 5

def main():
    env = RunEnv(visualize=False)
    env.reset(difficulty=0)
    agent = RDPG(env)

    returns = []
    rewards = []

    for episode in xrange(EPISODES):
        state = env.reset(difficulty=0)
        reward_episode = []
        print "episode:", episode
        # Initialize the episode history with the initial observation
        history = History(state)
        # Train: roll out one episode, acting with exploration noise
        for step in xrange(env.spec.timestep_limit):
            action = agent.noise_action(history)
            next_state, reward, done, _ = env.step(action)
            # Append this step's transition to the episode history
            history.append(next_state, action, reward)
            reward_episode.append(reward)
            if done:
                break
        # Store the whole episode history in the replay buffer; once the
        # number of stored histories exceeds the threshold, training starts
        agent.perceive(history)
        # Testing (disabled): periodically save the model and evaluate the
        # deterministic policy over TEST episodes.
        # if episode % 1 == 0:
        # if episode % 1000 == 0 and episode > 50:
        #     agent.save_model(PATH, episode)

        #     total_return = 0
        #     ave_reward = 0
        #     for i in xrange(TEST):
        #         state = env.reset()
        #         reward_per_step = 0
        #         for j in xrange(env.spec.timestep_limit):
        #             action = agent.action(state)  # deterministic action for evaluation
        #             state, reward, done, _ = env.step(action)
        #             total_return += reward
        #             # incremental mean, updated before the break so the
        #             # terminal reward is also counted
        #             reward_per_step += (reward - reward_per_step)/(j + 1)
        #             if done:
        #                 break
        #         ave_reward += reward_per_step

        #     ave_return = total_return / TEST
        #     ave_reward = ave_reward / TEST
        #     returns.append(ave_return)
        #     rewards.append(ave_reward)

        #     print 'episode:', episode, 'Evaluation Average Return:', ave_return, 'Evaluation Average Reward:', ave_reward


if __name__ == '__main__':
    main()
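
Note: history.py is not part of this diff. Below is a minimal sketch of the
History interface implied by its use above: constructed from the initial
observation, then extended with one (next_state, action, reward) triple per
step. This is an assumption; the real class may differ.

# Hypothetical sketch only -- not the actual history.py from this repo.
class History(object):
    def __init__(self, state):
        # observations holds one more entry than actions/rewards:
        # o_0 at construction, then o_{t+1} for each appended step
        self.observations = [state]
        self.actions = []
        self.rewards = []

    def append(self, next_state, action, reward):
        self.observations.append(next_state)
        self.actions.append(action)
        self.rewards.append(reward)

    def __len__(self):
        # number of completed steps in the episode
        return len(self.actions)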
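
Likewise, the comment above agent.perceive() describes episode-level replay:
RDPG stores whole histories and begins training once enough have accumulated.
A hedged sketch of that logic follows; REPLAY_START_SIZE, self.replay_buffer
and self.train() are assumed names, not taken from rdpg.py.

# Hypothetical sketch only -- rdpg.py defines the real RDPG class.
REPLAY_START_SIZE = 64  # assumed threshold for starting training

class RDPG(object):
    def __init__(self, env):
        # networks, target networks, and noise process omitted
        self.replay_buffer = []  # holds History objects, one per episode

    def perceive(self, history):
        # RDPG replays whole episodes rather than single transitions
        self.replay_buffer.append(history)
        if len(self.replay_buffer) > REPLAY_START_SIZE:
            self.train()

    def train(self):
        pass  # sample stored histories and update actor/critic (omitted)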