Diff of /ddpg/helper.py [000000] .. [687a25]

Switch to unified view

a b/ddpg/helper.py
1
import numpy as np
2
import tensorflow as tf
3
# Helper Function------------------------------------------------------------------------------------------------------------
4
# Copies one set of variables to another.
5
# Used to set worker network parameters to those of global network.
6
7
def dlrelu(x, alpha=0.1):
    """Piecewise-linear activation built from three ReLU terms.

    Reading the expression region by region (for a scalar ``alpha``):
      * x < 0            -> slope ``alpha``
      * 0 <= x < 0.05    -> slope ``1 + alpha``
      * 0.05 <= x <= 0.95 -> identity
      * x > 0.95         -> slope ``alpha``

    Args:
        x: input passed to ``tf.nn.relu``  (presumably a float tensor --
            confirm against callers).
        alpha: leak factor applied outside the [0.05, 0.95] band.

    Returns:
        The result of ``relu(x) - alpha*relu(0.05-x) - (1-alpha)*relu(x-0.95)``.
    """
    rectified = tf.nn.relu(x)
    # Leak applied below 0.05.
    low_leak = alpha * tf.nn.relu(0.05 - x)
    partial = rectified - low_leak
    # Above 0.95 the unit slope is reduced to ``alpha``.
    high_damp = (1 - alpha) * tf.nn.relu(x - 0.95)
    return partial - high_damp
9
10
class RunningStats:
    """Online mean / variance accumulator using Welford's recurrence.

    Samples may be scalars or NumPy arrays; all arithmetic is element-wise.
    """

    def __init__(self):
        self.clear()

    def clear(self):
        """Forget every sample seen so far."""
        # Reset all accumulators, not just the counter, so no stale value
        # from a previous run can be observed through the attributes.
        self.n = 0
        self.old_m = 0
        self.new_m = 0
        self.old_s = 0
        self.new_s = 0

    def push(self, x):
        """Fold one sample ``x`` into the running mean and sum of squares."""
        self.n += 1

        if self.n == 1:
            # Keep a private copy of array samples: storing the caller's
            # array by reference would let a later in-place edit of that
            # array silently change the stored mean.
            if isinstance(x, np.ndarray):
                x = x.copy()
            self.old_m = self.new_m = x
            self.old_s = self.new_s = 0
        else:
            self.new_m = self.old_m + (x - self.old_m) / self.n
            self.new_s = self.old_s + (x - self.old_m) * (x - self.new_m)

            self.old_m = self.new_m
            self.old_s = self.new_s

    def mean(self):
        """Return the running mean, or 0.0 when no sample has been pushed."""
        return self.new_m if self.n else 0.0

    def variance(self):
        """Return the sample variance (n - 1 denominator); 0.0 if n <= 1."""
        return self.new_s / (self.n - 1) if self.n > 1 else 0.0

    def standard_deviation(self):
        """Return the square root of :meth:`variance`."""
        return np.sqrt(self.variance())

    def normalize(self, x):
        """Push ``x`` and return it standardised by the updated statistics.

        The first sample is returned unchanged because no variance estimate
        exists yet. The 1e-3 added to the denominator avoids division by a
        zero standard deviation.
        """
        self.push(x)
        if self.n > 1:
            return (x - self.mean()) / (self.standard_deviation() + 1e-3)
        return x
47
48
49
50
# process state (the last 3 entries are obstacle info which should not be processed)
51
def process_state(s, s1, center=True, diff=0, dt=0.01):
    """Build an augmented state vector from two consecutive observations.

    The result is the concatenation of:
      * ``s1[:36]``
      * the finite difference ``(s1[22:36] - s[22:36]) / dt`` (14 entries)
      * ``s1[36:]``
      * the finite difference ``(s1[38:] - s[38:]) / dt``

    NOTE(review): the index layout assumes a 41-entry observation (giving a
    58-entry result) -- confirm against the environment that produces it.

    Args:
        s: previous observation (any array-like).
        s1: current observation (same length as ``s``).
        center: when true, the listed x/y entries are made relative to
            entries 18/19, the listed velocity entries relative to entries
            20/21, and entries 18..21 are then zeroed.
        diff: when 0, the last six entries of the result are zeroed
            (manually turning off all obstacle information).
        dt: divisor of the finite differences; defaults to the previously
            hard-coded 0.01 (presumably the simulation time step).

    Returns:
        A new 1-D ``numpy.ndarray``; the inputs are not modified.
    """
    prev = np.asarray(s)
    curr = np.asarray(s1)

    diff_22_36 = (curr[22:36] - prev[22:36]) / dt
    diff_tail = (curr[38:] - prev[38:]) / dt
    out = np.hstack((curr[:36], diff_22_36, curr[36:], diff_tail))

    if diff == 0:
        out[-6:] = 0.0  # if diff = 0, then manually turn off all obstacles

    if center:
        # transform into all relative quantities
        x_pos = np.array([1, 22, 24, 26, 28, 30, 32, 34])
        y_pos = x_pos + 1
        # Indices 18..21 never appear in the index arrays, so the reference
        # values stay untouched until they are zeroed at the end.
        out[x_pos] -= out[18]
        out[y_pos] -= out[19]

        # The finite-difference block sits 14 entries after its source
        # block; the first x entry instead pairs with index 4.
        x_vel = x_pos + 14
        x_vel[0] = 4
        y_vel = x_vel + 1
        out[x_vel] -= out[20]
        out[y_vel] -= out[21]

        # transform cm as origin
        out[18:22] = 0.0

    return out
80
        
81
def n_step_transition(episode_buffer, n_step, gamma):
    """Collapse the last ``n_step`` transitions into one n-step transition.

    Each buffer entry is indexed as ``[s, action, reward, s1, done]``.

    The previous implementation started at ``episode_buffer[-1 - n_step]``
    and summed ``n_step`` rewards, yet took ``s1``/``done`` from
    ``episode_buffer[-1]``. That spans ``n_step + 1`` environment steps and
    drops the reward of the final one (for ``n_step == 1`` it skipped a
    step entirely). The window is now exactly the last ``n_step`` entries,
    so the start state, the discounted reward sum and the end state agree.

    Args:
        episode_buffer: indexable sequence of transitions, oldest first.
        n_step: number of trailing transitions to merge (>= 1).
        gamma: per-step discount factor.

    Returns:
        ``[s, action, r, s1, done]`` where ``s``/``action`` come from the
        first transition of the window, ``s1``/``done`` from the last, and
        ``r = sum(gamma**i * reward_i)`` over the window.

    Raises:
        ValueError: if ``n_step`` is smaller than 1.
        IndexError: if the buffer holds fewer than ``n_step`` transitions.
    """
    if n_step < 1:
        raise ValueError("n_step must be >= 1")

    first = episode_buffer[-n_step]  # raises IndexError on a short buffer
    last = episode_buffer[-1]

    r = 0
    for i in range(n_step):
        r += episode_buffer[-n_step + i][2] * gamma ** i

    return [first[0], first[1], r, last[3], last[4]]
88
89
def engineered_action(seed):
    """Return one of two hand-written 18-entry action vectors.

    Every entry starts at 0.05; a fixed set of entries is then overwritten.
    ``seed < 0.5`` selects the first pattern, anything else the second.

    NOTE(review): the second pattern equals the first with every index
    shifted by 9 (mod 18), except that index 3 is set where that shift
    would give index 5. This may be a typo in the original table --
    confirm before changing; the values are reproduced as found.
    """
    if seed < 0.5:
        overrides = {
            0: 0.3, 3: 0.8, 4: 0.5, 6: 0.3, 8: 0.8,
            9: 0.3, 11: 0.5, 14: 0.3, 17: 0.5,
        }
    else:
        overrides = {
            9: 0.3, 12: 0.8, 13: 0.5, 15: 0.3, 17: 0.8,
            0: 0.3, 2: 0.5, 3: 0.3, 8: 0.5,
        }

    action = np.full(18, 0.05)
    for index, level in overrides.items():
        action[index] = level
    return action
113
114
# [Hack] The environment's memory may leak continuously; this is the workaround from issue #58:
115
# https://github.com/stanfordnmbl/osim-rl/issues/58 
116
# separate process that holds a separate RunEnv instance.
117
# This has to be done since creating RunEnv() instances in the same process results in interleaved running of simulations.
118
119
import opensim as osim
120
from osim.http.client import Client
121
from osim.env import *
122
123
import multiprocessing
124
from multiprocessing import Process, Pipe
125
126
def standalone_headless_isolated(conn, vis, seed, diff):
    """Serve a private ``RunEnv`` over ``conn`` until told to stop.

    Intended as the target of a child process. Messages received on
    ``conn`` are tuples whose first element is a string:

      * ``('reset',)``        -> sends back ``e.reset(difficulty=diff, seed=seed)``
      * ``('step', action)``  -> sends back the result of ``e.step(action)``
      * anything else         -> closes the connection and returns

    Any exception raised while receiving or handling a message closes the
    connection and is then re-raised.
    """
    e = RunEnv(visualize=vis)
    try:
        while True:
            msg = conn.recv()
            command = msg[0]

            if command == 'reset':
                conn.send(e.reset(difficulty=diff, seed=seed))
                continue
            if command == 'step':
                conn.send(e.step(msg[1]))
                continue

            # Unknown command: treat it as a request to shut down.
            return
    finally:
        # Runs on both the normal return and the error path.
        conn.close()
        del e
149
150
# class that manages the interprocess communication and expose itself as a RunEnv.
151
class ei:  # Environment Instance
    """Proxy that runs an environment in a child process.

    The constructor starts a daemon process running
    ``standalone_headless_isolated`` and keeps both ends of the pipe:
    ``pc`` (used here to talk to the child) and ``cc`` (handed to the
    child). ``reset`` and ``step`` forward a message and block until the
    child replies.
    """

    def __init__(self, vis, seed, diff):
        self.pc, self.cc = Pipe()
        self.p = Process(
            target=standalone_headless_isolated,
            args=(self.cc, vis, seed, diff),
        )
        self.p.daemon = True
        self.p.start()

    def reset(self):
        """Ask the child to reset its environment and return its reply."""
        self.pc.send(('reset',))
        return self.pc.recv()

    def step(self, actions):
        """Send ``actions`` to the child and return its reply.

        A failure while waiting for the reply is reported on stdout and
        re-raised.
        """
        self.pc.send(('step', actions,))
        try:
            return self.pc.recv()
        except Exception:
            print('Error in recv()')
            raise

    def __del__(self):
        """Stop the child process and release the pipe.

        The attribute cleanup used to sit at class-body level, where
        ``self`` is undefined, so defining the class raised ``NameError``.
        It now lives here. The method is safe to call more than once and on
        an instance whose ``__init__`` did not complete.
        """
        # Popping makes a repeated call (explicit + garbage collector) a no-op.
        pc = self.__dict__.pop('pc', None)
        cc = self.__dict__.pop('cc', None)
        proc = self.__dict__.pop('p', None)

        exit_requested = False
        if pc is not None:
            try:
                pc.send(('exit',))
                exit_requested = True
            except OSError:
                # Pipe already closed or child gone; handled below by
                # terminating the process instead of asking it to exit.
                exit_requested = False

        if proc is not None:
            try:
                if not exit_requested and proc.is_alive():
                    # The child never got the exit message, so join()
                    # alone would block forever.
                    proc.terminate()
                #print('(ei)waiting for join...')
                proc.join()
            except (AssertionError, OSError, ValueError):
                # Process was never started or has already been closed;
                # there is nothing left to wait for.
                pass

        for conn in (pc, cc):
            if conn is not None:
                conn.close()
183
184