|
a |
|
b/ddpg/helper.py |
|
|
1 |
import numpy as np |
|
|
2 |
import tensorflow as tf |
|
|
3 |
# Helper Function------------------------------------------------------------------------------------------------------------ |
|
|
4 |
# Copies one set of variables to another. |
|
|
5 |
# Used to set worker network parameters to those of global network. |
|
|
6 |
|
|
|
7 |
def dlrelu(x, alpha=0.1):
    """Piecewise-linear activation built from three ReLU terms.

    Computes ``relu(x) - alpha * relu(0.05 - x) - (1 - alpha) * relu(x - 0.95)``.
    The second term adds a leak of slope ``alpha`` below 0.05, and the third
    term reduces the slope to ``alpha`` above 0.95.

    Args:
        x: Input tensor (anything ``tf.nn.relu`` accepts).
        alpha: Leak coefficient applied outside the [0.05, 0.95] band.

    Returns:
        Tensor produced by the element-wise expression above.
    """
    rectified = tf.nn.relu(x)
    lower_leak = alpha * tf.nn.relu(0.05 - x)
    upper_damp = (1 - alpha) * tf.nn.relu(x - 0.95)
    return rectified - lower_leak - upper_damp
|
|
9 |
|
|
|
10 |
class RunningStats:
    """Incremental mean / variance tracker using Welford's recurrence.

    Samples are fed one at a time through :meth:`push`; the mean and the sum
    of squared deviations are updated in place, so no history is stored.
    Samples may be scalars or NumPy arrays (statistics are then element-wise).
    """

    def __init__(self):
        self.n = 0        # number of samples seen since construction / clear()
        self.old_m = 0    # mean before the most recent push
        self.new_m = 0    # current mean
        self.old_s = 0    # sum of squared deviations before the most recent push
        self.new_s = 0    # current sum of squared deviations

    def clear(self):
        """Forget every sample seen so far.

        All accumulators are reset, not only the counter, so attributes read
        directly after ``clear()`` never expose values from the previous run.
        """
        self.n = 0
        self.old_m = self.new_m = 0
        self.old_s = self.new_s = 0

    def push(self, x):
        """Fold one sample ``x`` into the running statistics."""
        self.n += 1

        if self.n == 1:
            # First sample: the mean is the sample itself, no spread yet.
            self.old_m = self.new_m = x
            self.old_s = self.new_s = 0
        else:
            self.new_m = self.old_m + (x - self.old_m) / self.n
            self.new_s = self.old_s + (x - self.old_m) * (x - self.new_m)

            # Slide the "previous" values forward for the next push.
            self.old_m = self.new_m
            self.old_s = self.new_s

    def mean(self):
        """Return the running mean, or 0.0 when no sample has been pushed."""
        return self.new_m if self.n else 0.0

    def variance(self):
        """Return the sample variance (n - 1 denominator); 0.0 for n <= 1."""
        return self.new_s / (self.n - 1) if self.n > 1 else 0.0

    def standard_deviation(self):
        """Return the square root of :meth:`variance`."""
        return np.sqrt(self.variance())

    def normalize(self, x):
        """Push ``x`` and return it standardised by the updated statistics.

        The sample is pushed *before* normalising, so the statistics used
        already include ``x``. With a single sample seen there is no spread
        to divide by and ``x`` is returned unchanged. The ``1e-3`` term keeps
        the division finite when the standard deviation is zero.
        """
        self.push(x)
        if self.n > 1:
            return (x - self.mean()) / (self.standard_deviation() + 1e-3)
        return x
|
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
50 |
# process state (the last 3 entries are obstacle info which should not be processed) |
|
|
51 |
def process_state(s, s1, center=True, diff=0):
    """Build the network input from two consecutive observations.

    Finite-difference velocities (step 0.01) are computed for entries 22..35
    and for entries 38 onward, then spliced into the newer observation as
    ``[s1[:36], d(22:36), s1[36:], d(38:)]``.

    Args:
        s: Previous observation (array-like).
        s1: Current observation (array-like, same layout as ``s``).
        center: When true, subtract entries 18/19 from the x/y position slots
            and entries 20/21 from the x/y velocity slots, then zero 18..21.
        diff: Difficulty; when 0 the last six entries of the result are
            zeroed (obstacles switched off).

    Returns:
        A new 1-D ``numpy`` array; the inputs are not modified.
    """
    prev_obs = np.asarray(s)
    curr_obs = np.asarray(s1)

    mid_rates = (curr_obs[22:36] - prev_obs[22:36]) / 0.01
    tail_rates = (curr_obs[38:] - prev_obs[38:]) / 0.01
    state = np.hstack((curr_obs[:36], mid_rates, curr_obs[36:], tail_rates))

    if diff == 0:
        # With difficulty 0, manually turn off all obstacle entries.
        state[-6:] = 0.0

    if not center:
        return state

    # Express positions relative to the reference stored at 18 (x) / 19 (y).
    x_slots = np.array([1, 22, 24, 26, 28, 30, 32, 34])
    state[x_slots] -= state[18]
    state[x_slots + 1] -= state[19]

    # Matching velocity slots sit 14 further on, except the first pair,
    # which lives at 4 / 5. They are made relative to 20 (x) / 21 (y).
    vx_slots = x_slots + 14
    vx_slots[0] = 4
    state[vx_slots] -= state[20]
    state[vx_slots + 1] -= state[21]

    # The reference itself becomes the origin.
    state[18:22] = 0.0
    return state
|
|
80 |
|
|
|
81 |
def n_step_transition(episode_buffer, n_step, gamma):
    """Collapse the tail of an episode into one n-step transition.

    Each buffer entry is a 5-item record ``(s, action, reward, s1, done)``.
    The start state and action come from the entry ``n_step`` positions
    before the last one, and the next state and done flag come from the last
    entry. The reward is the ``gamma``-discounted sum of the ``n_step``
    rewards starting at the first entry; the last entry's own reward is not
    included.

    NOTE(review): the bootstrap state is taken one transition after the last
    summed reward -- confirm this offset is intentional.

    Args:
        episode_buffer: Indexable sequence of transition records holding at
            least ``n_step + 1`` entries.
        n_step: Number of rewards to accumulate.
        gamma: Discount factor.

    Returns:
        ``[s, action, discounted_reward, s1, done]``.
    """
    _, _, _, final_state, final_done = episode_buffer[-1]
    first = -1 - n_step
    start_state, start_action, _, _, _ = episode_buffer[first]
    discounted = sum(
        (episode_buffer[first + k][2] * gamma ** k for k in range(n_step)),
        0,
    )
    return [start_state, start_action, discounted, final_state, final_done]
|
|
88 |
|
|
|
89 |
def engineered_action(seed):
    """Return one of two hand-crafted 18-element action vectors.

    Every entry starts at 0.05; a fixed set of indices is then raised to
    0.3 / 0.5 / 0.8. Which set is used depends on ``seed``.

    Args:
        seed: Number selecting the pattern: ``seed < 0.5`` picks the first
            pattern, anything else picks the second.

    Returns:
        ``numpy`` float array of shape ``(18,)``.
    """
    if seed < 0.5:
        overrides = {
            0: 0.3, 3: 0.8, 4: 0.5,
            6: 0.3, 8: 0.8, 9: 0.3,
            11: 0.5, 14: 0.3, 17: 0.5,
        }
    else:
        overrides = {
            9: 0.3, 12: 0.8, 13: 0.5,
            15: 0.3, 17: 0.8, 0: 0.3,
            2: 0.5, 3: 0.3, 8: 0.5,
        }

    action = np.full(18, 0.05)
    for index, level in overrides.items():
        action[index] = level
    return action
|
|
113 |
|
|
|
114 |
# [Hacked] the memory might always be leaking, here's a solution #58 |
|
|
115 |
# https://github.com/stanfordnmbl/osim-rl/issues/58 |
|
|
116 |
# separate process that holds a separate RunEnv instance. |
|
|
117 |
# This has to be done since RunEnv() in the same process results in interleaved running of simulations. |
|
|
118 |
|
|
|
119 |
import opensim as osim |
|
|
120 |
from osim.http.client import Client |
|
|
121 |
from osim.env import * |
|
|
122 |
|
|
|
123 |
import multiprocessing |
|
|
124 |
from multiprocessing import Process, Pipe |
|
|
125 |
|
|
|
126 |
def standalone_headless_isolated(conn, vis, seed, diff):
    """Worker loop: own one ``RunEnv`` and serve it over a pipe connection.

    Messages received on ``conn`` are tuples whose first item is a command:

    * ``('reset',)``        -> reply with ``env.reset(difficulty=diff, seed=seed)``
    * ``('step', action)``  -> reply with the result of ``env.step(action)``
    * anything else         -> close the connection and return

    Any exception closes the connection and is re-raised.

    Args:
        conn: Connection end used for both receiving and replying.
        vis: Passed to ``RunEnv(visualize=...)``.
        seed: Seed forwarded to every reset.
        diff: Difficulty forwarded to every reset.
    """
    # RunEnv is presumably provided by the `osim.env` star import -- confirm.
    env = RunEnv(visualize=vis)
    while True:
        try:
            message = conn.recv()

            # Messages are tuples; message[0] is the command string.
            if message[0] == 'reset':
                observation = env.reset(difficulty=diff, seed=seed)
                conn.send(observation)
                continue

            if message[0] == 'step':
                step_result = env.step(message[1])
                conn.send(step_result)
                continue

            # Unknown command (e.g. 'exit'): shut the worker down.
            conn.close()
            del env
            return
        except BaseException:
            # Same reach as a bare ``except``: tidy up, then propagate.
            conn.close()
            del env
            raise
|
|
149 |
|
|
|
150 |
# class that manages the interprocess communication and expose itself as a RunEnv. |
|
|
151 |
class ei:  # Environment Instance
    """Proxy that runs an environment in a child process and mimics its API.

    A daemon ``Process`` running ``standalone_headless_isolated`` owns the
    real environment. This object forwards ``reset`` / ``step`` calls over a
    ``Pipe`` and returns whatever the worker replies.

    Attributes:
        pc: Parent end of the pipe (used by this object).
        cc: Child end of the pipe; handed to the worker and closed locally
            once the worker has started.
        p: The worker ``Process``.
    """

    def __init__(self, vis, seed, diff):
        """Start the worker process.

        Args:
            vis: Forwarded to the worker (visualisation flag).
            seed: Forwarded to the worker (reset seed).
            diff: Forwarded to the worker (reset difficulty).
        """
        self.pc, self.cc = Pipe()
        self.p = Process(
            target=standalone_headless_isolated,
            args=(self.cc, vis, seed, diff),
        )
        self.p.daemon = True
        self.p.start()
        # The worker now holds its own handle to the child end. Dropping the
        # parent's copy lets recv() raise EOFError if the worker dies,
        # instead of blocking forever on a pipe nobody will write to.
        self.cc.close()

    def reset(self):
        """Ask the worker to reset the environment; return its reply."""
        self.pc.send(('reset',))
        return self.pc.recv()

    def step(self, actions):
        """Ask the worker to step with ``actions``; return its reply.

        Raises:
            Whatever ``recv()`` raises (e.g. ``EOFError`` when the worker has
            exited); the error is reported on stdout and re-raised.
        """
        self.pc.send(('step', actions,))
        try:
            return self.pc.recv()
        except Exception:
            print('Error in recv()')
            raise

    def __del__(self):
        """Shut the worker down and reap it; never raises on a dead worker."""
        # __init__ may have failed part-way, so either attribute can be missing.
        conn = getattr(self, 'pc', None)
        proc = getattr(self, 'p', None)

        asked_to_exit = False
        if conn is not None:
            try:
                conn.send(('exit',))
                asked_to_exit = True
            except (OSError, ValueError):
                # Pipe already closed or worker already gone.
                pass

        # pid is None until the process has actually been started.
        if proc is not None and proc.pid is not None:
            if not asked_to_exit and proc.is_alive():
                # The worker cannot be told to quit; stop it so join() returns.
                proc.terminate()
            proc.join()

        if conn is not None:
            try:
                conn.close()
            except OSError:
                pass
|
|
183 |
|
|
|
184 |
|