NIPS-2017-Learning-to-Run / Git / Diff of /ddpg/helper.py

Models:
JoelW/
NIPS-2017-Learning-to-Run
Downloads: 1
Diff of /ddpg/helper.py [000000] .. [687a25]
Switch to side-by-side view

--- a
+++ b/ddpg/helper.py
@@ -0,0 +1,184 @@
+import numpy as np
+import tensorflow as tf
+# Helper Function------------------------------------------------------------------------------------------------------------
+# Copies one set of variables to another.
+# Used to set worker network parameters to those of global network.
+
+def dlrelu(x, alpha=0.1):
+  return tf.nn.relu(x) - alpha * tf.nn.relu(0.05-x) - (1 - alpha) *  tf.nn.relu(x-0.95) 
+
+class RunningStats:
+
+    def __init__(self):
+        self.n = 0
+        self.old_m = 0
+        self.new_m = 0
+        self.old_s = 0
+        self.new_s = 0
+
+    def clear(self):
+        self.n = 0
+
+    def push(self, x):
+        self.n += 1
+
+        if self.n == 1:
+            self.old_m = self.new_m = x
+            self.old_s = 0
+        else:
+            self.new_m = self.old_m + (x - self.old_m) / self.n
+            self.new_s = self.old_s + (x - self.old_m) * (x - self.new_m)
+
+            self.old_m = self.new_m
+            self.old_s = self.new_s
+
+    def mean(self):
+        return self.new_m if self.n else 0.0
+
+    def variance(self):
+        return self.new_s / (self.n - 1) if self.n > 1 else 0.0
+
+    def standard_deviation(self):
+        return np.sqrt(self.variance())
+        
+    def normalize(self,x):
+        self.push(x)
+        return (x - self.mean()) / (self.standard_deviation()+1e-3) if self.n > 1 else x
+
+
+
+# process state (the last 3 entires are obstacle info which should not be processed)
+def process_state(s,s1,center=True,diff=0):
+    s = np.asarray(s)
+    s1 = np.asarray(s1)
+    s_14 = (s1[22:36]-s[22:36]) / 0.01
+    s_3 = (s1[38:]-s[38:]) / 0.01
+    s = np.hstack((s1[:36],s_14,s1[36:],s_3))
+    if diff == 0:
+        s[-6:] = 0.0 # if diff = 0, then manully turn off all obstacles
+
+    if center:
+      # transform into all relative quantities
+      x_pos = [1,22,24,26,28,30,32,34]
+      y_pos = [i+1 for i in x_pos]
+      for i in x_pos:
+          s[i] -= s[18]
+      for j in y_pos:
+          s[j] -= s[19]
+      
+      x_vs = [i+14 for i in x_pos]
+      x_vs[0] = 4
+      y_vs = [i+1 for i in x_vs]
+      for i in x_vs:
+          s[i] -= s[20]
+      for j in y_vs:
+          s[j] -= s[21]
+      # transform cm as origin
+      s[18:22] = 0.0
+        
+    return s
+        
+def n_step_transition(episode_buffer,n_step,gamma):
+    _,_,_,s1,done = episode_buffer[-1]
+    s,action,_,_,_ = episode_buffer[-1-n_step]
+    r = 0
+    for i in range(n_step):
+      r += episode_buffer[-1-n_step+i][2]*gamma**i
+    return [s,action,r,s1,done]
+
+def engineered_action(seed):
+    test = np.ones(18)*0.05
+    if seed < 0.5:
+        test[0] = 0.3
+        test[3] = 0.8
+        test[4] = 0.5
+        test[6] = 0.3
+        test[8] = 0.8
+        test[9] = 0.3
+        test[11] = 0.5
+        test[14] = 0.3
+        test[17] = 0.5
+    else:
+        test[9] = 0.3
+        test[12] = 0.8
+        test[13] = 0.5
+        test[15] = 0.3
+        test[17] = 0.8
+        test[0] = 0.3
+        test[2] = 0.5
+        test[3] = 0.3
+        test[8] = 0.5
+            
+    return test
+
+# [Hacked] the memory might always be leaking, here's a solution #58
+# https://github.com/stanfordnmbl/osim-rl/issues/58 
+# separate process that holds a separate RunEnv instance.
+# This has to be done since RunEnv() in the same process result in interleaved running of simulations.
+
+import opensim as osim
+from osim.http.client import Client
+from osim.env import *
+
+import multiprocessing
+from multiprocessing import Process, Pipe
+
+def standalone_headless_isolated(conn,vis,seed,diff):
+    e = RunEnv(visualize=vis)
+    while True:
+        try:
+            msg = conn.recv()
+
+            # messages should be tuples,
+            # msg[0] should be string
+
+            if msg[0] == 'reset':
+                o = e.reset(difficulty=diff,seed=seed)
+                conn.send(o)
+            elif msg[0] == 'step':
+                ordi = e.step(msg[1])
+                conn.send(ordi)
+            else:
+                conn.close()
+                del e
+                return
+        except:
+            conn.close()
+            del e
+            raise
+
+# class that manages the interprocess communication and expose itself as a RunEnv.
+class ei: # Environment Instance
+    def __init__(self,vis,seed,diff):
+        self.pc, self.cc = Pipe()
+        self.p = Process(
+            target = standalone_headless_isolated,
+            args=(self.cc,vis,seed,diff,)
+        )
+        self.p.daemon = True
+        self.p.start()
+
+    def reset(self):
+        self.pc.send(('reset',))
+        return self.pc.recv()
+
+    def step(self,actions):
+        self.pc.send(('step',actions,))
+        try:
+            return self.pc.recv()
+        except :  
+            print('Error in recv()')
+            raise
+
+    def __del__(self):
+        self.pc.send(('exit',))
+        #print('(ei)waiting for join...')
+        self.p.join()
+	try:
+	    del self.pc
+	    del self.cc
+	    del self.p
+	except:
+	    raise
+
+