import numpy as np
import tensorflow as tf
# Helper functions -----------------------------------------------------------------------------------------------------------

# Double-ended leaky ReLU: identity on roughly (0.05, 0.95), slope `alpha` below 0
# and above 0.95, so large or negative values are damped rather than hard-clipped.
def dlrelu(x, alpha=0.1):
    return tf.nn.relu(x) - alpha * tf.nn.relu(0.05 - x) - (1 - alpha) * tf.nn.relu(x - 0.95)
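# Minimal sketch (hypothetical helper, not used by the training code) of what dlrelu does:
# inputs around 0.5 pass through unchanged, while inputs below 0 or above 0.95 are
# compressed to slope `alpha`.
def _demo_dlrelu():
    x = tf.constant([-0.5, 0.0, 0.5, 1.0, 1.5], dtype=tf.float32)
    return dlrelu(x)  # a tensor; evaluate it in a session (TF1) or read it eagerly (TF2)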
# Tracks a running mean and variance with Welford's online algorithm;
# used to normalize values on the fly without storing the full history.
class RunningStats:
    def __init__(self):
        self.n = 0
        self.old_m = 0
        self.new_m = 0
        self.old_s = 0
        self.new_s = 0

    def clear(self):
        self.n = 0

    def push(self, x):
        self.n += 1
        if self.n == 1:
            self.old_m = self.new_m = x
            self.old_s = 0
        else:
            self.new_m = self.old_m + (x - self.old_m) / self.n
            self.new_s = self.old_s + (x - self.old_m) * (x - self.new_m)
            self.old_m = self.new_m
            self.old_s = self.new_s

    def mean(self):
        return self.new_m if self.n else 0.0

    def variance(self):
        return self.new_s / (self.n - 1) if self.n > 1 else 0.0

    def standard_deviation(self):
        return np.sqrt(self.variance())

    def normalize(self, x):
        # Push the new sample first, then scale it by the updated statistics.
        self.push(x)
        return (x - self.mean()) / (self.standard_deviation() + 1e-3) if self.n > 1 else x
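# Sketch of using RunningStats for online normalization (hypothetical demo helper;
# the sample values are made up).
def _demo_running_stats():
    rs = RunningStats()
    rewards = [1.0, 2.0, 3.0, 4.0]
    scaled = [rs.normalize(r) for r in rewards]  # each call pushes the sample, then rescales it
    # After the four pushes: mean == 2.5 and sample variance == 5/3.
    return scaled, rs.mean(), rs.variance()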
# Process the raw state (the last 3 entries are obstacle info, which should not be processed).
# Appends finite-difference velocities (dt = 0.01) and optionally re-centers positions and
# velocities on the center of mass (entries 18-21).
def process_state(s, s1, center=True, diff=0):
    s = np.asarray(s)
    s1 = np.asarray(s1)
    s_14 = (s1[22:36] - s[22:36]) / 0.01
    s_3 = (s1[38:] - s[38:]) / 0.01
    s = np.hstack((s1[:36], s_14, s1[36:], s_3))
    if diff == 0:
        s[-6:] = 0.0  # if diff == 0, manually zero out all obstacle information
    if center:
        # transform into all relative quantities
        x_pos = [1, 22, 24, 26, 28, 30, 32, 34]
        y_pos = [i + 1 for i in x_pos]
        for i in x_pos:
            s[i] -= s[18]
        for j in y_pos:
            s[j] -= s[19]
        x_vs = [i + 14 for i in x_pos]
        x_vs[0] = 4
        y_vs = [i + 1 for i in x_vs]
        for i in x_vs:
            s[i] -= s[20]
        for j in y_vs:
            s[j] -= s[21]
        # make the center of mass the origin
        s[18:22] = 0.0
    return s
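# Sketch of calling process_state on two consecutive raw observations (hypothetical demo;
# assumes the 41-dimensional observation vector of the 2017 RunEnv).
def _demo_process_state():
    s_prev = np.zeros(41)
    s_curr = np.zeros(41)
    s_curr[22:36] = 0.01  # pretend the body-part positions moved by 0.01 in one step
    processed = process_state(s_prev, s_curr, center=True, diff=0)
    # 36 raw entries + 14 position diffs + 5 raw entries + 3 obstacle diffs = 58 values
    return processed.shape  # (58,)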
# Build an n-step transition [s, a, discounted n-step reward, s', done] from the
# most recent n_step + 1 entries of the episode buffer.
def n_step_transition(episode_buffer, n_step, gamma):
    _, _, _, s1, done = episode_buffer[-1]
    s, action, _, _, _ = episode_buffer[-1 - n_step]
    r = 0
    for i in range(n_step):
        r += episode_buffer[-1 - n_step + i][2] * gamma ** i
    return [s, action, r, s1, done]
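# Sketch of building a 3-step transition from an episode buffer of
# [state, action, reward, next_state, done] entries (hypothetical toy values).
def _demo_n_step_transition():
    buffer = [['s%d' % t, 'a%d' % t, float(t), 's%d' % (t + 1), False] for t in range(5)]
    transition = n_step_transition(buffer, n_step=3, gamma=0.99)
    # The returned reward is the discounted sum 1.0 + 0.99 * 2.0 + 0.99 ** 2 * 3.0,
    # and the bootstrap state/done come from the newest buffer entry.
    return transition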
# Hand-crafted 18-muscle activation vector; `seed` (< 0.5 or >= 0.5) selects one of
# two fixed activation patterns.
def engineered_action(seed):
    test = np.ones(18) * 0.05
    if seed < 0.5:
        test[0] = 0.3
        test[3] = 0.8
        test[4] = 0.5
        test[6] = 0.3
        test[8] = 0.8
        test[9] = 0.3
        test[11] = 0.5
        test[14] = 0.3
        test[17] = 0.5
    else:
        test[9] = 0.3
        test[12] = 0.8
        test[13] = 0.5
        test[15] = 0.3
        test[17] = 0.8
        test[0] = 0.3
        test[2] = 0.5
        test[3] = 0.3
        test[8] = 0.5
    return test
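# Sketch of drawing one of the two engineered patterns at random (hypothetical usage).
def _demo_engineered_action():
    action = engineered_action(np.random.rand())
    return action.shape  # (18,)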
# [Hack] RunEnv seems to leak memory over long runs; workaround from osim-rl issue #58:
# https://github.com/stanfordnmbl/osim-rl/issues/58
# Run each RunEnv instance in its own process. This is necessary because multiple
# RunEnv() instances in the same process result in interleaved simulations.
import opensim as osim
from osim.http.client import Client
from osim.env import *
import multiprocessing
from multiprocessing import Process, Pipe
def standalone_headless_isolated(conn, vis, seed, diff):
    e = RunEnv(visualize=vis)
    while True:
        try:
            msg = conn.recv()
            # messages are tuples; msg[0] is a command string
            if msg[0] == 'reset':
                o = e.reset(difficulty=diff, seed=seed)
                conn.send(o)
            elif msg[0] == 'step':
                ordi = e.step(msg[1])  # (observation, reward, done, info)
                conn.send(ordi)
            else:
                # any other message shuts the worker down
                conn.close()
                del e
                return
        except:
            conn.close()
            del e
            raise
# Manages the interprocess communication and exposes itself like a RunEnv.
class ei:  # Environment Instance
    def __init__(self, vis, seed, diff):
        self.pc, self.cc = Pipe()
        self.p = Process(
            target=standalone_headless_isolated,
            args=(self.cc, vis, seed, diff,)
        )
        self.p.daemon = True
        self.p.start()

    def reset(self):
        self.pc.send(('reset',))
        return self.pc.recv()

    def step(self, actions):
        self.pc.send(('step', actions,))
        try:
            return self.pc.recv()
        except:
            print('Error in recv()')
            raise

    def __del__(self):
        self.pc.send(('exit',))
        # print('(ei) waiting for join...')
        self.p.join()
        try:
            del self.pc
            del self.cc
            del self.p
        except:
            raise
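# Sketch of driving one wrapped environment end to end (hypothetical demo; assumes the
# gym-style (observation, reward, done, info) return of RunEnv.step).
def _demo_ei():
    env = ei(vis=False, seed=0, diff=0)
    observation = env.reset()
    action = engineered_action(np.random.rand())
    observation, reward, done, info = env.step(action.tolist())
    del env  # sends 'exit' to the child process and joins it
    return reward, done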