Diff of /A3C/helper.py [000000] .. [687a25]

Switch to unified view

a b/A3C/helper.py
1
import numpy as np
2
import tensorflow as tf
3
import scipy.signal as ss
4
5
6
# Helper Function------------------------------------------------------------------------------------------------------------
7
# Copies one set of variables to another.
8
# Used to set worker network parameters to those of global network.
9
def update_target_graph(from_scope,to_scope):
    """Build assign ops that copy trainable variables between two scopes.

    Args:
        from_scope: Scope whose trainable variables are read.
        to_scope: Scope whose trainable variables are overwritten.

    Returns:
        A list with one assign op per (source, destination) pair, pairing the
        two collections positionally in the order ``tf.get_collection``
        returns them. Nothing is executed here; the ops must be run in a
        session by the caller.
    """
    collection = tf.GraphKeys.TRAINABLE_VARIABLES
    sources = tf.get_collection(collection, from_scope)
    targets = tf.get_collection(collection, to_scope)

    # zip stops at the shorter collection, so unmatched variables are skipped.
    return [dst.assign(src) for src, dst in zip(sources, targets)]
17
18
# Normalize state 
19
def process_frame(s):
    """Standardize a state to zero mean and unit standard deviation.

    Args:
        s: Array-like of numbers; converted with ``np.asarray``.

    Returns:
        An array holding ``(s - mean(s)) / std(s)``, with the statistics
        taken over all elements. If the standard deviation is exactly zero
        (for example every element is identical), the centred values are
        returned without dividing, so the result is all zeros instead of NaN.
    """
    s = np.asarray(s)
    centered = s - np.mean(s)
    spread = np.std(s)

    # A zero spread would turn the division into 0/0 and fill the state with
    # NaN, which then propagates into anything computed from it.
    if spread == 0:
        return centered
    return centered / spread
23
24
# Process state (the last 3 entries are obstacle info, which should not be processed)
25
def process_state(s,s1):
    """Combine two states into a difference vector plus three raw entries.

    Args:
        s: Array-like state; supplies the subtrahend and the trailing entries.
        s1: Array-like state; supplies the minuend.

    Returns:
        An array made of ``s1[:-3] - s[:-3]`` followed by the last three
        entries of ``s`` copied through unchanged.
    """
    first = np.asarray(s)
    second = np.asarray(s1)

    # Everything except the last three entries is differenced.
    delta = second[:-3] - first[:-3]
    # The trailing three entries come from ``s`` (not ``s1``) untouched.
    tail = first[-3:]
    return np.hstack((delta, tail))
30
    
31
def engineered_action(seed):
    """Return one of two fixed 18-element action vectors, chosen by ``seed``.

    Args:
        seed: Number compared against 0.5 to pick the pattern.

    Returns:
        A length-18 float array filled with 0.05, with nine positions raised
        to 0.9. ``seed < 0.5`` selects one index set; any other value selects
        the second set.
    """
    if seed < .5:
        raised = [0, 3, 4, 8, 10, 11, 12, 13, 17]
    else:
        raised = [1, 2, 3, 4, 8, 9, 12, 13, 17]

    a = np.full(18, 0.05)
    a[raised] = 0.9
    return a
54
55
def normalized_columns_initializer(std=1.0):
    """Create an initializer whose columns each have L2 norm ``std``.

    Args:
        std: Target norm for every column (norm taken along axis 0).

    Returns:
        A function ``_initializer(shape, dtype=None, partition_info=None)``
        that draws standard-normal float32 values of the given shape,
        rescales them along axis 0 and returns the result wrapped in
        ``tf.constant``. The ``dtype`` and ``partition_info`` arguments are
        accepted but not used.
    """
    def _initializer(shape, dtype=None, partition_info=None):
        weights = np.random.randn(*shape).astype(np.float32)
        # Norm of each column, kept 2-D-compatible so it broadcasts over rows.
        column_norms = np.sqrt(np.square(weights).sum(axis=0, keepdims=True))
        weights *= std / column_norms
        return tf.constant(weights)
    return _initializer
61
62
def discount(x, gamma):
    """Compute the discounted cumulative sum of ``x`` along axis 0.

    Args:
        x: Sequence or array supporting ``[::-1]``.
        gamma: Discount factor applied per step.

    Returns:
        An array ``y`` with ``y[t] = x[t] + gamma * y[t + 1]``, i.e. each
        entry is the sum of itself and all later entries, discounted.
    """
    # Filtering the time-reversed input with y[n] = x[n] + gamma * y[n - 1]
    # accumulates from the end; reversing again restores the original order.
    backwards = x[::-1]
    numerator = [1]
    denominator = [1, -gamma]
    accumulated = ss.lfilter(numerator, denominator, backwards, axis=0)
    return accumulated[::-1]
64
65
# These functions allow us to update the parameters of our target network with those of the primary network.
66
def updateTargetGraph(tfVars,tau):
    """Build ops that blend the first half of ``tfVars`` into the second half.

    The list is split at ``len(tfVars) // 2``. The variable at position ``i``
    in the first half is paired with the variable at ``i + len(tfVars) // 2``,
    which is assigned ``tau * first + (1 - tau) * second``.

    Args:
        tfVars: Sequence of variables exposing ``value()`` and ``assign()``.
            With an odd length the final variable is left unpaired.
        tau: Blend weight given to the first-half value; 1 is a full copy.

    Returns:
        A list of assign ops, one per pair. Nothing is executed here.
    """
    # Floor division keeps the slice bound an int; true division yields a
    # float on Python 3 and makes the slice raise TypeError.
    half = len(tfVars) // 2
    return [
        target.assign((source.value() * tau) + ((1 - tau) * target.value()))
        for source, target in zip(tfVars[:half], tfVars[half:])
    ]
72
73
def updateTarget(op_holder,sess):
    """Run the update ops, then report whether the first variable pair matches.

    Args:
        op_holder: Iterable of ops to execute, one ``sess.run`` call each.
        sess: Session used to run the ops and evaluate the variables.

    Prints "Target Set Success" when the first trainable variable and the
    variable at the midpoint of ``tf.trainable_variables()`` hold identical
    arrays, and "Target Set Failed" otherwise.
    """
    for op in op_holder:
        sess.run(op)

    trainables = tf.trainable_variables()
    # Floor division: a float index raises TypeError on Python 3.
    half = len(trainables) // 2
    a = trainables[0].eval(session=sess)
    b = trainables[half].eval(session=sess)

    # Compare the arrays element-wise. ``a.all() == b.all()`` only compared
    # two truthiness flags and reported success for almost any pair of arrays.
    # NOTE(review): exact equality is only expected after a full copy
    # (tau == 1 when the ops were built); a partial blend prints "Failed".
    if np.array_equal(a, b):
        print("Target Set Success")
    else:
        print("Target Set Failed")
83
        
84
85
    
86
87
88