[687a25]: / ddpg / actor_network.py

import tensorflow as tf
import numpy as np
import math
from helper import dlrelu
# Hyperparameters
LAYER1_SIZE = 400       # first hidden layer width (400/300 follows the DDPG paper)
LAYER2_SIZE = 300       # second hidden layer width
LEARNING_RATE = 5e-5    # Adam learning rate for the actor update
TAU = 1e-5              # soft target-update rate (see create_target_network)
BATCH_SIZE = 64         # minibatch size (not used within this file)


class ActorNetwork:
    """Actor (policy) network for DDPG: maps a state to a deterministic action."""

    def __init__(self, sess, state_dim, action_dim):
        self.sess = sess
        self.state_dim = state_dim
        self.action_dim = action_dim
        # create actor network
        self.state_input, self.action_output, self.net = self.create_network(state_dim, action_dim)
        # create target actor network
        self.target_state_input, self.target_action_output, self.target_update, self.target_net = self.create_target_network(state_dim, action_dim, self.net)
        # define training rules
        self.create_training_method()

        self.sess.run(tf.global_variables_initializer())
        self.update_target()
        # self.load_network()
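
    # The deterministic policy gradient update below relies on the critic: the
    # caller feeds dQ/da for every sampled state into q_gradient_input, and
    # tf.gradients back-propagates it through the actor, yielding
    # sum_i (dQ/da_i) * (da_i/dtheta) for each parameter. Since apply_gradients
    # performs gradient descent, q_gradient_input is expected to already carry
    # the sign needed for ascent on Q (typically the negated critic gradient).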
    def create_training_method(self):
        self.q_gradient_input = tf.placeholder("float", [None, self.action_dim])
        self.parameters_gradients = tf.gradients(self.action_output, self.net, self.q_gradient_input)
        self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(zip(self.parameters_gradients, self.net))

    def create_network(self, state_dim, action_dim):
        layer1_size = LAYER1_SIZE
        layer2_size = LAYER2_SIZE

        state_input = tf.placeholder("float", [None, state_dim])

        W1 = self.variable([state_dim, layer1_size], state_dim)
        b1 = self.variable([layer1_size], state_dim)
        W2 = self.variable([layer1_size, layer2_size], layer1_size)
        b2 = self.variable([layer2_size], layer1_size)
        W3 = tf.Variable(tf.random_uniform([layer2_size, action_dim], -3e-3, 3e-3))
        b3 = tf.Variable(tf.random_uniform([action_dim], 1e-3, 0.1))

        layer1 = tf.tanh(tf.matmul(state_input, W1) + b1)
        layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
        action_output = tf.sigmoid(tf.matmul(layer2, W3) + b3)

        return state_input, action_output, [W1, b1, W2, b2, W3, b3]
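
    # The target network below shares the actor's architecture, but its
    # parameters are exponential moving averages of the learned ones:
    #     theta_target <- (1 - TAU) * theta_target + TAU * theta
    # Running `target_update` once per training step makes the target actor
    # track the online actor slowly, stabilising the critic's bootstrap targets.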
    def create_target_network(self, state_dim, action_dim, net):
        state_input = tf.placeholder("float", [None, state_dim])
        ema = tf.train.ExponentialMovingAverage(decay=1 - TAU)
        target_update = ema.apply(net)
        target_net = [ema.average(x) for x in net]

        layer1 = tf.tanh(tf.matmul(state_input, target_net[0]) + target_net[1])
        layer2 = tf.tanh(tf.matmul(layer1, target_net[2]) + target_net[3])
        action_output = tf.sigmoid(tf.matmul(layer2, target_net[4]) + target_net[5])

        return state_input, action_output, target_update, target_net

    def update_target(self):
        self.sess.run(self.target_update)

    def train(self, q_gradient_batch, state_batch):
        self.sess.run(self.optimizer, feed_dict={
            self.q_gradient_input: q_gradient_batch,
            self.state_input: state_batch
        })

    def actions(self, state_batch):
        return self.sess.run(self.action_output, feed_dict={
            self.state_input: state_batch
        })

    def action(self, state):
        return self.sess.run(self.action_output, feed_dict={
            self.state_input: [state]
        })[0]

    def target_actions(self, state_batch):
        return self.sess.run(self.target_action_output, feed_dict={
            self.target_state_input: state_batch
        })
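
    # Hidden-layer weights and biases are drawn uniformly from
    # [-1/sqrt(f), 1/sqrt(f)], where f is the layer's fan-in, as in the DDPG
    # paper; the output layer in create_network is instead initialised in a
    # small fixed range so the initial policy outputs stay close to the middle
    # of the sigmoid's range.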
    # f: fan-in of the layer being initialised
    def variable(self, shape, f):
        return tf.Variable(tf.random_uniform(shape, -1 / math.sqrt(f), 1 / math.sqrt(f)))

    '''
    def load_network(self):
        self.saver = tf.train.Saver()
        checkpoint = tf.train.get_checkpoint_state("saved_actor_networks")
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
            print("Successfully loaded:", checkpoint.model_checkpoint_path)
        else:
            print("Could not find old network weights")

    def save_network(self, time_step):
        print('save actor-network...', time_step)
        self.saver.save(self.sess, 'saved_actor_networks/' + 'actor-network', global_step=time_step)
    '''
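

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original file; it
# assumes a live TF1 session and arbitrary placeholder dimensions):
#
#     sess = tf.InteractiveSession()
#     actor = ActorNetwork(sess, state_dim=3, action_dim=1)
#     single_action = actor.action(np.zeros(3))   # one action, each dim in (0, 1)
#     action_batch = actor.actions(np.zeros((BATCH_SIZE, 3)))
#     actor.update_target()                       # one soft target-network update
# ---------------------------------------------------------------------------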