[349d16]: / code / dnc_code / DNC / controller.py

# Read and Write Head controller based on LSTM.
# Note: Derived from GitHub user loudinthecloud's NTM implementation
import torch
from torch import nn
from torch.nn import Parameter
import numpy as np


class controller(nn.Module):  # LSTM Controller
    def __init__(self, num_inputs, num_outputs, num_layers):
        super(controller, self).__init__()
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.num_layers = num_layers
        self.lstm_network = nn.LSTM(input_size=self.num_inputs, hidden_size=self.num_outputs, num_layers=self.num_layers)
        # Learnable initial states for the LSTM; the hidden state serves as the output of the network.
        self.h_init = Parameter(torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  # Hidden state initialization
        self.c_init = Parameter(torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  # Cell state initialization
        # Initialization of the LSTM parameters: biases (1-D) to zero, weight matrices scaled uniform.
        for p in self.lstm_network.parameters():
            if p.dim() == 1:
                nn.init.constant_(p, 0)
            else:
                # The factor of 5 follows the NTM reference implementation this file is derived from.
                stdev = 5 / (np.sqrt(self.num_inputs + self.num_outputs))
                nn.init.uniform_(p, -stdev, stdev)

    def create_hidden_state(self, batch_size):  # Output : (num_layers x batch_size x num_outputs)
        h = self.h_init.clone().repeat(1, batch_size, 1)
        c = self.c_init.clone().repeat(1, batch_size, 1)
        return h, c

    def network_size(self):
        return self.num_inputs, self.num_outputs

    def forward(self, inp, prev_state):
        inp = inp.unsqueeze(0)  # inp dimension after unsqueeze : (1 x batch_size x input_size)
        # Input to an LSTM must be of shape (seq_len x batch_size x input_size) in PyTorch. Here, seq_len = 1.
        output, state = self.lstm_network(inp, prev_state)
        return output.squeeze(0), state
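
# Usage sketch (illustrative; not from the original repository, and the sizes
# below are assumptions). The controller is stepped one time-step at a time:
#
#   ctrl = controller(num_inputs=10, num_outputs=20, num_layers=1)
#   state = ctrl.create_hidden_state(batch_size=4)
#   x = torch.randn(4, 10)       # one time-step: (batch_size x num_inputs)
#   out, state = ctrl(x, state)  # out: (batch_size x num_outputs)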


class backward_controller(nn.Module):  # Backward LSTM to make the DNC bidirectional
    def __init__(self, num_inputs, num_outputs, num_layers):
        super(backward_controller, self).__init__()
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.num_layers = num_layers
        self.lstm_network = nn.LSTM(input_size=self.num_inputs, hidden_size=self.num_outputs, num_layers=self.num_layers)
        # Learnable initial states for the LSTM; the hidden state serves as the output of the network.
        self.h_init = Parameter(torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  # Hidden state initialization
        self.c_init = Parameter(torch.randn(self.num_layers, 1, self.num_outputs) * 0.05)  # Cell state initialization
        # Initialization of the LSTM parameters: biases (1-D) to zero, weight matrices scaled uniform.
        for p in self.lstm_network.parameters():
            if p.dim() == 1:
                nn.init.constant_(p, 0)
            else:
                # The factor of 5 follows the NTM reference implementation this file is derived from.
                stdev = 5 / (np.sqrt(self.num_inputs + self.num_outputs))
                nn.init.uniform_(p, -stdev, stdev)

    def create_hidden_state(self, batch_size):  # Output : (num_layers x batch_size x num_outputs)
        h = self.h_init.clone().repeat(1, batch_size, 1)
        c = self.c_init.clone().repeat(1, batch_size, 1)
        return h, c

    def network_size(self):
        return self.num_inputs, self.num_outputs

    def forward(self, inp, prev_states):  # inp dimension : (seq_len x batch_size x input_size)
        inp = inp[torch.arange(inp.shape[0] - 1, -1, -1), :, :]  # Reverse the input along the time axis (equivalent to torch.flip(inp, [0]))
        # Unlike the forward controller, the whole reversed sequence is processed in a single LSTM call.
        output, state = self.lstm_network(inp, prev_states)
        # output = output[torch.arange(output.shape[0]-1, -1, -1), :, :]  # Re-reverse 'output' to original time order (disabled, so 'output' stays time-reversed).
        return output, state  # Output size is (seq_len x batch_size x hidden_size) as per the PyTorch documentation
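

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original repository: it shows how the
    # forward and backward controllers could be combined into one bidirectional
    # pass over a sequence. All sizes below are illustrative assumptions.
    seq_len, batch_size, num_inputs, num_outputs, num_layers = 5, 4, 10, 20, 1

    fwd = controller(num_inputs, num_outputs, num_layers)
    bwd = backward_controller(num_inputs, num_outputs, num_layers)
    inputs = torch.randn(seq_len, batch_size, num_inputs)

    # Forward direction: the controller is stepped one time-step at a time.
    state = fwd.create_hidden_state(batch_size)
    fwd_outputs = []
    for t in range(seq_len):
        out, state = fwd(inputs[t], state)
        fwd_outputs.append(out)
    fwd_outputs = torch.stack(fwd_outputs)  # (seq_len x batch_size x num_outputs)

    # Backward direction: the whole sequence is passed in one call. As noted above,
    # the returned output is in reversed time order (the re-reversing line is
    # commented out), so flip it before concatenating with the forward outputs.
    bwd_outputs, _ = bwd(inputs, bwd.create_hidden_state(batch_size))
    bwd_outputs = torch.flip(bwd_outputs, [0])

    bidir = torch.cat([fwd_outputs, bwd_outputs], dim=2)  # (seq_len x batch_size x 2 * num_outputs)
    print(bidir.shape)  # torch.Size([5, 4, 40])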