model.lua


require 'torch'
require 'nn'
require './util/LSTM'
require './util/ReverseSequence'
require './util/OneHot'
-- Penlight utilities: tablex.map is used below, and stringx.import() patches
-- the string table with :split (if the original ran under trepl, these may
-- already have been available as globals)
local tablex = require 'pl.tablex'
require('pl.stringx').import()

local Model, parent = torch.class('nn.Model', 'nn.Module')
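
-- Build a stack of `rnn_layers` LSTMs. When `reverse` is true, the input is
-- flipped along the time dimension before the stack and flipped back after,
-- giving the backward half of a bidirectional RNN.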
local function create_lstm(self, reverse)
  local lstm = nn.Sequential()
  if reverse then lstm:add(nn.ReverseSequence(2, self.gpu)) end
  for i = 1, self.rnn_layers do
    local prev_dim = self.rnn_size
    if i == 1 then prev_dim = self.wordvec_dim end
    local rnn = nn.LSTM(prev_dim, self.rnn_size)
    rnn.remember_states = false
    table.insert(self.rnns, rnn)
    lstm:add(rnn)
    if self.dropout > 0 then
      lstm:add(nn.Dropout(self.dropout))
    end
  end
  if reverse then lstm:add(nn.ReverseSequence(2, self.gpu)) end
  return lstm
end
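
-- Assemble the model from the options table. The fields read below are:
-- gpu, rnn_size, rnn_layers, dropout, batchnorm, unidirectional, alphabet,
-- cnn, rnn, cnn_filters, cnn_size, cnn_pool, batch_size, and num_classes.
-- cnn_filters is a '-'-separated string of filter widths (e.g. '9-5-3').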
function Model:__init(opt)
  self.gpu = opt.gpu
  self.rnn_size = opt.rnn_size
  self.rnn_layers = opt.rnn_layers
  self.dropout = opt.dropout
  self.batchnorm = opt.batchnorm
  self.unidirectional = opt.unidirectional
  self.wordvec_dim = #(opt.alphabet) -- ACGT = 4
  self.cnn = opt.cnn
  self.rnn = opt.rnn
  self.cnn_filters = tablex.map(tonumber, opt.cnn_filters:split('-'))
  self.cnn_size = opt.cnn_size
  self.cnn_pool = opt.cnn_pool
  self.batch_size = opt.batch_size
  self.num_classes = opt.num_classes
  self.rnns = {}
  self.model = nn.Sequential()

  -- Create the input embedding (this project always uses a one-hot encoded matrix)
  self.model:add(OneHot(self.wordvec_dim))
  ------------------------------------------
  -------- RNN and CNN-RNN Models ----------
  ------------------------------------------
  if self.rnn then
    local CNN = nn.Sequential()
    local RNN = nn.Sequential()
    -----------------------------
    ---------- CNN-RNN ----------
    -----------------------------
    if self.cnn then
      -- Use only one convolution layer in the CNN-RNN model; zero-pad the
      -- sequence so the convolution preserves its length
      local pad_size = math.floor(self.cnn_filters[1] / 2)
      CNN:add(nn.SpatialZeroPadding(0, 0, pad_size, pad_size))
      CNN:add(nn.TemporalConvolution(self.wordvec_dim, self.cnn_size, self.cnn_filters[1]))
      CNN:add(nn.ReLU())
      self.model:add(CNN)
      self.wordvec_dim = self.cnn_size
    end
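    -- With the CNN front end in place, the LSTM stack consumes cnn_size-
    -- dimensional convolution features instead of raw one-hot vectors,
    -- which is why wordvec_dim is reassigned above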
    -----------------------------
    ------------ RNN ------------
    -----------------------------
    local fwd = create_lstm(self, false)
    fwd:add(nn.Mean(2)) -- take the mean of the output vectors over the time dimension
    local bwd = create_lstm(self, true)
    bwd:add(nn.Mean(2)) -- take the mean of the output vectors over the time dimension
    local concat = nn.ConcatTable()
    local output_size
    if self.unidirectional then
      concat:add(fwd) -- use a ConcatTable for consistency with the bidirectional LSTM
      output_size = self.rnn_size
    else
      concat:add(fwd)
      concat:add(bwd)
      output_size = self.rnn_size * 2
    end
    RNN:add(concat)
    RNN:add(nn.JoinTable(2))
    self.model:add(RNN)

    -- Create the output classifier of the (CNN-)RNN
    self.model:add(nn.Linear(output_size, self.num_classes))
    self.model:add(nn.LogSoftMax())
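
    -- LogSoftMax emits log-probabilities, so the model would typically be
    -- trained against nn.ClassNLLCriterion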
  ---------------------------------------
  -------------- CNN Model --------------
  ---------------------------------------
  else
    -- Track the feature size fed into each convolution layer
    local input_size = self.wordvec_dim
    -- All but the last convolution layer, each followed by ReLU, max pooling,
    -- and optional dropout
    for layer = 1, #self.cnn_filters - 1 do
      self.model:add(nn.TemporalConvolution(input_size, self.cnn_size, self.cnn_filters[layer]))
      input_size = self.cnn_size
      self.model:add(nn.ReLU())
      self.model:add(nn.TemporalMaxPooling(self.cnn_pool, self.cnn_pool))
      if self.dropout > 0 then self.model:add(nn.Dropout(self.dropout)) end
    end
    -- Last convolution layer
    self.model:add(nn.TemporalConvolution(input_size, self.cnn_size, self.cnn_filters[#self.cnn_filters]))
    self.model:add(nn.ReLU())
    if self.dropout > 0 then self.model:add(nn.Dropout(self.dropout)) end
    -- Max pool across the entire sequence to get a uniform output size,
    -- then reshape (view) to feed into the linear classifier
    self.model:add(nn.Max(2))
    self.model:add(nn.View(-1, self.cnn_size))
    -- Output classifier of the CNN
    self.model:add(nn.Linear(self.cnn_size, self.num_classes))
    self.model:add(nn.LogSoftMax())
  end

  print('-------- Model Architecture ----------')
  print(self.model)
end
-- Model functions: delegate to the wrapped container while keeping the
-- standard nn.Module fields (output, gradInput) populated
function Model:updateOutput(input)
  self.output = self.model:forward(input)
  return self.output
end

function Model:backward(input, gradOutput, scale)
  self.gradInput = self.model:backward(input, gradOutput, scale)
  return self.gradInput
end

function Model:parameters()
  return self.model:parameters()
end

function Model:training()
  self.model:training()
  parent.training(self)
end

function Model:evaluate()
  self.model:evaluate()
  parent.evaluate(self)
end

-- Clear any remembered LSTM hidden state (e.g. between independent sequences)
function Model:resetStates()
  for _, rnn in ipairs(self.rnns) do rnn:resetStates() end
end

function Model:clearState()
  self.model:clearState()
end
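
-- Example usage (a minimal sketch: the option values below are illustrative,
-- not taken from this repository's training script, and OneHot is assumed to
-- consume integer class indices in 1..#alphabet):
--
--   require './model'
--   local opt = {
--     gpu = -1, alphabet = 'ACGT', num_classes = 2, batch_size = 32,
--     rnn = true, cnn = true, unidirectional = false,
--     rnn_size = 128, rnn_layers = 2, dropout = 0.5, batchnorm = 0,
--     cnn_filters = '9-5-3', cnn_size = 128, cnn_pool = 2,
--   }
--   local model = nn.Model(opt)
--   local x = torch.Tensor(opt.batch_size, 100):random(1, #opt.alphabet)
--   local log_probs = model:forward(x)  -- batch_size x num_classes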