baselines/models.py
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn
import tensorflow.contrib.layers as layers

import sklearn.metrics  # evaluate() uses sklearn.metrics, so import the submodule explicitly
import numpy as np
import os, time, shutil, collections

PADDING_ID = 1016
WORDS_NUM = 1017
MASK_ARRAY = [[1.]] * PADDING_ID + [[0.]] + [[1.]] * (WORDS_NUM - PADDING_ID - 1)
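# MASK_ARRAY is a (WORDS_NUM, 1) lookup table whose rows are all [1.] except the
# PADDING_ID row, which is [0.]; looking it up alongside the embedding matrix and
# multiplying zeroes out the embeddings of padded code slots (see build_emb below).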

class BaseModel(object):
    """
    Base model for networks over sequential data, i.e., RNN and CNN baselines.
    """
    def __init__(self):
        self.regularizers = []

    def loss(self, logits):
        # Cross-entropy loss plus L2 regularization.
        with tf.name_scope('cross_entropy'):
            labels = tf.to_int64(self.ph_labels)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            cross_entropy = tf.reduce_mean(cross_entropy)
        with tf.name_scope('regularization'):
            regularization = self.regularization
            regularization *= tf.add_n(self.regularizers)
        loss = cross_entropy + regularization

        # Summaries for TensorBoard.
        tf.summary.scalar('loss/cross_entropy', cross_entropy)
        tf.summary.scalar('loss/regularization', regularization)
        tf.summary.scalar('loss/total', loss)
        with tf.name_scope('averages'):
            averages = tf.train.ExponentialMovingAverage(0.9)
            op_averages = averages.apply([cross_entropy, regularization, loss])
            tf.summary.scalar('loss/avg/cross_entropy', averages.average(cross_entropy))
            tf.summary.scalar('loss/avg/regularization', averages.average(regularization))
            tf.summary.scalar('loss/avg/total', averages.average(loss))
            with tf.control_dependencies([op_averages]):
                loss_average = tf.identity(averages.average(loss), name='control')
        return loss, loss_average

    def predict(self, data, labels=None, sess=None):
        """Run the model over `data` in batches; return predictions (and the mean loss if labels are given)."""
        loss = 0
        size = data.shape[0]
        predictions = np.empty(size)
        sess = self._get_session(sess)
        for begin in range(0, size, self.batch_size):
            end = begin + self.batch_size
            end = min([end, size])
            batch_data = np.zeros((self.batch_size, data.shape[1], data.shape[2]))
            tmp_data = data[begin:end, :, :]

            if type(tmp_data) is not np.ndarray:
                tmp_data = tmp_data.toarray()  # convert sparse matrices
            batch_data[:end-begin] = tmp_data
            feed_dict = {self.ph_data: batch_data, self.ph_dropout: 1, self.ph_training: False}

            # Compute loss if labels are given.
            if labels is not None:
                batch_labels = np.zeros(self.batch_size)
                batch_labels[:end-begin] = labels[begin:end]
                feed_dict[self.ph_labels] = batch_labels
                batch_pred, batch_loss = sess.run([self.op_prediction, self.op_loss], feed_dict)
                loss += batch_loss
            else:
                batch_pred = sess.run(self.op_prediction, feed_dict)

            predictions[begin:end] = batch_pred[:end-begin]

        if labels is not None:
            return predictions, loss * self.batch_size / size
        else:
            return predictions

    def training(self, loss, learning_rate, decay_steps, decay_rate=0.95, momentum=0.9):
        """Adds the ops required to compute and apply gradients that minimize the loss."""
        with tf.name_scope('training'):
            # Learning rate.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            if decay_rate != 1:
                learning_rate = tf.train.exponential_decay(
                        learning_rate, global_step, decay_steps, decay_rate, staircase=True)
            tf.summary.scalar('learning_rate', learning_rate)
            # Optimizer.
            if momentum == 0:
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            else:
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
            grads = optimizer.compute_gradients(loss)
            op_gradients = optimizer.apply_gradients(grads, global_step=global_step)
            # Histograms.
            for grad, var in grads:
                if grad is None:
                    print('warning: {} has no gradient'.format(var.op.name))
                else:
                    tf.summary.histogram(var.op.name + '/gradients', grad)
            # The training op returns the (decayed) learning rate once the gradients are applied.
            with tf.control_dependencies([op_gradients]):
                op_train = tf.identity(learning_rate, name='control')
        return op_train

    def fit(self, X_tr, y_tr, X_vl, y_vl):
        t_process, t_wall = time.process_time(), time.time()
        sess = tf.Session(graph=self.graph)
        shutil.rmtree(self._get_path('summaries'), ignore_errors=True)
        writer = tf.summary.FileWriter(self._get_path('summaries'), self.graph)
        shutil.rmtree(self._get_path('checkpoints'), ignore_errors=True)
        os.makedirs(self._get_path('checkpoints'))
        path = os.path.join(self._get_path('checkpoints'), 'model')
        sess.run(self.op_init)

        # Training.
        count = 0
        bad_counter = 0
        accuracies = []
        aucs = []
        losses = []
        indices = collections.deque()
        num_steps = int(self.num_epochs * X_tr.shape[0] / self.batch_size)
        estop = False  # early stop
        if type(X_vl) is not np.ndarray:
            X_vl = X_vl.toarray()

        for step in range(1, num_steps+1):
            # Be sure to have used all the samples before using one a second time.
            if len(indices) < self.batch_size:
                indices.extend(np.random.permutation(X_tr.shape[0]))
            idx = [indices.popleft() for i in range(self.batch_size)]
            count += len(idx)
            batch_data, batch_labels = X_tr[idx, :, :], y_tr[idx]

            if type(batch_data) is not np.ndarray:
                batch_data = batch_data.toarray()  # convert sparse matrices
            feed_dict = {self.ph_data: batch_data, self.ph_labels: batch_labels, self.ph_dropout: self.dropout, self.ph_training: True}
            learning_rate, loss_average = sess.run([self.op_train, self.op_loss_average], feed_dict)

            # Periodic evaluation of the model.
            if step % self.eval_frequency == 0 or step == num_steps:
                print('Seen samples: %d' % count)
                epoch = step * self.batch_size / X_tr.shape[0]
                print('step {} / {} (epoch {:.2f} / {}):'.format(step, num_steps, epoch, self.num_epochs))
                print('  learning_rate = {:.2e}, loss_average = {:.2e}'.format(learning_rate, loss_average))
                string, auc, accuracy, loss, predictions = self.evaluate(X_vl, y_vl, sess)
                aucs.append(auc)
                accuracies.append(accuracy)
                losses.append(loss)
                print('  validation {}'.format(string))
                print('  time: {:.0f}s (wall {:.0f}s)'.format(time.process_time()-t_process, time.time()-t_wall))

                # Summaries for TensorBoard.
                summary = tf.Summary()
                summary.ParseFromString(sess.run(self.op_summary, feed_dict))
                summary.value.add(tag='validation/auc', simple_value=auc)
                summary.value.add(tag='validation/loss', simple_value=loss)
                writer.add_summary(summary, step)

                # Save model parameters (for evaluation).
                self.op_saver.save(sess, path, global_step=step)

                # Reset the patience counter whenever the current AUC matches the best seen so far.
                if len(aucs) > (self.patience+5) and auc >= np.array(aucs).max():
                    bad_counter = 0

                if len(aucs) > (self.patience+5) and auc <= np.array(aucs)[:-self.patience].max():
                    bad_counter += 1
                    if bad_counter > self.patience:
                        print('Early Stop!')
                        estop = True
                        break
            if estop:
                break
        print('validation accuracy: peak = {:.2f}, mean = {:.2f}'.format(max(accuracies), np.mean(accuracies[-10:])))
        print('validation auc: peak = {:.2f}, mean = {:.2f}'.format(max(aucs), np.mean(aucs[-10:])))
        writer.close()
        sess.close()
        t_step = (time.time() - t_wall) / num_steps
        print("Optimization Finished!")
        return aucs, accuracies, losses

    def evaluate(self, data, labels, sess=None):
        """
        Runs one evaluation over the full validation data and returns a summary
        string together with the AUC, accuracy, loss and predictions.
        Batch evaluation saves memory and enables this to run on smaller GPUs.
        sess: the session in which the model has been trained.
        """
        t_process, t_wall = time.process_time(), time.time()
        predictions, loss = self.predict(data, labels, sess)

        fpr, tpr, _ = sklearn.metrics.roc_curve(labels, predictions)
        auc = 100 * sklearn.metrics.auc(fpr, tpr)
        ncorrects = int(np.sum(predictions == labels))
        accuracy = 100 * sklearn.metrics.accuracy_score(labels, predictions)
        string = 'auc: {:.2f}, accuracy: {:.2f} ({:d} / {:d}), loss: {:.2e}'.format(auc, accuracy, ncorrects, len(labels), loss)

        if sess is None:
            string += '\ntime: {:.0f}s (wall {:.0f}s)'.format(time.process_time()-t_process, time.time()-t_wall)
        return string, auc, accuracy, loss, predictions

    def inference(self, data, dropout, is_training):
        """
        Builds the forward pass of the model, i.e. the part of the computational
        graph needed to produce logits from raw input data.
        data: int tensor of size N x timesteps x code_size
            N: number of samples in a batch
        """
        # TODO: optimizations for sparse data
        logits = self._inference(data, dropout, is_training)
        return logits

    def _weight_variable(self, shape):
        initial = tf.truncated_normal_initializer(0, 0.1)
        var = tf.get_variable('weights', shape, tf.float32, initializer=initial)
        if self.isReg:
            self.regularizers.append(tf.nn.l2_loss(var))
        tf.summary.histogram(var.op.name, var)
        return var

    def _bias_variable(self, shape):
        initial = tf.constant_initializer(0.1)
        var = tf.get_variable('bias', shape, tf.float32, initializer=initial)
        if self.isReg:
            self.regularizers.append(tf.nn.l2_loss(var))
        tf.summary.histogram(var.op.name, var)
        return var

    def fc(self, x, Mout, relu=True):
        """Fully connected layer with Mout features."""
        N, Min = x.get_shape()
        W = self._weight_variable([int(Min), Mout])
        b = self._bias_variable([Mout])
        x = tf.matmul(x, W) + b
        return tf.nn.relu(x) if relu else x

    def normalize(self, inputs, epsilon=1e-8, scope="ln", reuse=None):
        '''Applies layer normalization.

        Args:
          inputs: A tensor with 2 or more dimensions, where the first dimension has
            `batch_size`.
          epsilon: A small float added to the variance to prevent division by zero.
          scope: Optional scope for `variable_scope`.
          reuse: Boolean, whether to reuse the weights of a previous layer
            by the same name.

        Returns:
          A tensor with the same shape and dtype as `inputs`.
        '''
        with tf.variable_scope(scope, reuse=reuse):
            inputs_shape = inputs.get_shape()
            params_shape = inputs_shape[-1:]

            mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
            beta = tf.Variable(tf.zeros(params_shape))
            gamma = tf.Variable(tf.ones(params_shape))
            normalized = (inputs - mean) / ((variance + epsilon) ** 0.5)
            outputs = gamma * normalized + beta
        return outputs

    # Helper methods.
    def _get_path(self, folder):
        path = '../../models/'
        return os.path.join(path, folder, self.dir_name)

    def _get_session(self, sess=None):
        """Restore parameters if no session given."""
        if sess is None:
            sess = tf.Session(graph=self.graph)
            filename = tf.train.latest_checkpoint(self._get_path('checkpoints'))
            self.op_saver.restore(sess, filename)
        return sess

    def _get_prediction(self, logits):
        """Return the predicted classes."""
        with tf.name_scope('prediction'):
            prediction = tf.argmax(logits, axis=1)
            return prediction

    # Methods to construct the computational graph.
    def build_model(self):
        """Build the computational graph of the model."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Inputs.
            with tf.name_scope('inputs'):
                # tf Graph input
                self.ph_data = tf.placeholder(tf.int32, (self.batch_size, self.timesteps, self.code_size), 'data')
                self.ph_labels = tf.placeholder(tf.int32, (self.batch_size), 'labels')
                self.ph_dropout = tf.placeholder(tf.float32, (), 'dropout')
                self.ph_training = tf.placeholder(tf.bool, name='trainingFlag')

            # Construct the model: _inference returns the logits.
            op_logits = self.inference(self.ph_data, self.ph_dropout, self.ph_training)
            self.op_loss, self.op_loss_average = self.loss(op_logits)
            self.op_train = self.training(self.op_loss, self.learning_rate,
                    self.decay_steps, self.decay_rate, self.momentum)
            self.op_prediction = self._get_prediction(op_logits)

            # Initialize variables, i.e. weights and biases.
            self.op_init = tf.global_variables_initializer()

            # Summaries for TensorBoard and saver for model parameters.
            self.op_summary = tf.summary.merge_all()
            self.op_saver = tf.train.Saver(max_to_keep=5)
        self.graph.finalize()
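
# Subclasses below (vrnn, birnn, cnn) are expected to set the training
# hyperparameters used by BaseModel (batch_size, learning_rate, decay_steps,
# decay_rate, momentum, dropout, num_epochs, eval_frequency, patience,
# regularization, isReg, dir_name), the input dimensions (timesteps, code_size),
# and to implement _inference(x, dropout, is_training) returning logits, before
# calling build_model().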


class vrnn(BaseModel):
    """
    Build a vanilla recurrent neural network.
    """
    def __init__(self, n_words, n_classes, timesteps, code_size, dir_name, init_std=0.05):
        super().__init__()
        # training parameters
        self.learning_rate = 0.05
        self.batch_size = 64
        self.num_epochs = 200
        self.dropout = 0.8
        self.decay_rate = 0.9
        self.decay_steps = 10000 / self.batch_size
        self.momentum = 0.95
        self.patience = 10
        self.eval_frequency = self.num_epochs
        self.regularization = 0.01
        self.isReg = True
        self.dir_name = dir_name

        # Network Parameters
        self.init_std = init_std
        self.n_hidden = 256  # embedding / RNN hidden dimension
        self.n_hidden_1 = 128
        self.n_hidden_2 = 128
        self.n_words = n_words
        self.n_classes = n_classes
        self.timesteps = timesteps
        self.code_size = code_size
        self.M = [self.n_hidden_1, self.n_classes]
        self.build_model()

    def build_emb(self, x):
        self.Wemb = tf.Variable(tf.random_normal([self.n_words, self.n_hidden], stddev=self.init_std))
        self.Wemb_mask = tf.get_variable("mask_padding", initializer=MASK_ARRAY, dtype="float32", trainable=False)

        _x = tf.nn.embedding_lookup(self.Wemb, x)  # (batch_size, timesteps, code_size, n_hidden)
        _x_mask = tf.nn.embedding_lookup(self.Wemb_mask, x)  # (batch_size, timesteps, code_size, 1)
        emb_vecs = tf.multiply(_x, _x_mask)  # broadcast: zero out the padding codes
        emb_vecs = tf.reduce_sum(emb_vecs, 2)  # sum code embeddings per visit -> (batch_size, timesteps, n_hidden)
        return emb_vecs

    def lstm(self, x):
        # Alternative: unstack into a list of `timesteps` tensors of shape (batch_size, n_hidden)
        # and use a static RNN, e.g.:
        # x = tf.unstack(x, self.timesteps, 1)
        # lstm_cell = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
        # h, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
        lstm_cell = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
        output, state = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)  # (batch_size, timesteps, n_hidden)
        output = tf.transpose(output, [1, 0, 2])  # (timesteps, batch_size, n_hidden)
        last = tf.gather(output, int(output.get_shape()[0]) - 1)  # output at the final timestep
        return last

    def gru(self, x):
        # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_hidden)
        x = tf.unstack(x, self.timesteps, 1)
        gru_cell = rnn.GRUCell(self.n_hidden)  # Define a GRU cell with TensorFlow
        h, states = rnn.static_rnn(gru_cell, x, dtype=tf.float32)
        return h[-1]

    def build_attention(self, x, output_size, initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
        '''Similar to the attention in "Hierarchical Attention Networks for Document Classification".'''
        assert len(x.get_shape()) == 3 and x.get_shape()[-1].value is not None

        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        x_projection = layers.fully_connected(x, output_size,
                                              activation_fn=activation_fn,
                                              scope=scope)

        vector_attn = tf.reduce_sum(tf.multiply(x_projection, attention_context_vector), axis=2, keep_dims=True)
        attention_weights = tf.nn.softmax(vector_attn, dim=1)
        weighted_projection = tf.multiply(x_projection, attention_weights)
        outputs = tf.reduce_sum(weighted_projection, axis=1)
        return outputs
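
    # Note: build_attention offers an attention-weighted pooling over the
    # per-timestep RNN outputs as an alternative to taking only the last state;
    # _inference below does not call it by default.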

    # Create model
    def _inference(self, x, dropout, is_training=True):
        # embedding
        with tf.variable_scope("embedding"):
            x = self.build_emb(x)
            x = self.normalize(x)

        # recurrent neural network
        with tf.variable_scope("rnn"):
            # hout = self.gru(x)
            hout = self.lstm(x)

        with tf.variable_scope("dropout"):
            h_ = layers.dropout(hout, keep_prob=dropout)

        # fully connected layers
        for i, dim in enumerate(self.M[:-1]):
            with tf.variable_scope('fc{}'.format(i+1)):
                h_ = self.fc(h_, dim)
                h_ = tf.nn.dropout(h_, dropout)

        # Logits linear layer, i.e. softmax without normalization.
        with tf.variable_scope('logits'):
            prob = self.fc(h_, self.M[-1], relu=False)
        return prob


class birnn(BaseModel):
    """
    Build a bidirectional recurrent neural network.
    """
    def __init__(self, n_words, n_classes, timesteps, code_size, dir_name, init_std=0.05):
        super().__init__()
        # training parameters
        self.learning_rate = 0.05
        self.batch_size = 64
        self.num_epochs = 200
        self.dropout = 0.8
        self.decay_rate = 0.9
        self.decay_steps = 10000 / self.batch_size
        self.momentum = 0.95
        self.patience = 10
        self.eval_frequency = self.num_epochs
        self.regularization = 0.01
        self.isReg = True
        self.dir_name = dir_name

        # Network Parameters
        self.init_std = init_std
        self.n_hidden = 256  # embedding / RNN hidden dimension
        self.n_hidden_1 = 128
        self.n_hidden_2 = 128
        self.n_words = n_words
        self.n_classes = n_classes
        self.timesteps = timesteps
        self.code_size = code_size
        self.M = [self.n_hidden_1, self.n_classes]
        self.build_model()

    def build_emb(self, x):
        with tf.variable_scope("embed"):
            self.Wemb = tf.Variable(tf.random_normal([self.n_words, self.n_hidden], stddev=self.init_std))
            self.Wemb_mask = tf.get_variable("mask_padding", initializer=MASK_ARRAY, dtype="float32", trainable=False)

            _x = tf.nn.embedding_lookup(self.Wemb, x)  # (batch_size, timesteps, code_size, n_hidden)
            _x_mask = tf.nn.embedding_lookup(self.Wemb_mask, x)  # (batch_size, timesteps, code_size, 1)
            emb_vecs = tf.multiply(_x, _x_mask)  # broadcast: zero out the padding codes
            emb_vecs = tf.reduce_sum(emb_vecs, 2)  # sum code embeddings per visit -> (batch_size, timesteps, n_hidden)
        return emb_vecs

    def bilstm(self, x):
        x = tf.unstack(x, self.timesteps, 1)

        with tf.variable_scope('birnn') as scope:
            # Forward direction cell
            with tf.variable_scope('forward'):
                lstm_fw_cell = rnn.BasicLSTMCell(int(self.n_hidden/2), forget_bias=1.0)
            # Backward direction cell
            with tf.variable_scope('backward'):
                lstm_bw_cell = rnn.BasicLSTMCell(int(self.n_hidden/2), forget_bias=1.0)
        try:
            outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                  dtype=tf.float32)
        except Exception:  # Old TensorFlow versions only return outputs, not states
            outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                    dtype=tf.float32)
        return outputs[-1]

    # Create model
    def _inference(self, x, dropout, is_training=True):
        # embedding
        with tf.variable_scope("embedding"):
            x = self.build_emb(x)
            x = self.normalize(x)

        # recurrent neural networks
        with tf.variable_scope("birnn"):
            hout = self.bilstm(x)

        with tf.variable_scope("dropout"):
            h_ = layers.dropout(hout, keep_prob=dropout)

        # fully connected layers
        for i, dim in enumerate(self.M[:-1]):
            with tf.variable_scope('fc{}'.format(i+1)):
                h_ = self.fc(h_, dim)
                h_ = tf.nn.dropout(h_, dropout)

        # Logits linear layer, i.e. softmax without normalization.
        with tf.variable_scope('logits'):
            prob = self.fc(h_, self.M[-1], relu=False)
        return prob


class cnn(BaseModel):
    """
    Build a convolutional neural network over the code-embedding sequence.
    """
    def __init__(self, n_words, n_classes, timesteps, code_size, dir_name, init_std=0.05):
        super().__init__()
        # training parameters
        self.learning_rate = 0.01
        self.batch_size = 32
        self.num_epochs = 200
        self.dropout = 0.6
        self.decay_rate = 0.9
        self.decay_steps = 10000 / self.batch_size
        self.momentum = 0.95
        self.patience = 10
        self.eval_frequency = self.num_epochs
        self.regularization = 0.01
        self.isReg = True
        self.dir_name = dir_name

        # Network Parameters
        self.init_std = init_std
        self.n_hidden = 256  # embedding dimension
        self.n_hidden_1 = 128
        self.n_hidden_2 = 128
        self.n_words = n_words
        self.n_classes = n_classes
        self.n_filters = 128
        self.timesteps = timesteps
        self.code_size = code_size
        self.M = [self.n_hidden_1, self.n_classes]
        self.filter_sizes = [3, 4, 5]
        self.build_model()

    def build_emb(self, x):
        with tf.variable_scope("embed"):
            self.Wemb = tf.Variable(tf.random_normal([self.n_words, self.n_hidden], stddev=self.init_std))
            self.Wemb_mask = tf.get_variable("mask_padding", initializer=MASK_ARRAY, dtype="float32", trainable=False)

            _x = tf.nn.embedding_lookup(self.Wemb, x)  # (batch_size, timesteps, code_size, n_hidden)
            _x_mask = tf.nn.embedding_lookup(self.Wemb_mask, x)  # (batch_size, timesteps, code_size, 1)
            emb_vecs = tf.multiply(_x, _x_mask)  # broadcast: zero out the padding codes
            emb_vecs = tf.reduce_sum(emb_vecs, 2)  # sum code embeddings per visit -> (batch_size, timesteps, n_hidden)
            self.emb_expanded = tf.expand_dims(emb_vecs, -1)  # add a channel dim for conv2d
        return emb_vecs

    def build_conv(self, x, is_training):
        '''Create a convolution + maxpool layer for each filter size'''
        pooled_outputs = []
        for i, filter_size in enumerate(self.filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.n_hidden, 1, self.n_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[self.n_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.emb_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.leaky_relu(tf.nn.bias_add(conv, b), name="relu")
                h = layers.batch_norm(h, updates_collections=None,
                                         decay=0.99,
                                         scale=True, center=True,
                                         is_training=is_training)
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.timesteps - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = self.n_filters * len(self.filter_sizes)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
        return h_pool_flat
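
    # With the defaults above (n_filters=128, filter_sizes=[3, 4, 5]), each branch
    # convolves over filter_size consecutive timesteps across the full embedding
    # width and max-pools over the (timesteps - filter_size + 1) valid positions,
    # so h_pool_flat has n_filters * 3 = 384 features per sample.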

    # Create model
    def _inference(self, x, dropout, is_training=True):
        with tf.variable_scope("embedding"):
            xemb = self.build_emb(x)

        # convolutional network
        with tf.variable_scope("conv"):
            hout = self.build_conv(xemb, is_training)

        with tf.variable_scope("dropout"):
            h_ = layers.dropout(hout, keep_prob=dropout)

        for i, dim in enumerate(self.M[:-1]):
            with tf.variable_scope('fc{}'.format(i+1)):
                h_ = self.fc(h_, dim)
                h_ = tf.nn.dropout(h_, dropout)

        # Logits linear layer, i.e. softmax without normalization.
        with tf.variable_scope('logits'):
            prob = self.fc(h_, self.M[-1], relu=False)
        return prob
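

# Minimal usage sketch (illustrative only, not part of the training pipeline): builds
# the vanilla RNN baseline on synthetic integer-coded records and runs a short fit.
# The shapes, class count, and dir_name below are assumptions for this sketch; real
# experiments feed preprocessed tensors of shape (num_samples, timesteps, code_size)
# with entries in [0, WORDS_NUM) and PADDING_ID filling empty code slots.
if __name__ == '__main__':
    np.random.seed(0)
    timesteps, code_size = 10, 5      # assumed sequence length and codes per visit
    n_train, n_valid = 256, 64        # small synthetic splits (>= batch_size of 64)
    X_tr = np.random.randint(0, WORDS_NUM, size=(n_train, timesteps, code_size))
    y_tr = np.random.randint(0, 2, size=n_train)
    X_vl = np.random.randint(0, WORDS_NUM, size=(n_valid, timesteps, code_size))
    y_vl = np.random.randint(0, 2, size=n_valid)

    model = vrnn(n_words=WORDS_NUM, n_classes=2, timesteps=timesteps,
                 code_size=code_size, dir_name='vrnn_demo')
    aucs, accuracies, losses = model.fit(X_tr, y_tr, X_vl, y_vl)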