# baselines/mlp.py
import tensorflow as tf
import sklearn.metrics
import scipy.sparse
import numpy as np
import os, time, shutil, collections

class MLP(object):
    """
    A fully connected neural network with two hidden layers (a.k.a. multilayer perceptron).
    """

    def __init__(self, num_input, num_classes):
        # Training parameters.
        self.learning_rate = 0.1
        self.batch_size = 64
        self.num_epochs = 200
        self.display_step = 10000
        self.dropout = 0.8
        self.decay_rate = 0.9
        self.decay_steps = 5000 / self.batch_size
        self.momentum = 0.95
        self.patience = 5
        self.eval_frequency = self.num_epochs
        self.regularization = 0.01
        self.regularizers = []
        self.isReg = True
        self.dir_name = "mlp"

        # Network parameters.
        self.n_hidden_1 = 128  # 1st layer number of neurons
        self.n_hidden_2 = 128  # 2nd layer number of neurons
        self.num_input = num_input
        self.num_classes = num_classes
        self.M = [self.n_hidden_1, self.n_hidden_2, self.num_classes]

        self.build_model()

    # Methods to construct the computational graph of the MLP.
    def build_model(self):
        """Build the computational graph of the MLP model."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Inputs.
            with tf.name_scope('inputs'):
                # tf Graph input
                self.ph_data = tf.placeholder(tf.float32, (self.batch_size, self.num_input), 'data')
                self.ph_labels = tf.placeholder(tf.int32, (self.batch_size,), 'labels')
                self.ph_dropout = tf.placeholder(tf.float32, (), 'dropout')

            # Construct the model.
            op_logits = self.inference(self.ph_data, self.ph_dropout)
            self.op_loss, self.op_loss_average = self.loss(op_logits)
            self.op_train = self.training(self.op_loss, self.learning_rate,
                    self.decay_steps, self.decay_rate, self.momentum)
            self.op_prediction = self._get_prediction(op_logits)

            # Initialize variables, i.e. weights and biases.
            self.op_init = tf.global_variables_initializer()

            # Summaries for TensorBoard and saver for model parameters.
            self.op_summary = tf.summary.merge_all()
            self.op_saver = tf.train.Saver(max_to_keep=5)
        self.graph.finalize()

    def loss(self, logits):
        """Cross-entropy loss with L2 regularization."""
        with tf.name_scope('cross_entropy'):
            labels = tf.cast(self.ph_labels, tf.int64)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            cross_entropy = tf.reduce_mean(cross_entropy)
        with tf.name_scope('regularization'):
            regularization = self.regularization
            regularization *= tf.add_n(self.regularizers)
        loss = cross_entropy + regularization

        # Summaries for TensorBoard.
        tf.summary.scalar('loss/cross_entropy', cross_entropy)
        tf.summary.scalar('loss/regularization', regularization)
        tf.summary.scalar('loss/total', loss)
        with tf.name_scope('averages'):
            averages = tf.train.ExponentialMovingAverage(0.9)
            op_averages = averages.apply([cross_entropy, regularization, loss])
            tf.summary.scalar('loss/avg/cross_entropy', averages.average(cross_entropy))
            tf.summary.scalar('loss/avg/regularization', averages.average(regularization))
            tf.summary.scalar('loss/avg/total', averages.average(loss))
            with tf.control_dependencies([op_averages]):
                loss_average = tf.identity(averages.average(loss), name='control')
        return loss, loss_average

    def predict(self, data, labels=None, sess=None):
        """Predict labels batch by batch. Also return the loss if labels are given."""
        loss = 0
        size = data.shape[0]
        predictions = np.empty(size)
        sess = self._get_session(sess)
        for begin in range(0, size, self.batch_size):
            end = begin + self.batch_size
            end = min([end, size])
            # Pad the last batch with zeros so it matches the fixed batch size.
            batch_data = np.zeros((self.batch_size, data.shape[1]))
            tmp_data = data[begin:end, :]
            if type(tmp_data) is not np.ndarray:
                tmp_data = tmp_data.toarray()  # convert sparse matrices
            batch_data[:end-begin] = tmp_data
            feed_dict = {self.ph_data: batch_data, self.ph_dropout: 1}

            # Compute the loss if labels are given.
            if labels is not None:
                batch_labels = np.zeros(self.batch_size)
                batch_labels[:end-begin] = labels[begin:end]
                feed_dict[self.ph_labels] = batch_labels
                batch_pred, batch_loss = sess.run([self.op_prediction, self.op_loss], feed_dict)
                loss += batch_loss
            else:
                batch_pred = sess.run(self.op_prediction, feed_dict)

            predictions[begin:end] = batch_pred[:end-begin]

        if labels is not None:
            return predictions, loss * self.batch_size / size
        else:
            return predictions

    def training(self, loss, learning_rate, decay_steps, decay_rate=0.95, momentum=0.9):
        """Add to the loss model the ops required to compute and apply gradients."""
        with tf.name_scope('training'):
            # Learning rate.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            if decay_rate != 1:
                learning_rate = tf.train.exponential_decay(
                        learning_rate, global_step, decay_steps, decay_rate, staircase=True)
            tf.summary.scalar('learning_rate', learning_rate)
            # Optimizer.
            if momentum == 0:
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            else:
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
            grads = optimizer.compute_gradients(loss)
            op_gradients = optimizer.apply_gradients(grads, global_step=global_step)
            # Histograms.
            for grad, var in grads:
                if grad is None:
                    print('warning: {} has no gradient'.format(var.op.name))
                else:
                    tf.summary.histogram(var.op.name + '/gradients', grad)
            # The training op returns the learning rate.
            with tf.control_dependencies([op_gradients]):
                op_train = tf.identity(learning_rate, name='control')
        return op_train

    # Helper methods.
    def _get_path(self, folder):
        path = '../../models/'
        return os.path.join(path, folder, self.dir_name)

    def _get_session(self, sess=None):
        """Restore parameters if no session given."""
        if sess is None:
            sess = tf.Session(graph=self.graph)
            filename = tf.train.latest_checkpoint(self._get_path('checkpoints'))
            self.op_saver.restore(sess, filename)
        return sess

    def _get_prediction(self, logits):
        """Return the predicted classes."""
        with tf.name_scope('prediction'):
            prediction = tf.argmax(logits, axis=1)
            return prediction

    def weight_variable(self, shape):
        initial = tf.truncated_normal_initializer(0, 0.1)
        var = tf.get_variable('weights', shape, tf.float32, initializer=initial)
        if self.isReg:
            self.regularizers.append(tf.nn.l2_loss(var))
        tf.summary.histogram(var.op.name, var)
        return var

    def bias_variable(self, shape):
        initial = tf.constant_initializer(0.1)
        var = tf.get_variable('bias', shape, tf.float32, initializer=initial)
        if self.isReg:
            self.regularizers.append(tf.nn.l2_loss(var))
        tf.summary.histogram(var.op.name, var)
        return var

    def fc(self, x, Mout, relu=True):
        """Fully connected layer with Mout features."""
        N, Min = x.get_shape()
        W = self.weight_variable([int(Min), Mout])
        b = self.bias_variable([Mout])
        x = tf.matmul(x, W) + b
        return tf.nn.relu(x) if relu else x

    # Create the model.
    def inference(self, x, dropout):
        for i, dim in enumerate(self.M[:-1]):
            with tf.variable_scope('fc{}'.format(i+1)):
                x = self.fc(x, dim)
                x = tf.nn.dropout(x, dropout)

        # Logits linear layer, i.e. softmax without normalization.
        with tf.variable_scope('logits'):
            logits = self.fc(x, self.M[-1], relu=False)
        return logits

    def evaluate(self, data, labels, sess=None):
        """
        Runs one evaluation against the full epoch of data.
        Returns a summary string, the AUC, the accuracy, the loss and the predictions.
        Batch evaluation saves memory and enables this to run on smaller GPUs.
        sess: the session in which the model has been trained.
        """
        t_process, t_wall = time.process_time(), time.time()
        predictions, loss = self.predict(data, labels, sess)

        fpr, tpr, _ = sklearn.metrics.roc_curve(labels, predictions)
        auc = 100 * sklearn.metrics.auc(fpr, tpr)
        ncorrects = sum(predictions == labels)
        accuracy = 100 * sklearn.metrics.accuracy_score(labels, predictions)
        string = 'auc: {:.2f}, accuracy: {:.2f} ({:d} / {:d}), loss: {:.2e}'.format(auc, accuracy, ncorrects, len(labels), loss)

        if sess is None:
            string += '\ntime: {:.0f}s (wall {:.0f}s)'.format(time.process_time()-t_process, time.time()-t_wall)
        return string, auc, accuracy, loss, predictions

    def fit(self, X_tr, y_tr, X_vl, y_vl):
        t_process, t_wall = time.process_time(), time.time()
        sess = tf.Session(graph=self.graph)
        shutil.rmtree(self._get_path('summaries'), ignore_errors=True)
        writer = tf.summary.FileWriter(self._get_path('summaries'), self.graph)
        shutil.rmtree(self._get_path('checkpoints'), ignore_errors=True)
        os.makedirs(self._get_path('checkpoints'))
        path = os.path.join(self._get_path('checkpoints'), 'model')
        sess.run(self.op_init)

        # Training.
        count = 0
        bad_counter = 0
        accuracies = []
        aucs = []
        losses = []
        indices = collections.deque()
        num_steps = int(self.num_epochs * X_tr.shape[0] / self.batch_size)
        estop = False  # early stop
        if type(X_vl) is not np.ndarray:
            X_vl = X_vl.toarray()

        for step in range(1, num_steps+1):

            # Be sure to have used all the samples before using one a second time.
            if len(indices) < self.batch_size:
                indices.extend(np.random.permutation(X_tr.shape[0]))
            idx = [indices.popleft() for i in range(self.batch_size)]
            count += len(idx)
            batch_data, batch_labels = X_tr[idx, :], y_tr[idx]

            if type(batch_data) is not np.ndarray:
                batch_data = batch_data.toarray()  # convert sparse matrices
            feed_dict = {self.ph_data: batch_data, self.ph_labels: batch_labels, self.ph_dropout: self.dropout}
            learning_rate, loss_average = sess.run([self.op_train, self.op_loss_average], feed_dict)

            # Periodical evaluation of the model.
            if step % self.eval_frequency == 0 or step == num_steps:
                print('Seen samples: %d' % count)
                epoch = step * self.batch_size / X_tr.shape[0]
                print('step {} / {} (epoch {:.2f} / {}):'.format(step, num_steps, epoch, self.num_epochs))
                print('  learning_rate = {:.2e}, loss_average = {:.2e}'.format(learning_rate, loss_average))
                string, auc, accuracy, loss, predictions = self.evaluate(X_vl, y_vl, sess)
                aucs.append(auc)
                accuracies.append(accuracy)
                losses.append(loss)
                print('  validation {}'.format(string))
                print('  time: {:.0f}s (wall {:.0f}s)'.format(time.process_time()-t_process, time.time()-t_wall))

                # Summaries for TensorBoard.
                summary = tf.Summary()
                summary.ParseFromString(sess.run(self.op_summary, feed_dict))
                summary.value.add(tag='validation/auc', simple_value=auc)
                summary.value.add(tag='validation/loss', simple_value=loss)
                writer.add_summary(summary, step)

                # Save model parameters (for evaluation).
                self.op_saver.save(sess, path, global_step=step)

                # Early stopping: reset the patience counter when the current AUC
                # matches the best value seen so far.
                if len(aucs) > (self.patience+5) and auc >= np.array(aucs).max():
                    bad_counter = 0

                # Increase the counter when the AUC has not improved over the best
                # value reached at least `patience` evaluations ago.
                if len(aucs) > (self.patience+5) and auc <= np.array(aucs)[:-self.patience].max():
                    bad_counter += 1
                    if bad_counter > self.patience:
                        print('Early Stop!')
                        estop = True
                        break
            if estop:
                break
        print('validation accuracy: peak = {:.2f}, mean = {:.2f}'.format(max(accuracies), np.mean(accuracies[-10:])))
        print('validation auc: peak = {:.2f}, mean = {:.2f}'.format(max(aucs), np.mean(aucs[-10:])))
        writer.close()
        sess.close()
        t_step = (time.time() - t_wall) / num_steps  # average wall time per step

        return aucs, accuracies, losses
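

# Minimal usage sketch (illustrative only, not part of the original module). It
# assumes dense NumPy features with binary labels, and that '../../models/' is
# writable for checkpoints and summaries; shapes and names are hypothetical.
if __name__ == '__main__':
    np.random.seed(0)
    X_tr = np.random.rand(1024, 100).astype(np.float32)  # training features
    y_tr = np.random.randint(0, 2, size=1024)            # binary training labels
    X_vl = np.random.rand(256, 100).astype(np.float32)   # validation features
    y_vl = np.random.randint(0, 2, size=256)              # binary validation labels

    model = MLP(num_input=X_tr.shape[1], num_classes=2)
    aucs, accuracies, losses = model.fit(X_tr, y_tr, X_vl, y_vl)
    print('best validation AUC: {:.2f}'.format(max(aucs)))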