Switch to unified view

a b/dsb2018_topcoders/selim/resnetv2.py
1
# -*- coding: utf-8 -*-
"""Inception-ResNet V2 model for Keras.

Model naming and structure follows TF-slim implementation (which has some additional
layers and different number of filters from the original arXiv paper):
https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py

Pre-trained ImageNet weights are also converted from TF-slim, which can be found in:
https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models

# Reference
- [Inception-v4, Inception-ResNet and the Impact of
   Residual Connections on Learning](https://arxiv.org/abs/1602.07261)

"""
16
from __future__ import absolute_import
17
from __future__ import division
18
from __future__ import print_function
19
20
import os
21
import warnings
22
23
from keras.applications import imagenet_utils
24
from keras.applications.imagenet_utils import _obtain_input_shape
25
from keras.models import Model
26
from keras.layers import Activation
27
from keras.layers import AveragePooling2D
28
from keras.layers import BatchNormalization
29
from keras.layers import Concatenate
30
from keras.layers import Conv2D
31
from keras.layers import Dense
32
from keras.layers import GlobalAveragePooling2D
33
from keras.layers import GlobalMaxPooling2D
34
from keras.layers import Input
35
from keras.layers import Lambda
36
from keras.layers import MaxPooling2D
37
from keras.utils.data_utils import get_file
38
from keras.engine.topology import get_source_inputs
39
from keras import backend as K
40
41
42
# URL prefix of the release that hosts the pre-trained weight files; a weight
# file name is appended to it to form the download URL.
BASE_WEIGHT_URL = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.7/'
43
44
45
def preprocess_input(x):
    """Prepares a batch of images for this network.

    Thin wrapper that forwards `x` to `imagenet_utils.preprocess_input`
    with `mode='tf'`.

    # Arguments
        x: a 4D numpy array of RGB images with values within [0, 255].

    # Returns
        The array returned by `imagenet_utils.preprocess_input`.
    """
    preprocessed = imagenet_utils.preprocess_input(x, mode='tf')
    return preprocessed
55
56
57
def conv2d_bn(x,
              filters,
              kernel_size,
              strides=1,
              padding='same',
              activation='relu',
              use_bias=False,
              name=None):
    """Applies a convolution, then optional batch norm and activation.

    # Arguments
        x: input tensor.
        filters: number of filters passed to `Conv2D`.
        kernel_size: kernel size passed to `Conv2D`.
        strides: strides passed to `Conv2D`.
        padding: padding mode passed to `Conv2D`.
        activation: activation applied after the convolution (and after
            the batch norm, when present); `None` adds no activation layer.
        use_bias: whether `Conv2D` uses a bias. Batch normalization is
            only added when this is `False`.
        name: name of the `Conv2D` layer; the batch norm layer is named
            `name + '_bn'` and the activation layer `name + '_ac'`.
            When `None`, all layers get automatically generated names.

    # Returns
        Output tensor of `Conv2D` after the optional `BatchNormalization`
        and `Activation` layers.
    """
    def suffixed(suffix):
        # Derived layer names only exist when a base name was supplied.
        return name + suffix if name is not None else None

    out = Conv2D(filters,
                 kernel_size,
                 strides=strides,
                 padding=padding,
                 use_bias=use_bias,
                 name=name)(x)

    # Biased convolutions are left un-normalized.
    if not use_bias:
        if K.image_data_format() == 'channels_first':
            norm_axis = 1
        else:
            norm_axis = 3
        out = BatchNormalization(axis=norm_axis,
                                 scale=False,
                                 name=suffixed('_bn'))(out)

    if activation is None:
        return out
    return Activation(activation, name=suffixed('_ac'))(out)
95
96
97
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    """Adds one Inception-ResNet block on top of `x`.

    Three block layouts are available, selected by `block_type` (the block
    name used in the official TF-slim implementation):
        - Inception-ResNet-A: `block_type='block35'`
        - Inception-ResNet-B: `block_type='block17'`
        - Inception-ResNet-C: `block_type='block8'`

    # Arguments
        x: input tensor.
        scale: factor applied to the residual branch before it is added to
            the shortcut. With `r` the output of the residual branch, the
            block computes `x + scale * r`.
        block_type: `'block35'`, `'block17'` or `'block8'`; selects the
            layout of the parallel convolution branches.
        block_idx: value used to build layer names. The named layers of the
            block share the prefix `block_type + '_' + str(block_idx)`,
            e.g. `'block35_0'` for `block_type='block35', block_idx=0`.
        activation: activation applied at the end of the block.
            When `activation=None`, no activation layer is added
            (i.e., "linear" activation: `a(x) = x`).

    # Returns
        Output tensor for the block.

    # Raises
        ValueError: if `block_type` is not one of `'block35'`,
            `'block17'` or `'block8'`.
    """
    # One entry per parallel branch; each branch is a chain of
    # (filters, kernel_size) convolutions applied starting from `x`.
    if block_type == 'block35':
        branch_specs = [
            [(32, 1)],
            [(32, 1), (32, 3)],
            [(32, 1), (48, 3), (64, 3)],
        ]
    elif block_type == 'block17':
        branch_specs = [
            [(192, 1)],
            [(128, 1), (160, [1, 7]), (192, [7, 1])],
        ]
    elif block_type == 'block8':
        branch_specs = [
            [(192, 1)],
            [(192, 1), (224, [1, 3]), (256, [3, 1])],
        ]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    branches = []
    for spec in branch_specs:
        branch = x
        for n_filters, kernel in spec:
            branch = conv2d_bn(branch, n_filters, kernel)
        branches.append(branch)

    prefix = block_type + '_' + str(block_idx)
    if K.image_data_format() == 'channels_first':
        concat_axis = 1
    else:
        concat_axis = 3
    merged = Concatenate(axis=concat_axis, name=prefix + '_mixed')(branches)

    # Project the concatenated branches back to the channel count of `x`
    # so that the residual can be added to the shortcut.
    shortcut_shape = K.int_shape(x)
    residual = conv2d_bn(merged,
                         shortcut_shape[concat_axis],
                         1,
                         activation=None,
                         use_bias=True,
                         name=prefix + '_conv')

    out = Lambda(lambda tensors, scale: tensors[0] + tensors[1] * scale,
                 output_shape=shortcut_shape[1:],
                 arguments={'scale': scale},
                 name=prefix)([x, residual])

    if activation is None:
        return out
    return Activation(activation, name=prefix + '_ac')(out)
174
175
176
def InceptionResNetV2Same(include_top=True,
                          weights='imagenet',
                          input_tensor=None,
                          input_shape=None,
                          pooling=None,
                          classes=1000):
    """Builds an Inception-ResNet v2 encoder that uses 'same' padding.

    Every convolution and pooling layer of the network is created with
    `'same'` padding, and the returned model exposes five intermediate
    feature maps instead of a classifier output.

    Optionally loads weights pre-trained on ImageNet.
    Note that when using TensorFlow, for best performance you should
    set `"image_data_format": "channels_last"` in your Keras config
    at `~/.keras/keras.json`.

    The input preprocessing function differs from the generic ImageNet one
    (i.e., do not use `imagenet_utils.preprocess_input()` with its default
    mode; use `preprocess_input()` defined in this module instead).

    # Arguments
        include_top: kept for signature compatibility. The returned model
            never contains the classification head, because its outputs
            are the five feature maps listed below. The value only takes
            part in the `classes` validation.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple. It is validated by
            `_obtain_input_shape` with a default size of 299 and a minimum
            size of 139, using the data format of the Keras config.
            E.g. `(256, 256, 3)` with `'channels_last'` data format.
        pooling: kept for signature compatibility; no global pooling is
            applied because the model outputs are feature maps.
        classes: kept for signature compatibility. Must be 1000 when
            `weights='imagenet'` and `include_top` is true.

    # Returns
        A Keras `Model` instance with five outputs, in this order
        (spatial strides are relative to the input, rounded up by the
        'same' padding):
            - `conv1`: stem output before the first pooling,
                64 channels, stride 2.
            - `conv2`: stem output before the second pooling,
                192 channels, stride 4.
            - `conv3`: output of the ten `block35` blocks,
                320 channels, stride 8.
            - `conv4`: output of the twenty `block17` blocks,
                1088 channels, stride 16.
            - `conv5`: output of the final `conv_7b` convolution,
                1536 channels, stride 32.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(
        input_shape,
        default_size=299,
        min_size=139,
        data_format=K.image_data_format(),
        require_flatten=False,
        weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3

    # Stem block. `conv1` (64 channels) is taken at stride 2 and
    # `conv2` (192 channels) at stride 4.
    x = conv2d_bn(img_input, 32, 3, strides=2, padding='same')
    x = conv2d_bn(x, 32, 3, padding='same')
    x = conv2d_bn(x, 64, 3)
    conv1 = x
    x = MaxPooling2D(3, strides=2, padding='same')(x)
    x = conv2d_bn(x, 80, 1, padding='same')
    x = conv2d_bn(x, 192, 3, padding='same')
    conv2 = x
    x = MaxPooling2D(3, strides=2, padding='same')(x)

    # Mixed 5b (Inception-A block): stride 8, 320 channels
    branch_0 = conv2d_bn(x, 96, 1)
    branch_1 = conv2d_bn(x, 48, 1)
    branch_1 = conv2d_bn(branch_1, 64, 5)
    branch_2 = conv2d_bn(x, 64, 1)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_pool = AveragePooling2D(3, strides=1, padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_5b')(branches)

    # 10x block35 (Inception-ResNet-A block): stride 8, 320 channels
    for block_idx in range(1, 11):
        x = inception_resnet_block(x,
                                   scale=0.17,
                                   block_type='block35',
                                   block_idx=block_idx)
    conv3 = x

    # Mixed 6a (Reduction-A block): stride 16, 1088 channels
    branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='same')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 256, 3)
    branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='same')
    branch_pool = MaxPooling2D(3, strides=2, padding='same')(x)
    branches = [branch_0, branch_1, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_6a')(branches)

    # 20x block17 (Inception-ResNet-B block): stride 16, 1088 channels
    for block_idx in range(1, 21):
        x = inception_resnet_block(x,
                                   scale=0.1,
                                   block_type='block17',
                                   block_idx=block_idx)
    conv4 = x

    # Mixed 7a (Reduction-B block): stride 32, 2080 channels
    branch_0 = conv2d_bn(x, 256, 1)
    branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='same')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='same')
    branch_2 = conv2d_bn(x, 256, 1)
    branch_2 = conv2d_bn(branch_2, 288, 3)
    branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='same')
    branch_pool = MaxPooling2D(3, strides=2, padding='same')(x)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_7a')(branches)

    # 10x block8 (Inception-ResNet-C block): stride 32, 2080 channels.
    # The last repetition uses scale 1 and no activation.
    for block_idx in range(1, 10):
        x = inception_resnet_block(x,
                                   scale=0.2,
                                   block_type='block8',
                                   block_idx=block_idx)
    x = inception_resnet_block(x,
                               scale=1.,
                               activation=None,
                               block_type='block8',
                               block_idx=10)

    # Final convolution block: stride 32, 1536 channels
    x = conv2d_bn(x, 1536, 1, name='conv_7b')
    conv5 = x

    # No classification head or global pooling is built here: the model
    # outputs are the feature maps, so such layers would never be part of
    # the model graph.

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model
    model = Model(inputs,
                  [conv1, conv2, conv3, conv4, conv5],
                  name='inception_resnet_v2')

    # Load weights
    if weights == 'imagenet':
        if (K.image_data_format() == 'channels_first' and
                K.backend() == 'tensorflow'):
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your Keras config '
                          'at ~/.keras/keras.json.')
        # The graph built above never contains a classification head, so
        # the headless ("notop") checkpoint is the one whose layers line up
        # with it, regardless of `include_top`.
        fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5'
        weights_path = get_file(fname,
                                BASE_WEIGHT_URL + fname,
                                cache_subdir='models',
                                file_hash='d19885ff4a710c122648d3b5c3b684e4')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
383
384
if __name__ == '__main__':
    # Manual smoke check: build the network for a 256x256 RGB input and
    # print its layer summary.
    encoder = InceptionResNetV2Same(include_top=False,
                                    input_shape=(256, 256, 3))
    encoder.summary()