|
a |
|
b/dsb2018_topcoders/selim/resnetv2.py |
|
|
1 |
# -*- coding: utf-8 -*- |
|
|
2 |
"""Inception-ResNet V2 model for Keras. |
|
|
3 |
|
|
|
4 |
Model naming and structure follows TF-slim implementation (which has some additional |
|
|
5 |
layers and different number of filters from the original arXiv paper): |
|
|
6 |
https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py |
|
|
7 |
|
|
|
8 |
Pre-trained ImageNet weights are also converted from TF-slim, which can be found in: |
|
|
9 |
https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models |
|
|
10 |
|
|
|
11 |
# Reference |
|
|
12 |
- [Inception-v4, Inception-ResNet and the Impact of |
|
|
13 |
Residual Connections on Learning](https://arxiv.org/abs/1602.07261) |
|
|
14 |
|
|
|
15 |
""" |
|
|
16 |
from __future__ import absolute_import |
|
|
17 |
from __future__ import division |
|
|
18 |
from __future__ import print_function |
|
|
19 |
|
|
|
20 |
import os |
|
|
21 |
import warnings |
|
|
22 |
|
|
|
23 |
from keras.applications import imagenet_utils |
|
|
24 |
from keras.applications.imagenet_utils import _obtain_input_shape |
|
|
25 |
from keras.models import Model |
|
|
26 |
from keras.layers import Activation |
|
|
27 |
from keras.layers import AveragePooling2D |
|
|
28 |
from keras.layers import BatchNormalization |
|
|
29 |
from keras.layers import Concatenate |
|
|
30 |
from keras.layers import Conv2D |
|
|
31 |
from keras.layers import Dense |
|
|
32 |
from keras.layers import GlobalAveragePooling2D |
|
|
33 |
from keras.layers import GlobalMaxPooling2D |
|
|
34 |
from keras.layers import Input |
|
|
35 |
from keras.layers import Lambda |
|
|
36 |
from keras.layers import MaxPooling2D |
|
|
37 |
from keras.utils.data_utils import get_file |
|
|
38 |
from keras.engine.topology import get_source_inputs |
|
|
39 |
from keras import backend as K |
|
|
40 |
|
|
|
41 |
|
|
|
42 |
# Release directory that the pre-trained weight file names are appended to
# when `weights='imagenet'` is requested.
BASE_WEIGHT_URL = ('https://github.com/fchollet/deep-learning-models/'
                   'releases/download/v0.7/')
|
|
43 |
|
|
|
44 |
|
|
|
45 |
def preprocess_input(x):
    """Preprocess a batch of images for this network.

    Thin wrapper that forwards to `imagenet_utils.preprocess_input`
    with `mode='tf'`.

    # Arguments
        x: a 4D numpy array of RGB values within [0, 255].

    # Returns
        The array returned by `imagenet_utils.preprocess_input`.
    """
    preprocessed = imagenet_utils.preprocess_input(x, mode='tf')
    return preprocessed
|
|
55 |
|
|
|
56 |
|
|
|
57 |
def conv2d_bn(x,
              filters,
              kernel_size,
              strides=1,
              padding='same',
              activation='relu',
              use_bias=False,
              name=None):
    """Apply `Conv2D`, then optional batch normalization and activation.

    Batch normalization is inserted only when the convolution has no bias
    (`use_bias=False`), and the activation layer is added only when
    `activation` is not `None`.

    # Arguments
        x: input tensor.
        filters: filters in `Conv2D`.
        kernel_size: kernel size as in `Conv2D`.
        strides: strides in `Conv2D`.
        padding: padding mode in `Conv2D` (defaults to `'same'`).
        activation: activation applied after the convolution (and batch
            norm, if any); `None` disables it.
        use_bias: whether to use a bias in `Conv2D`. When `True`, the
            batch norm layer is skipped.
        name: name of the convolution layer; the batch norm layer is named
            `name + '_bn'` and the activation `name + '_ac'`. With `None`,
            every layer name is left for Keras to generate.

    # Returns
        Output tensor of the last layer that was applied.
    """
    def _suffixed(suffix):
        # Derived names are only produced when a base name was supplied.
        if name is None:
            return None
        return name + suffix

    out = Conv2D(filters,
                 kernel_size,
                 strides=strides,
                 padding=padding,
                 use_bias=use_bias,
                 name=name)(x)

    if not use_bias:
        # Normalize over the channel axis of the active data format.
        if K.image_data_format() == 'channels_first':
            bn_axis = 1
        else:
            bn_axis = 3
        out = BatchNormalization(axis=bn_axis,
                                 scale=False,
                                 name=_suffixed('_bn'))(out)

    if activation is not None:
        out = Activation(activation, name=_suffixed('_ac'))(out)

    return out
|
|
95 |
|
|
|
96 |
|
|
|
97 |
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    """Add one Inception-ResNet residual block on top of `x`.

    Three block layouts are supported, selected by `block_type` (the block
    names used in the TF-slim implementation):
        - Inception-ResNet-A: `block_type='block35'`
        - Inception-ResNet-B: `block_type='block17'`
        - Inception-ResNet-C: `block_type='block8'`

    # Arguments
        x: input tensor.
        scale: factor applied to the residual before it is added to the
            shortcut. With `r` the output of the residual branch, the
            block computes `x + scale * r`.
        block_type: `'block35'`, `'block17'` or `'block8'`; selects the
            convolution layout of the residual branch.
        block_idx: value used to build layer names. Named layers of the
            block share the prefix `block_type + '_' + str(block_idx)`,
            e.g. `'block35_1'`.
        activation: activation applied to the block output. When `None`,
            no activation layer is added.

    # Returns
        Output tensor for the block.

    # Raises
        ValueError: if `block_type` is not one of `'block35'`,
            `'block17'` or `'block8'`.
    """
    # Every tower is a chain of (filters, kernel_size) convolutions that
    # starts from the block input.
    if block_type == 'block35':
        towers = [
            [(32, 1)],
            [(32, 1), (32, 3)],
            [(32, 1), (48, 3), (64, 3)],
        ]
    elif block_type == 'block17':
        towers = [
            [(192, 1)],
            [(128, 1), (160, [1, 7]), (192, [7, 1])],
        ]
    elif block_type == 'block8':
        towers = [
            [(192, 1)],
            [(192, 1), (224, [1, 3]), (256, [3, 1])],
        ]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    # Towers are built strictly one after another so that the layers are
    # created in a fixed, reproducible order.
    branches = []
    for tower in towers:
        branch = x
        for filters, kernel_size in tower:
            branch = conv2d_bn(branch, filters, kernel_size)
        branches.append(branch)

    block_name = block_type + '_' + str(block_idx)
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3
    shortcut_shape = K.int_shape(x)

    mixed = Concatenate(axis=channel_axis,
                        name=block_name + '_mixed')(branches)
    # 1x1 convolution with bias (hence no batch norm) and no activation,
    # projecting the concatenated towers back to the input channel count.
    up = conv2d_bn(mixed,
                   shortcut_shape[channel_axis],
                   1,
                   activation=None,
                   use_bias=True,
                   name=block_name + '_conv')

    # Scaled residual sum: shortcut + scale * residual.
    x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
               output_shape=shortcut_shape[1:],
               arguments={'scale': scale},
               name=block_name)([x, up])
    if activation is not None:
        x = Activation(activation, name=block_name + '_ac')(x)
    return x
|
|
174 |
|
|
|
175 |
|
|
|
176 |
def InceptionResNetV2Same(include_top=True,
                          weights='imagenet',
                          input_tensor=None,
                          input_shape=None,
                          pooling=None,
                          classes=1000):
    """Build an Inception-ResNet v2 feature extractor using 'same' padding.

    Unlike a classification network, the returned model exposes five
    intermediate feature maps and never contains a classification or
    pooling head. Every convolution and pooling layer uses
    `padding='same'`, so each stride-2 stage halves the spatial size
    (rounded up).

    The data format convention used by the model is the one returned by
    `K.image_data_format()`. Inputs should be prepared with the
    `preprocess_input()` function defined in this module.

    # Arguments
        include_top: kept for signature compatibility. It only takes part
            in the `classes` validation below; no classification layer is
            ever added to the returned model.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model.
        input_shape: optional shape tuple. It is validated by
            `_obtain_input_shape` with a default size of 299 and a
            minimum size of 139, e.g. `(256, 256, 3)` with the
            `'channels_last'` data format.
        pooling: kept for signature compatibility; it has no effect
            because the model outputs are taken before any pooling.
        classes: kept for signature compatibility. Must be 1000 when
            `weights='imagenet'` and `include_top` is true.

    # Returns
        A Keras `Model` named `'inception_resnet_v2'` with five outputs,
        in this order:
            - conv1: stem output, 64 channels, 1/2 of the input size.
            - conv2: stem output, 192 channels, 1/4 of the input size.
            - conv3: output of the block35 stack, 320 channels, 1/8.
            - conv4: output of the block17 stack, 1088 channels, 1/16.
            - conv5: output of `conv_7b`, 1536 channels, 1/32.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(
        input_shape,
        default_size=299,
        min_size=139,
        data_format=K.image_data_format(),
        require_flatten=False,
        weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    # NOTE: layers below are created in a fixed order on purpose; weight
    # files are matched against the model's layers, so do not reorder.

    # Stem block. conv1: 1/2 resolution, 64 channels.
    x = conv2d_bn(img_input, 32, 3, strides=2, padding='same')
    x = conv2d_bn(x, 32, 3, padding='same')
    x = conv2d_bn(x, 64, 3)
    conv1 = x
    x = MaxPooling2D(3, strides=2, padding='same')(x)
    x = conv2d_bn(x, 80, 1, padding='same')
    x = conv2d_bn(x, 192, 3, padding='same')
    # conv2: 1/4 resolution, 192 channels.
    conv2 = x
    x = MaxPooling2D(3, strides=2, padding='same')(x)

    # Mixed 5b (Inception-A block): 1/8 resolution, 320 channels
    branch_0 = conv2d_bn(x, 96, 1)
    branch_1 = conv2d_bn(x, 48, 1)
    branch_1 = conv2d_bn(branch_1, 64, 5)
    branch_2 = conv2d_bn(x, 64, 1)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_pool = AveragePooling2D(3, strides=1, padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
    x = Concatenate(axis=channel_axis, name='mixed_5b')(branches)

    # 10x block35 (Inception-ResNet-A block): 1/8 resolution, 320 channels
    for block_idx in range(1, 11):
        x = inception_resnet_block(x,
                                   scale=0.17,
                                   block_type='block35',
                                   block_idx=block_idx)
    conv3 = x

    # Mixed 6a (Reduction-A block): 1/16 resolution, 1088 channels
    branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='same')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 256, 3)
    branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='same')
    branch_pool = MaxPooling2D(3, strides=2, padding='same')(x)
    branches = [branch_0, branch_1, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_6a')(branches)

    # 20x block17 (Inception-ResNet-B block): 1/16 resolution, 1088 channels
    for block_idx in range(1, 21):
        x = inception_resnet_block(x,
                                   scale=0.1,
                                   block_type='block17',
                                   block_idx=block_idx)
    conv4 = x

    # Mixed 7a (Reduction-B block): 1/32 resolution, 2080 channels
    branch_0 = conv2d_bn(x, 256, 1)
    branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='same')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='same')
    branch_2 = conv2d_bn(x, 256, 1)
    branch_2 = conv2d_bn(branch_2, 288, 3)
    branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='same')
    branch_pool = MaxPooling2D(3, strides=2, padding='same')(x)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_7a')(branches)

    # 10x block8 (Inception-ResNet-C block): 1/32 resolution, 2080 channels.
    # The last repetition uses scale 1 and no activation.
    for block_idx in range(1, 10):
        x = inception_resnet_block(x,
                                   scale=0.2,
                                   block_type='block8',
                                   block_idx=block_idx)
    x = inception_resnet_block(x,
                               scale=1.,
                               activation=None,
                               block_type='block8',
                               block_idx=10)

    # Final convolution block: 1/32 resolution, 1536 channels
    x = conv2d_bn(x, 1536, 1, name='conv_7b')
    conv5 = x

    # No classification/pooling head is built here: the model below is
    # defined by the five feature maps only, so such layers would never be
    # part of it.

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model
    model = Model(inputs,
                  [conv1, conv2, conv3, conv4, conv5],
                  name='inception_resnet_v2')

    # Load weights
    if weights == 'imagenet':
        if (K.image_data_format() == 'channels_first' and
                K.backend() == 'tensorflow'):
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your Keras config '
                          'at ~/.keras/keras.json.')
        # The model never contains the `predictions` layer, so the "notop"
        # file is the one whose layers line up with it, whatever the value
        # of `include_top`. The full-top file has an extra weighted layer
        # that this model cannot match.
        fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5'
        weights_path = get_file(fname,
                                BASE_WEIGHT_URL + fname,
                                cache_subdir='models',
                                file_hash='d19885ff4a710c122648d3b5c3b684e4')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
|
|
383 |
|
|
|
384 |
if __name__ == '__main__':
    # Smoke run: build the network for 256x256 RGB inputs with the default
    # `weights='imagenet'` and print its layer table.
    demo_model = InceptionResNetV2Same(include_top=False,
                                       input_shape=(256, 256, 3))
    demo_model.summary()