|
a |
|
b/lungs/i3d.py |
|
|
1 |
# Copyright 2017 Google Inc. |
|
|
2 |
# |
|
|
3 |
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
4 |
# you may not use this file except in compliance with the License. |
|
|
5 |
# You may obtain a copy of the License at |
|
|
6 |
# |
|
|
7 |
# https://www.apache.org/licenses/LICENSE-2.0 |
|
|
8 |
# |
|
|
9 |
# Unless required by applicable law or agreed to in writing, software |
|
|
10 |
# distributed under the License is distributed on an "AS IS" BASIS, |
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
12 |
# See the License for the specific language governing permissions and |
|
|
13 |
# limitations under the License. |
|
|
14 |
# ============================================================================ |
|
|
15 |
"""Inception-v1 Inflated 3D ConvNet used for Kinetics CVPR paper. |
|
|
16 |
|
|
|
17 |
The model is introduced in: |
|
|
18 |
|
|
|
19 |
Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset |
|
|
20 |
Joao Carreira, Andrew Zisserman |
|
|
21 |
https://arxiv.org/pdf/1705.07750v1.pdf. |
|
|
22 |
""" |
|
|
23 |
|
|
|
24 |
from __future__ import absolute_import |
|
|
25 |
from __future__ import division |
|
|
26 |
from __future__ import print_function |
|
|
27 |
|
|
|
28 |
import sonnet as snt |
|
|
29 |
import tensorflow as tf |
|
|
30 |
class Unit3D(snt.AbstractModule):
  """3D convolution optionally followed by batch norm and an activation."""

  def __init__(self, output_channels,
               kernel_shape=(1, 1, 1),
               stride=(1, 1, 1),
               activation_fn=tf.nn.relu,
               use_batch_norm=True,
               use_bias=False,
               name='unit_3d'):
    """Stores the configuration of the unit.

    Args:
      output_channels: Number of output channels of the convolution.
      kernel_shape: Kernel shape forwarded to snt.Conv3D.
      stride: Stride forwarded to snt.Conv3D.
      activation_fn: Callable applied to the final tensor, or None to skip
        the non-linearity.
      use_batch_norm: Whether to apply snt.BatchNorm after the convolution.
      use_bias: Whether the convolution has a bias term.
      name: Name of this module.
    """
    super(Unit3D, self).__init__(name=name)
    # Convolution settings.
    self._output_channels = output_channels
    self._kernel_shape = kernel_shape
    self._stride = stride
    self._use_bias = use_bias
    # Post-convolution settings.
    self._use_batch_norm = use_batch_norm
    self._activation_fn = activation_fn

  def _build(self, inputs, is_training):
    """Applies convolution, then optional batch norm and activation.

    Args:
      inputs: Tensor fed to the 3D convolution.
      is_training: Boolean forwarded to snt.BatchNorm to select training mode.

    Returns:
      The tensor produced by the last enabled stage of the unit.
    """
    conv = snt.Conv3D(output_channels=self._output_channels,
                      kernel_shape=self._kernel_shape,
                      stride=self._stride,
                      padding=snt.SAME,
                      use_bias=self._use_bias)
    outputs = conv(inputs)

    if self._use_batch_norm:
      # test_local_stats=False is forwarded unchanged to snt.BatchNorm.
      outputs = snt.BatchNorm()(outputs, is_training=is_training,
                                test_local_stats=False)

    if self._activation_fn is None:
      return outputs
    return self._activation_fn(outputs)
|
|
70 |
|
|
|
71 |
|
|
|
72 |
class InceptionI3d(snt.AbstractModule):
  """Inception-v1 I3D architecture.

  The model is introduced in:

    Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset
    Joao Carreira, Andrew Zisserman
    https://arxiv.org/pdf/1705.07750v1.pdf.

  See also the Inception architecture, introduced in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.
  """

  # Endpoints of the model in order. During construction, all the endpoints up
  # to a designated `final_endpoint` are returned in a dictionary as the
  # second return value.
  VALID_ENDPOINTS = (
      'Conv3d_1a_7x7',
      'MaxPool3d_2a_3x3',
      'Conv3d_2b_1x1',
      'Conv3d_2c_3x3',
      'MaxPool3d_3a_3x3',
      'Mixed_3b',
      'Mixed_3c',
      'MaxPool3d_4a_3x3',
      'Mixed_4b',
      'Mixed_4c',
      'Mixed_4d',
      'Mixed_4e',
      'Mixed_4f',
      'MaxPool3d_5a_2x2',
      'Mixed_5b',
      'Mixed_5c',
      'Logits',
      'Predictions',
  )

  # Stem convolutions: endpoint -> (output_channels, kernel_shape, stride).
  _CONV_SPECS = {
      'Conv3d_1a_7x7': (64, (7, 7, 7), (2, 2, 2)),
      'Conv3d_2b_1x1': (64, (1, 1, 1), (1, 1, 1)),
      'Conv3d_2c_3x3': (192, (3, 3, 3), (1, 1, 1)),
  }

  # Max-pooling layers between stages: endpoint -> (ksize, strides).
  _POOL_SPECS = {
      'MaxPool3d_2a_3x3': ((1, 1, 3, 3, 1), (1, 1, 2, 2, 1)),
      'MaxPool3d_3a_3x3': ((1, 1, 3, 3, 1), (1, 1, 2, 2, 1)),
      'MaxPool3d_4a_3x3': ((1, 3, 3, 3, 1), (1, 2, 2, 2, 1)),
      'MaxPool3d_5a_2x2': ((1, 2, 2, 2, 1), (1, 2, 2, 2, 1)),
  }

  # Inception blocks: endpoint -> (
  #     Branch_0 1x1 channels,
  #     (Branch_1 1x1 channels, Branch_1 3x3 channels),
  #     (Branch_2 1x1 channels, Branch_2 3x3 channels),
  #     Branch_3 1x1 channels after max-pooling).
  _MIXED_SPECS = {
      'Mixed_3b': (64, (96, 128), (16, 32), 32),
      'Mixed_3c': (128, (128, 192), (32, 96), 64),
      'Mixed_4b': (192, (96, 208), (16, 48), 64),
      'Mixed_4c': (160, (112, 224), (24, 64), 64),
      'Mixed_4d': (128, (128, 256), (24, 64), 64),
      'Mixed_4e': (112, (144, 288), (32, 64), 64),
      'Mixed_4f': (256, (160, 320), (32, 128), 128),
      'Mixed_5b': (256, (160, 320), (32, 128), 128),
      'Mixed_5c': (384, (192, 384), (48, 128), 128),
  }

  def __init__(self, num_classes=400, spatial_squeeze=True,
               final_endpoint='Logits', name='inception_i3d'):
    """Initializes I3D model instance.

    Args:
      num_classes: The number of outputs in the logit layer (default 400, which
          matches the Kinetics dataset).
      spatial_squeeze: Whether to squeeze the spatial dimensions for the logits
          before returning (default True).
      final_endpoint: The model contains many possible endpoints.
          `final_endpoint` specifies the last endpoint for the model to be built
          up to. In addition to the output at `final_endpoint`, all the outputs
          at endpoints up to `final_endpoint` will also be returned, in a
          dictionary. `final_endpoint` must be one of
          InceptionI3d.VALID_ENDPOINTS (default 'Logits').
      name: A string (optional). The name of this module.

    Raises:
      ValueError: if `final_endpoint` is not recognized.
    """

    if final_endpoint not in self.VALID_ENDPOINTS:
      raise ValueError('Unknown final endpoint %s' % final_endpoint)

    super(InceptionI3d, self).__init__(name=name)
    self._num_classes = num_classes
    self._spatial_squeeze = spatial_squeeze
    self._final_endpoint = final_endpoint

  def _inception_block(self, net, is_training, end_point):
    """Builds one four-branch Inception block under scope `end_point`.

    Args:
      net: Input tensor of the block.
      is_training: Boolean forwarded to every Unit3D in the block.
      end_point: Name of the block; must be a key of `_MIXED_SPECS`.

    Returns:
      The four branch outputs concatenated along axis 4.
    """
    (b0_channels,
     (b1_reduce, b1_channels),
     (b2_reduce, b2_channels),
     b3_channels) = self._MIXED_SPECS[end_point]

    # Mixed_5b names its Branch_2 3x3 convolution 'Conv3d_0a_3x3' while every
    # other block uses 'Conv3d_0b_3x3'. The module name feeds the variable
    # scope, so "fixing" it would rename the variables.
    # NOTE(review): presumably existing checkpoints rely on this name --
    # confirm before changing it.
    if end_point == 'Mixed_5b':
      b2_conv_name = 'Conv3d_0a_3x3'
    else:
      b2_conv_name = 'Conv3d_0b_3x3'

    with tf.variable_scope(end_point):
      with tf.variable_scope('Branch_0'):
        branch_0 = Unit3D(output_channels=b0_channels, kernel_shape=[1, 1, 1],
                          name='Conv3d_0a_1x1')(net, is_training=is_training)
      with tf.variable_scope('Branch_1'):
        branch_1 = Unit3D(output_channels=b1_reduce, kernel_shape=[1, 1, 1],
                          name='Conv3d_0a_1x1')(net, is_training=is_training)
        branch_1 = Unit3D(output_channels=b1_channels, kernel_shape=[3, 3, 3],
                          name='Conv3d_0b_3x3')(branch_1,
                                                is_training=is_training)
      with tf.variable_scope('Branch_2'):
        branch_2 = Unit3D(output_channels=b2_reduce, kernel_shape=[1, 1, 1],
                          name='Conv3d_0a_1x1')(net, is_training=is_training)
        branch_2 = Unit3D(output_channels=b2_channels, kernel_shape=[3, 3, 3],
                          name=b2_conv_name)(branch_2,
                                             is_training=is_training)
      with tf.variable_scope('Branch_3'):
        branch_3 = tf.nn.max_pool3d(net, ksize=[1, 3, 3, 3, 1],
                                    strides=[1, 1, 1, 1, 1], padding=snt.SAME,
                                    name='MaxPool3d_0a_3x3')
        branch_3 = Unit3D(output_channels=b3_channels, kernel_shape=[1, 1, 1],
                          name='Conv3d_0b_1x1')(branch_3,
                                                is_training=is_training)
      return tf.concat([branch_0, branch_1, branch_2, branch_3], 4)

  def _build(self, inputs, is_training, dropout_keep_prob=1.0):
    """Connects the model to inputs.

    Args:
      inputs: Inputs to the model, which should have dimensions
          `batch_size` x `num_frames` x 224 x 224 x `num_channels`.
      is_training: whether to use training mode for snt.BatchNorm (boolean).
      dropout_keep_prob: Keep probability passed to tf.nn.dropout (float in
          (0, 1]; the default 1.0 keeps every unit). Dropout is applied
          regardless of `is_training`, so pass 1.0 for evaluation.

    Returns:
      A tuple consisting of:
        1. Network output at location `self._final_endpoint`.
        2. Dictionary containing all endpoints up to `self._final_endpoint`,
          indexed by endpoint name.

    Raises:
      ValueError: if `self._final_endpoint` is not recognized.
    """
    if self._final_endpoint not in self.VALID_ENDPOINTS:
      raise ValueError('Unknown final endpoint %s' % self._final_endpoint)

    net = inputs
    end_points = {}

    # Every endpoint before 'Logits' is a single feed-forward stage described
    # by one of the spec tables; build them in VALID_ENDPOINTS order and stop
    # as soon as the requested endpoint has been produced.
    logits_index = self.VALID_ENDPOINTS.index('Logits')
    for end_point in self.VALID_ENDPOINTS[:logits_index]:
      if end_point in self._CONV_SPECS:
        channels, kernel_shape, stride = self._CONV_SPECS[end_point]
        net = Unit3D(output_channels=channels, kernel_shape=kernel_shape,
                     stride=stride, name=end_point)(net,
                                                    is_training=is_training)
      elif end_point in self._POOL_SPECS:
        ksize, strides = self._POOL_SPECS[end_point]
        net = tf.nn.max_pool3d(net, ksize=list(ksize), strides=list(strides),
                               padding=snt.SAME, name=end_point)
      else:
        net = self._inception_block(net, is_training, end_point)
      end_points[end_point] = net
      if self._final_endpoint == end_point:
        return net, end_points

    end_point = 'Logits'
    with tf.variable_scope(end_point):
      # The fixed 7x7 spatial window matches the feature-map size implied by
      # the documented 224 x 224 input resolution.
      net = tf.nn.avg_pool3d(net, ksize=[1, 2, 7, 7, 1],
                             strides=[1, 1, 1, 1, 1], padding=snt.VALID)
      net = tf.nn.dropout(net, dropout_keep_prob)
      # Plain 1x1x1 convolution with bias: no batch norm, no activation.
      logits = Unit3D(output_channels=self._num_classes,
                      kernel_shape=[1, 1, 1],
                      activation_fn=None,
                      use_batch_norm=False,
                      use_bias=True,
                      name='Conv3d_0c_1x1')(net, is_training=is_training)
      if self._spatial_squeeze:
        logits = tf.squeeze(logits, [2, 3], name='SpatialSqueeze')
    # Average over axis 1, the frame axis of the documented input layout.
    averaged_logits = tf.reduce_mean(logits, axis=1)
    end_points[end_point] = averaged_logits
    if self._final_endpoint == end_point:
      return averaged_logits, end_points

    end_point = 'Predictions'
    predictions = tf.nn.softmax(averaged_logits)
    end_points[end_point] = predictions
    return predictions, end_points