
a b/MultiscaleNetRefactored.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 17,
6
   "metadata": {
7
    "collapsed": true
8
   },
9
   "outputs": [],
10
   "source": [
11
    "import os\n",
12
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\""
13
   ]
14
  },
15
  {
16
   "cell_type": "markdown",
17
   "metadata": {},
18
   "source": [
19
    "This notebook is a refactored version of a Multiscale Convolutional Network solution for NeuroHack at Yandex"
20
   ]
21
  },
22
  {
23
   "cell_type": "code",
24
   "execution_count": 4,
25
   "metadata": {
26
    "collapsed": true
27
   },
28
   "outputs": [],
29
   "source": [
30
    "import random\n",
31
    "\n",
32
    "from tqdm import tqdm\n",
33
    "import pandas as pd\n",
34
    "import h5py\n",
35
    "import numpy as np\n",
36
    "from scipy.signal import resample"
37
   ]
38
  },
39
  {
40
   "cell_type": "code",
41
   "execution_count": 5,
42
   "metadata": {
43
    "collapsed": true
44
   },
45
   "outputs": [],
46
   "source": [
47
    "train_path = \"hackaton_data/train.h5\"\n",
48
    "test_path = \"hackaton_data/test.h5\"\n",
49
    "model_dump_path = \"hackaton_data/convnet-multiscale-true-01988\"\n",
50
    "\n",
51
    "slice_len = 1125"
52
   ]
53
  },
54
  {
55
   "cell_type": "markdown",
56
   "metadata": {},
57
   "source": [
58
    "Read train data into a dict of (subject_id, (X, y)) pairs"
59
   ]
60
  },
61
  {
62
   "cell_type": "code",
63
   "execution_count": 6,
64
   "metadata": {
65
    "collapsed": false
66
   },
67
   "outputs": [],
68
   "source": [
69
    "subjects = {}\n",
70
    "with h5py.File(train_path, \"r\") as data_file:\n",
71
    "    for subject, subject_data in data_file.items():\n",
72
    "        X = subject_data[\"data\"][:]\n",
73
    "        y = subject_data[\"labels\"][:][0]\n",
74
    "        subjects[subject] = (X, y)"
75
   ]
76
  },
77
  {
78
   "cell_type": "markdown",
79
   "metadata": {},
80
   "source": [
81
    "We need to split our data into train and local validation sets. For local validation we select random slices of slice_len. They may overlap with slices in train set, this split is not optimal."
82
   ]
83
  },
84
  {
85
   "cell_type": "code",
86
   "execution_count": 7,
87
   "metadata": {
88
    "collapsed": false
89
   },
90
   "outputs": [],
91
   "source": [
92
    "from sklearn.model_selection import train_test_split\n",
93
    "\n",
94
    "def train_val_split(X, y):\n",
95
    "    start_indices = list(range(0, len(X) - slice_len))\n",
96
    "    y = y[:len(start_indices)]\n",
97
    "    indices_train, indices_test, _, _ = train_test_split(start_indices, y)\n",
98
    "    return {\"train_ind\": indices_train, \"val_ind\": indices_test, \"X\": X, \"y\": y}"
99
   ]
100
  },
101
  {
102
   "cell_type": "code",
103
   "execution_count": 8,
104
   "metadata": {
105
    "collapsed": false
106
   },
107
   "outputs": [],
108
   "source": [
109
    "for subject in subjects:\n",
110
    "    X, y = subjects[subject][0], subjects[subject][1]\n",
111
    "    X = X.T\n",
112
    "    subjects[subject] = train_val_split(X, y)"
113
   ]
114
  },
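  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a hedged sketch of the non-overlapping alternative mentioned above (an illustrative addition, not part of the original solution): hold out the tail of each recording for validation and leave a gap of slice_len so training and validation slices cannot share samples. The function name and the 0.25 hold-out fraction are assumptions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def train_val_split_no_overlap(X, y, val_fraction=0.25):\n",
    "    # Illustrative alternative split (not used by the original solution):\n",
    "    # validation slices start in the last val_fraction of the recording and\n",
    "    # training slices stop a full slice_len earlier, so the two sets of\n",
    "    # windows never overlap.\n",
    "    n_starts = len(X) - slice_len\n",
    "    split = int(n_starts * (1 - val_fraction))\n",
    "    train_ind = list(range(0, max(split - slice_len, 0)))\n",
    "    val_ind = list(range(split, n_starts))\n",
    "    return {\"train_ind\": train_ind, \"val_ind\": val_ind, \"X\": X, \"y\": y[:n_starts]}"
   ]
  },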
115
  {
116
   "cell_type": "markdown",
117
   "metadata": {},
118
   "source": [
119
    "Define utility function to convert class labels to one-hot encoding"
120
   ]
121
  },
122
  {
123
   "cell_type": "code",
124
   "execution_count": 9,
125
   "metadata": {
126
    "collapsed": false
127
   },
128
   "outputs": [],
129
   "source": [
130
    "def to_onehot(y):\n",
131
    "    onehot = np.zeros(3)\n",
132
    "    onehot[y] = 1\n",
133
    "    return onehot"
134
   ]
135
  },
136
  {
137
   "cell_type": "markdown",
138
   "metadata": {},
139
   "source": [
140
    "Define a funtion that will select random subject and find a random subsequence of consistent class"
141
   ]
142
  },
143
  {
144
   "cell_type": "code",
145
   "execution_count": 10,
146
   "metadata": {
147
    "collapsed": true
148
   },
149
   "outputs": [],
150
   "source": [
151
    "def generate_slice(slice_len, val=False):\n",
152
    "    subject_data = random.choice(list(subjects.values()))\n",
153
    "    if val is True:\n",
154
    "        indices, y, X = subject_data[\"val_ind\"], subject_data[\"y\"], subject_data[\"X\"]\n",
155
    "    else:\n",
156
    "        indices, y, X = subject_data[\"train_ind\"], subject_data[\"y\"], subject_data[\"X\"]\n",
157
    "    \n",
158
    "    while True:\n",
159
    "        slice_start = random.choice(indices)\n",
160
    "        slice_end = slice_start + slice_len\n",
161
    "        slice_x = X[slice_start:slice_end]\n",
162
    "        slice_y = y[slice_start:slice_end]\n",
163
    "        \n",
164
    "        if len(set(slice_y)) == 1:\n",
165
    "            return slice_x, to_onehot(slice_y)"
166
   ]
167
  },
168
  {
169
   "cell_type": "markdown",
170
   "metadata": {},
171
   "source": [
172
    "Define a generator that will yield batches of resampled input time series and according class labels in infinite loop"
173
   ]
174
  },
175
  {
176
   "cell_type": "code",
177
   "execution_count": 11,
178
   "metadata": {
179
    "collapsed": true
180
   },
181
   "outputs": [],
182
   "source": [
183
    "def data_generator(batch_size, slice_len, val=False):\n",
184
    "    while True:\n",
185
    "        batch_x = []\n",
186
    "        batch_y = []\n",
187
    "        \n",
188
    "        for i in range(0, batch_size):\n",
189
    "            x, y = generate_slice(slice_len, val=val)\n",
190
    "            batch_x.append(x)\n",
191
    "            batch_y.append(y)\n",
192
    "            \n",
193
    "        y = np.array(batch_y)\n",
194
    "        \n",
195
    "        x_256 = np.array([resample(i, 256) for i in batch_x])\n",
196
    "        x_500 = np.array([resample(i, 500) for i in batch_x])\n",
197
    "        x = np.array([i for i in batch_x])\n",
198
    "        yield ([x_256, x_500, x], y)"
199
   ]
200
  },
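  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Quick sanity check (an illustrative addition, not part of the original solution): drawing one batch from the generator shows the three views the network expects, namely inputs of shape (batch, 256, 24), (batch, 500, 24) and (batch, 1125, 24) plus (batch, 3) one-hot labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Illustrative sanity check: draw a single batch and inspect the shapes\n",
    "# that will be fed to the three-input network defined below.\n",
    "(x_256, x_500, x_full), y_batch = next(data_generator(batch_size=4, slice_len=slice_len))\n",
    "print(x_256.shape, x_500.shape, x_full.shape, y_batch.shape)\n",
    "# expected: (4, 256, 24) (4, 500, 24) (4, 1125, 24) (4, 3)"
   ]
  },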
201
  {
202
   "cell_type": "markdown",
203
   "metadata": {},
204
   "source": [
205
    "Lets build a neural network. Import all needed layers and keras utilities"
206
   ]
207
  },
208
  {
209
   "cell_type": "code",
210
   "execution_count": 12,
211
   "metadata": {
212
    "collapsed": false
213
   },
214
   "outputs": [
215
    {
216
     "name": "stderr",
217
     "output_type": "stream",
218
     "text": [
219
      "Using TensorFlow backend.\n"
220
     ]
221
    }
222
   ],
223
   "source": [
224
    "from keras.layers import Convolution1D, Dense, Dropout, Input, merge, GlobalMaxPooling1D\n",
225
    "from keras.models import Model, load_model\n",
226
    "from keras.optimizers import RMSprop"
227
   ]
228
  },
229
  {
230
   "cell_type": "markdown",
231
   "metadata": {},
232
   "source": [
233
    "This function builds a base neural net model that performs feature extraction"
234
   ]
235
  },
236
  {
237
   "cell_type": "code",
238
   "execution_count": 13,
239
   "metadata": {
240
    "collapsed": false
241
   },
242
   "outputs": [],
243
   "source": [
244
    "def get_base_model(input_len, fsize):\n",
245
    "    '''Base network to be shared (eq. to feature extraction).\n",
246
    "    '''\n",
247
    "    input_seq = Input(shape=(input_len, 24))\n",
248
    "    nb_filters = 150\n",
249
    "    convolved = Convolution1D(nb_filters, fsize, border_mode=\"same\", activation=\"tanh\")(input_seq)\n",
250
    "    processed = GlobalMaxPooling1D()(convolved)\n",
251
    "    compressed = Dense(150, activation=\"tanh\")(processed)\n",
252
    "    compressed = Dropout(0.3)(compressed)\n",
253
    "    compressed = Dense(150, activation=\"tanh\")(compressed)\n",
254
    "    model = Model(input=input_seq, output=compressed)            \n",
255
    "    return model"
256
   ]
257
  },
258
  {
259
   "cell_type": "markdown",
260
   "metadata": {},
261
   "source": [
262
    "Build and compile a graph with 3 inputs and one output"
263
   ]
264
  },
265
  {
266
   "cell_type": "code",
267
   "execution_count": 14,
268
   "metadata": {
269
    "collapsed": false
270
   },
271
   "outputs": [],
272
   "source": [
273
    "input256_seq = Input(shape=(256, 24))\n",
274
    "input500_seq = Input(shape=(500, 24))\n",
275
    "input1125_seq = Input(shape=(1125, 24))\n",
276
    "    \n",
277
    "base_network256 = get_base_model(256, 4)\n",
278
    "base_network500 = get_base_model(500, 7)\n",
279
    "base_network1125 = get_base_model(1125, 10)\n",
280
    "\n",
281
    "embedding_256 = base_network256(input256_seq)\n",
282
    "embedding_500 = base_network500(input500_seq)\n",
283
    "embedding_1125 = base_network1125(input1125_seq)\n",
284
    "    \n",
285
    "merged = merge([embedding_256, embedding_500, embedding_1125], mode=\"concat\")\n",
286
    "out = Dense(3, activation='softmax')(merged)\n",
287
    "    \n",
288
    "model = Model(input=[input256_seq, input500_seq, input1125_seq], output=out)\n",
289
    "    \n",
290
    "opt = RMSprop(lr=0.005, clipvalue=10**6)\n",
291
    "model.compile(loss=\"categorical_crossentropy\", optimizer=opt)"
292
   ]
293
  },
294
  {
295
   "cell_type": "markdown",
296
   "metadata": {},
297
   "source": [
298
    "This cell will train the model from scratch, lets load it from the model dump instead (take a look at next code cell)!"
299
   ]
300
  },
301
  {
302
   "cell_type": "code",
303
   "execution_count": 15,
304
   "metadata": {
305
    "collapsed": false
306
   },
307
   "outputs": [
308
    {
309
     "name": "stdout",
310
     "output_type": "stream",
311
     "text": [
312
      "Epoch 1/100000\n",
313
      "1100/4000 [=======>......................] - ETA: 83s - loss: 9.9279 \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
314
     ]
315
    },
316
    {
317
     "ename": "KeyboardInterrupt",
318
     "evalue": "",
319
     "output_type": "error",
320
     "traceback": [
321
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
322
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
323
      "\u001b[0;32m<ipython-input-15-7db8dac50f16>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m model.fit_generator(data_generator(batch_size=50, slice_len=slice_len), samples_per_epoch, nb_epoch, \n\u001b[1;32m      8\u001b[0m                     \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mearlyStopping\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnb_val_samples\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2000\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m                     validation_data=data_generator(batch_size=50, slice_len=slice_len, val=True))\n\u001b[0m",
324
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit_generator\u001b[0;34m(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)\u001b[0m\n\u001b[1;32m   1551\u001b[0m                     outs = self.train_on_batch(x, y,\n\u001b[1;32m   1552\u001b[0m                                                \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1553\u001b[0;31m                                                class_weight=class_weight)\n\u001b[0m\u001b[1;32m   1554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1555\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
325
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mtrain_on_batch\u001b[0;34m(self, x, y, sample_weight, class_weight)\u001b[0m\n\u001b[1;32m   1314\u001b[0m             \u001b[0mins\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1315\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_train_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1316\u001b[0;31m         \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1317\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1318\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
326
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   1898\u001b[0m         \u001b[0msession\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1899\u001b[0m         updated = session.run(self.outputs + [self.updates_op],\n\u001b[0;32m-> 1900\u001b[0;31m                               feed_dict=feed_dict)\n\u001b[0m\u001b[1;32m   1901\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mupdated\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1902\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
327
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m    764\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    765\u001b[0m       result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 766\u001b[0;31m                          run_metadata_ptr)\n\u001b[0m\u001b[1;32m    767\u001b[0m       \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    768\u001b[0m         \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
328
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m    962\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    963\u001b[0m       results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 964\u001b[0;31m                              feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m    965\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    966\u001b[0m       \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
329
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m   1012\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1013\u001b[0m       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1014\u001b[0;31m                            target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m   1015\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1016\u001b[0m       return self._do_call(_prun_fn, self._session, handle, feed_dict,\n",
330
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m   1019\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1020\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1021\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1022\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1023\u001b[0m       \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
331
      "\u001b[0;32m/home/akis/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m   1001\u001b[0m         return tf_session.TF_Run(session, options,\n\u001b[1;32m   1002\u001b[0m                                  \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1003\u001b[0;31m                                  status, run_metadata)\n\u001b[0m\u001b[1;32m   1004\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1005\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
332
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
333
     ]
334
    }
335
   ],
336
   "source": [
337
    "from keras.callbacks import EarlyStopping\n",
338
    "\n",
339
    "nb_epoch = 100000\n",
340
    "earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='auto')\n",
341
    "samples_per_epoch = 4000\n",
342
    "\n",
343
    "model.fit_generator(data_generator(batch_size=50, slice_len=slice_len), samples_per_epoch, nb_epoch, \n",
344
    "                    callbacks=[earlyStopping], verbose=1, nb_val_samples=2000,\n",
345
    "                    validation_data=data_generator(batch_size=50, slice_len=slice_len, val=True))"
346
   ]
347
  },
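  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you do let the previous cell train to completion, the weights can be dumped so the next cell can restore them. This cell is an illustrative addition; the provided dump at model_dump_path was presumably produced the same way, and running this would overwrite it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Illustrative addition: persist the trained model so load_model below\n",
    "# can restore it. Note that this overwrites the provided dump.\n",
    "model.save(model_dump_path)"
   ]
  },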
348
  {
349
   "cell_type": "code",
350
   "execution_count": null,
351
   "metadata": {
352
    "collapsed": true
353
   },
354
   "outputs": [],
355
   "source": [
356
    "model = load_model(\"hackaton_data/convnet-multiscale-true-01988\")"
357
   ]
358
  },
359
  {
360
   "cell_type": "code",
361
   "execution_count": null,
362
   "metadata": {
363
    "collapsed": false
364
   },
365
   "outputs": [],
366
   "source": [
367
    "model.summary()"
368
   ]
369
  },
370
  {
371
   "cell_type": "markdown",
372
   "metadata": {},
373
   "source": [
374
    "Read test data into a nested structure with multiple chunks for each subject"
375
   ]
376
  },
377
  {
378
   "cell_type": "code",
379
   "execution_count": null,
380
   "metadata": {
381
    "collapsed": false
382
   },
383
   "outputs": [],
384
   "source": [
385
    "with h5py.File(\"hackaton_data/test.h5\", \"r\") as data_file:\n",
386
    "    test = {}\n",
387
    "    for subject, subject_data in data_file.items():\n",
388
    "        test[subject] = {}\n",
389
    "        for chunk_id, chunk in data_file[subject].items():\n",
390
    "            test[subject][chunk_id] = chunk[:]"
391
   ]
392
  },
393
  {
394
   "cell_type": "code",
395
   "execution_count": null,
396
   "metadata": {
397
    "collapsed": false
398
   },
399
   "outputs": [],
400
   "source": [
401
    "test['subject_0']['chunk_0'].shape"
402
   ]
403
  },
404
  {
405
   "cell_type": "code",
406
   "execution_count": null,
407
   "metadata": {
408
    "collapsed": true
409
   },
410
   "outputs": [],
411
   "source": [
412
    "# utility function that performs resampling of input timeseries \n",
413
    "def multiscale(chunk):\n",
414
    "    resampled_256 = resample(chunk, 256)\n",
415
    "    resampled_500 = resample(chunk, 500)\n",
416
    "    return [resampled_256, resampled_500, chunk]"
417
   ]
418
  },
419
  {
420
   "cell_type": "markdown",
421
   "metadata": {},
422
   "source": [
423
    "Make prediction for test data"
424
   ]
425
  },
426
  {
427
   "cell_type": "code",
428
   "execution_count": null,
429
   "metadata": {
430
    "collapsed": false
431
   },
432
   "outputs": [],
433
   "source": [
434
    "df = []\n",
435
    "for subj in test:\n",
436
    "    for chunk in tqdm(test[subj]):\n",
437
    "        data = {}\n",
438
    "        data[\"subject_id\"] = int(subj.split(\"_\")[-1])\n",
439
    "        data[\"chunk_id\"] = int(chunk.split(\"_\")[-1])\n",
440
    "        arr = test[subj][chunk].T\n",
441
    "        preds = model.predict([np.array([i]) for i in multiscale(arr)])[0]\n",
442
    "        data[\"class_0_score\"] = preds[0]\n",
443
    "        data[\"class_1_score\"] = preds[1]\n",
444
    "        data[\"class_2_score\"] = preds[2]\n",
445
    "        for i in range(0, 1125):\n",
446
    "            data[\"tick\"] = i\n",
447
    "            df.append(data.copy())\n",
448
    "df = pd.DataFrame(df)\n",
449
    "df = df[[\"subject_id\", \"chunk_id\", \"tick\", \"class_0_score\",\n",
450
    "         \"class_1_score\",\"class_2_score\"]]"
451
   ]
452
  },
453
  {
454
   "cell_type": "code",
455
   "execution_count": null,
456
   "metadata": {
457
    "collapsed": false
458
   },
459
   "outputs": [],
460
   "source": [
461
    "df.head()"
462
   ]
463
  },
464
  {
465
   "cell_type": "code",
466
   "execution_count": null,
467
   "metadata": {
468
    "collapsed": true
469
   },
470
   "outputs": [],
471
   "source": [
472
    "# save submission to .csv\n",
473
    "df.to_csv(\"submission.csv\")"
474
   ]
475
  }
476
 ],
477
 "metadata": {
478
  "kernelspec": {
479
   "display_name": "Python 3",
480
   "language": "python",
481
   "name": "python3"
482
  },
483
  "language_info": {
484
   "codemirror_mode": {
485
    "name": "ipython",
486
    "version": 3
487
   },
488
   "file_extension": ".py",
489
   "mimetype": "text/x-python",
490
   "name": "python",
491
   "nbconvert_exporter": "python",
492
   "pygments_lexer": "ipython3",
493
   "version": "3.5.2"
494
  }
495
 },
496
 "nbformat": 4,
497
 "nbformat_minor": 2
498
}