a b/Acceleration Features, Train = 10 users Test = 5 users, Random Analysis.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "# Train on subjects 2-11 (10 users); test on held-out subjects 13-17 (5 users)"
10
   ]
11
  },
12
  {
13
   "cell_type": "code",
14
   "execution_count": 1,
15
   "metadata": {},
16
   "outputs": [
17
    {
18
     "name": "stdout",
19
     "output_type": "stream",
20
     "text": [
21
      "CPU times: user 1.18 s, sys: 577 ms, total: 1.76 s\n",
22
      "Wall time: 9.18 s\n"
23
     ]
24
    }
25
   ],
26
   "source": [
27
    "%%time\n",
28
    "import pandas as pd\n",
29
    "import numpy as np\n",
30
    "from sklearn.ensemble import ExtraTreesClassifier\n",
31
    "from sklearn.metrics import classification_report\n",
32
    "from sklearn.model_selection import train_test_split"
33
   ]
34
  },
35
  {
36
   "cell_type": "code",
37
   "execution_count": 2,
38
   "metadata": {},
39
   "outputs": [
40
    {
41
     "name": "stdout",
42
     "output_type": "stream",
43
     "text": [
44
      "CPU times: user 1min 25s, sys: 13.7 s, total: 1min 38s\n",
45
      "Wall time: 2min 26s\n"
46
     ]
47
    }
48
   ],
49
   "source": [
50
    "%%time\n",
51
    "df = pd.read_csv(\"master_data.csv\")"
52
   ]
53
  },
54
  {
55
   "cell_type": "code",
56
   "execution_count": 3,
57
   "metadata": {},
58
   "outputs": [
59
    {
60
     "data": {
61
      "text/plain": [
62
       "Index(['target', 'subject', 'chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z',\n",
63
       "       'chest_ECG', 'chest_EMG', 'chest_EDA', 'chest_Temp', 'chest_Resp'],\n",
64
       "      dtype='object')"
65
      ]
66
     },
67
     "execution_count": 3,
68
     "metadata": {},
69
     "output_type": "execute_result"
70
    }
71
   ],
72
   "source": [
73
    "df.columns"
74
   ]
75
  },
76
  {
77
   "cell_type": "code",
78
   "execution_count": 4,
79
   "metadata": {},
80
   "outputs": [
81
    {
82
     "name": "stdout",
83
     "output_type": "stream",
84
     "text": [
85
      "CPU times: user 533 ms, sys: 124 ms, total: 657 ms\n",
86
      "Wall time: 698 ms\n"
87
     ]
88
    }
89
   ],
90
   "source": [
91
    "%%time\n",
92
    "df=df[['chest_ACC_x','chest_ACC_y','chest_ACC_z','target','subject']]"
93
   ]
94
  },
95
  {
96
   "cell_type": "code",
97
   "execution_count": 5,
98
   "metadata": {},
99
   "outputs": [
100
    {
101
     "name": "stdout",
102
     "output_type": "stream",
103
     "text": [
104
      "CPU times: user 510 ms, sys: 43 µs, total: 510 ms\n",
105
      "Wall time: 508 ms\n"
106
     ]
107
    },
108
    {
109
     "data": {
110
      "text/plain": [
111
       "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]"
112
      ]
113
     },
114
     "execution_count": 5,
115
     "metadata": {},
116
     "output_type": "execute_result"
117
    }
118
   ],
119
   "source": [
120
    "%%time\n",
121
    "df['subject'].unique()\n",
122
    "list_of_subjects=list(df['subject'].unique())\n",
123
    "list_of_subjects.sort()\n",
124
    "list_of_subjects"
125
   ]
126
  },
127
  {
128
   "cell_type": "code",
129
   "execution_count": 6,
130
   "metadata": {},
131
   "outputs": [
132
    {
133
     "name": "stdout",
134
     "output_type": "stream",
135
     "text": [
136
      "CPU times: user 107 µs, sys: 18 µs, total: 125 µs\n",
137
      "Wall time: 129 µs\n"
138
     ]
139
    },
140
    {
141
     "data": {
142
      "text/plain": [
143
       "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']"
144
      ]
145
     },
146
     "execution_count": 6,
147
     "metadata": {},
148
     "output_type": "execute_result"
149
    }
150
   ],
151
   "source": [
152
    "%%time\n",
153
    "features=df.columns.tolist()\n",
154
    "to_remove = [fea for fea in features if \"target\"  in fea or \"subject\"  in fea]\n",
155
    "feature = [x for x in features if x not in to_remove]\n",
156
    "feature"
157
   ]
158
  },
159
  {
160
   "cell_type": "code",
161
   "execution_count": 7,
162
   "metadata": {},
163
   "outputs": [
164
    {
165
     "name": "stdout",
166
     "output_type": "stream",
167
     "text": [
168
      "2\n",
169
      "subject_2\n",
170
      "3\n",
171
      "subject_3\n",
172
      "4\n",
173
      "subject_4\n",
174
      "5\n",
175
      "subject_5\n",
176
      "6\n",
177
      "subject_6\n",
178
      "7\n",
179
      "subject_7\n",
180
      "8\n",
181
      "subject_8\n",
182
      "9\n",
183
      "subject_9\n",
184
      "10\n",
185
      "subject_10\n",
186
      "11\n",
187
      "subject_11\n",
188
      "13\n",
189
      "subject_13\n",
190
      "14\n",
191
      "subject_14\n",
192
      "15\n",
193
      "subject_15\n",
194
      "16\n",
195
      "subject_16\n",
196
      "17\n",
197
      "subject_17\n",
198
      "CPU times: user 16.3 s, sys: 2.31 s, total: 18.6 s\n",
199
      "Wall time: 18.7 s\n"
200
     ]
201
    }
202
   ],
203
   "source": [
204
    "%%time\n",
205
    "for i in list_of_subjects:\n",
206
    "    print(i)\n",
207
    "    globals()['subject_%s' % i]=df[df['subject'] == i]\n",
208
    "    globals()['subject_%s_train' % i],globals()['subject_%s_test' % i]=train_test_split(globals()['subject_%s' % i], test_size=0.3, random_state=42)  # fixed seed keeps the 70/30 split reproducible\n",
209
    "    print('subject_'+str(i))"
210
   ]
211
  },
212
  {
213
   "cell_type": "code",
214
   "execution_count": 8,
215
   "metadata": {},
216
   "outputs": [
217
    {
218
     "name": "stdout",
219
     "output_type": "stream",
220
     "text": [
221
      "(4165000, 5)\n",
222
      "(2915500, 5)\n",
223
      "(1249500, 5)\n"
224
     ]
225
    }
226
   ],
227
   "source": [
228
    "print(subject_2.shape)\n",
229
    "print(subject_2_train.shape)\n",
230
    "print(subject_2_test.shape)"
231
   ]
232
  },
233
  {
234
   "cell_type": "code",
235
   "execution_count": 8,
236
   "metadata": {},
237
   "outputs": [
238
    {
239
     "name": "stdout",
240
     "output_type": "stream",
241
     "text": [
242
      "CPU times: user 614 ms, sys: 1.47 s, total: 2.08 s\n",
243
      "Wall time: 2.08 s\n"
244
     ]
245
    }
246
   ],
247
   "source": [
248
    "%%time\n",
249
    "train=pd.concat([subject_2,subject_3,subject_4,subject_5,subject_6,subject_7,subject_8,subject_9,subject_10,subject_11])\n",
250
    "test=pd.concat([subject_13,subject_14,subject_15,subject_16,subject_17])"
251
   ]
252
  },
253
  {
254
   "cell_type": "code",
255
   "execution_count": 10,
256
   "metadata": {},
257
   "outputs": [
258
    {
259
     "name": "stdout",
260
     "output_type": "stream",
261
     "text": [
262
      "(40143599, 5)\n",
263
      "(18981901, 5)\n"
264
     ]
265
    }
266
   ],
267
   "source": [
268
    "print(train.shape)\n",
269
    "print(test.shape)"
270
   ]
271
  },
272
  {
273
   "cell_type": "code",
274
   "execution_count": 11,
275
   "metadata": {},
276
   "outputs": [
277
    {
278
     "name": "stdout",
279
     "output_type": "stream",
280
     "text": [
281
      "Index          321148792\n",
282
      "chest_ACC_x    321148792\n",
283
      "chest_ACC_y    321148792\n",
284
      "chest_ACC_z    321148792\n",
285
      "target         321148792\n",
286
      "subject        321148792\n",
287
      "dtype: int64\n",
288
      "Index          151855208\n",
289
      "chest_ACC_x    151855208\n",
290
      "chest_ACC_y    151855208\n",
291
      "chest_ACC_z    151855208\n",
292
      "target         151855208\n",
293
      "subject        151855208\n",
294
      "dtype: int64\n"
295
     ]
296
    }
297
   ],
298
   "source": [
299
    "print(train.memory_usage(index=True, deep=False))\n",
300
    "print(test.memory_usage(index=True, deep=False))"
301
   ]
302
  },
303
  {
304
   "cell_type": "code",
305
   "execution_count": 12,
306
   "metadata": {},
307
   "outputs": [
308
    {
309
     "name": "stdout",
310
     "output_type": "stream",
311
     "text": [
312
      "<class 'pandas.core.frame.DataFrame'>\n",
313
      "Int64Index: 40143599 entries, 26710599 to 8382499\n",
314
      "Data columns (total 5 columns):\n",
315
      "chest_ACC_x    float64\n",
316
      "chest_ACC_y    float64\n",
317
      "chest_ACC_z    float64\n",
318
      "target         int64\n",
319
      "subject        int64\n",
320
      "dtypes: float64(3), int64(2)\n",
321
      "memory usage: 1.8 GB\n",
322
      "None\n",
323
      "<class 'pandas.core.frame.DataFrame'>\n",
324
      "Int64Index: 18981901 entries, 39094999 to 51311399\n",
325
      "Data columns (total 5 columns):\n",
326
      "chest_ACC_x    float64\n",
327
      "chest_ACC_y    float64\n",
328
      "chest_ACC_z    float64\n",
329
      "target         int64\n",
330
      "subject        int64\n",
331
      "dtypes: float64(3), int64(2)\n",
332
      "memory usage: 868.9 MB\n",
333
      "None\n"
334
     ]
335
    }
336
   ],
337
   "source": [
338
    "train.info(memory_usage='deep')  # info() prints its own report and returns None, so do not wrap it in print()\n",
339
    "test.info(memory_usage='deep')  # info() prints its own report and returns None, so do not wrap it in print()"
340
   ]
341
  },
342
  {
343
   "cell_type": "code",
344
   "execution_count": 10,
345
   "metadata": {},
346
   "outputs": [
347
    {
348
     "data": {
349
      "text/plain": [
350
       "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']"
351
      ]
352
     },
353
     "execution_count": 10,
354
     "metadata": {},
355
     "output_type": "execute_result"
356
    }
357
   ],
358
   "source": [
359
    "feature"
360
   ]
361
  },
362
  {
363
   "cell_type": "code",
364
   "execution_count": 9,
365
   "metadata": {},
366
   "outputs": [],
367
   "source": [
368
    "features=['chest_ACC_x','chest_ACC_y','chest_ACC_z']\n",
369
    "target=['target']"
370
   ]
371
  },
372
  {
373
   "cell_type": "code",
374
   "execution_count": 11,
375
   "metadata": {},
376
   "outputs": [
377
    {
378
     "name": "stdout",
379
     "output_type": "stream",
380
     "text": [
381
      "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']\n",
382
      "CPU times: user 98 µs, sys: 17 µs, total: 115 µs\n",
383
      "Wall time: 120 µs\n"
384
     ]
385
    }
386
   ],
387
   "source": [
388
    "%%time\n",
389
    "features=feature\n",
390
    "print(features)"
391
   ]
392
  },
393
  {
394
   "cell_type": "code",
395
   "execution_count": 12,
396
   "metadata": {},
397
   "outputs": [
398
    {
399
     "name": "stdout",
400
     "output_type": "stream",
401
     "text": [
402
      "['target']\n",
403
      "CPU times: user 99 µs, sys: 0 ns, total: 99 µs\n",
404
      "Wall time: 103 µs\n"
405
     ]
406
    }
407
   ],
408
   "source": [
409
    "%%time\n",
410
    "target=['target']\n",
411
    "print(target)"
412
   ]
413
  },
414
  {
415
   "cell_type": "code",
416
   "execution_count": 13,
417
   "metadata": {},
418
   "outputs": [
419
    {
420
     "name": "stdout",
421
     "output_type": "stream",
422
     "text": [
423
      "2\n",
424
      "3\n",
425
      "4\n",
426
      "5\n",
427
      "6\n",
428
      "7\n",
429
      "8\n",
430
      "9\n",
431
      "10\n",
432
      "11\n",
433
      "13\n",
434
      "14\n",
435
      "15\n",
436
      "16\n",
437
      "17\n",
438
      "CPU times: user 870 µs, sys: 127 ms, total: 127 ms\n",
439
      "Wall time: 122 ms\n"
440
     ]
441
    }
442
   ],
443
   "source": [
444
    "%%time\n",
445
    "for i in list_of_subjects[0:]:\n",
446
    "    print(i)\n",
447
    "    del(globals()['subject_%s' % i])\n",
448
    "    del(globals()['subject_%s_train' % i])\n",
449
    "    del(globals()['subject_%s_test' % i])\n",
450
    "del df"
451
   ]
452
  },
453
  {
454
   "cell_type": "code",
455
   "execution_count": 14,
456
   "metadata": {},
457
   "outputs": [
458
    {
459
     "name": "stdout",
460
     "output_type": "stream",
461
     "text": [
462
      "ExtraTreesClassifier\t classification_report\t feature\t features\t i\t list_of_subjects\t np\t pd\t target\t \n",
463
      "test\t to_remove\t train\t train_test_split\t \n"
464
     ]
465
    }
466
   ],
467
   "source": [
468
    "%who"
469
   ]
470
  },
471
  {
472
   "cell_type": "code",
473
   "execution_count": 19,
474
   "metadata": {},
475
   "outputs": [
476
    {
477
     "data": {
478
      "text/plain": [
479
       "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]"
480
      ]
481
     },
482
     "execution_count": 19,
483
     "metadata": {},
484
     "output_type": "execute_result"
485
    }
486
   ],
487
   "source": [
488
    "list_of_subjects"
489
   ]
490
  },
491
  {
492
   "cell_type": "code",
493
   "execution_count": null,
494
   "metadata": {},
495
   "outputs": [
496
    {
497
     "name": "stderr",
498
     "output_type": "stream",
499
     "text": [
500
      "/home/sf/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
501
      "  \n",
502
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n"
503
     ]
504
    },
505
    {
506
     "name": "stdout",
507
     "output_type": "stream",
508
     "text": [
509
      "building tree 1 of 50\n",
510
      "building tree 2 of 50\n",
511
      "building tree 3 of 50building tree 4 of 50\n",
512
      "building tree 5 of 50\n",
513
      "building tree 6 of 50\n",
514
      "\n",
515
      "building tree 7 of 50\n",
516
      "building tree 8 of 50\n",
517
      "building tree 9 of 50\n",
518
      "building tree 10 of 50\n"
519
     ]
520
    }
521
   ],
522
   "source": [
523
    "%%time\n",
524
    "et = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=2, random_state=42)  # fixed seed so the reported metrics are reproducible\n",
525
    "et.fit(train[features], train[target].values.ravel())  # pass y as a 1-D array to avoid DataConversionWarning\n",
526
    "y_pred=et.predict(test[features])"
527
   ]
528
  },
529
  {
530
   "cell_type": "code",
531
   "execution_count": 17,
532
   "metadata": {},
533
   "outputs": [
534
    {
535
     "name": "stdout",
536
     "output_type": "stream",
537
     "text": [
538
      "              precision    recall  f1-score   support\n",
539
      "\n",
540
      "           0       0.86      0.87      0.87   8295722\n",
541
      "           1       0.85      0.83      0.84   3698748\n",
542
      "           2       0.74      0.73      0.74   2092335\n",
543
      "           3       0.82      0.80      0.81   1170648\n",
544
      "           4       0.88      0.88      0.88   2480199\n",
545
      "\n",
546
      "    accuracy                           0.84  17737652\n",
547
      "   macro avg       0.83      0.82      0.83  17737652\n",
548
      "weighted avg       0.84      0.84      0.84  17737652\n",
549
      "\n",
550
      "CPU times: user 34.9 s, sys: 6.72 s, total: 41.6 s\n",
551
      "Wall time: 41.6 s\n"
552
     ]
553
    }
554
   ],
555
   "source": [
556
    "%%time\n",
557
    "print(classification_report(test[target],y_pred ))"
558
   ]
559
  },
560
  {
561
   "cell_type": "code",
562
   "execution_count": null,
563
   "metadata": {},
564
   "outputs": [],
565
   "source": [
566
    "del train\n",
567
    "del test"
568
   ]
569
  },
570
  {
571
   "cell_type": "code",
572
   "execution_count": null,
573
   "metadata": {},
574
   "outputs": [],
575
   "source": []
576
  },
577
  {
578
   "cell_type": "code",
579
   "execution_count": 16,
580
   "metadata": {
581
    "collapsed": true
582
   },
583
   "outputs": [
584
    {
585
     "name": "stdout",
586
     "output_type": "stream",
587
     "text": [
588
      "11\n",
589
      "14\n",
590
      "8\n",
591
      "15\n",
592
      "9\n",
593
      "10\n",
594
      "16\n",
595
      "4\n",
596
      "13\n",
597
      "3\n",
598
      "17\n",
599
      "5\n",
600
      "7\n",
601
      "CPU times: user 19.1 ms, sys: 306 ms, total: 325 ms\n",
602
      "Wall time: 318 ms\n"
603
     ]
604
    }
605
   ],
606
   "source": []
607
  },
608
  {
609
   "cell_type": "code",
610
   "execution_count": null,
611
   "metadata": {},
612
   "outputs": [],
613
   "source": []
614
  }
615
 ],
616
 "metadata": {
617
  "kernelspec": {
618
   "display_name": "Python 3",
619
   "language": "python",
620
   "name": "python3"
621
  },
622
  "language_info": {
623
   "codemirror_mode": {
624
    "name": "ipython",
625
    "version": 3
626
   },
627
   "file_extension": ".py",
628
   "mimetype": "text/x-python",
629
   "name": "python",
630
   "nbconvert_exporter": "python",
631
   "pygments_lexer": "ipython3",
632
   "version": "3.6.8"
633
  }
634
 },
635
 "nbformat": 4,
636
 "nbformat_minor": 2
637
}