{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ExtraTrees on `chest_ACC_*` features with a random per-subject split\n",
    "\n",
    "Trains an `ExtraTreesClassifier` on the three `chest_ACC_*` columns of `master_data.csv` to predict `target`. Each subject's rows are split 70/30 at random, and the per-subject pieces are concatenated into one train frame and one test frame.\n",
    "\n",
    "Original scratch note, kept verbatim (meaning unclear - TODO confirm): `train 1(14=0.01) test 14 (.99)`\n",
    "\n",
    "A previously saved, unseeded run of this notebook reported 0.84 accuracy (macro-average F1 0.83) over 17,737,652 test rows and classes 0-4. The split and the model are now seeded, so a fresh run may differ slightly from those figures.\n",
    "\n",
    "**NOTE(review):** rows are split at random *within* each subject, so every subject contributes rows to both train and test. If a subject-wise hold-out was intended, the split below has to change - confirm."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: ask TensorFlow (through Keras) to allocate GPU memory on demand.\n",
    "# Nothing below uses Keras or TensorFlow, so a missing or incompatible install\n",
    "# is reported and skipped instead of aborting a Restart & Run All.\n",
    "try:\n",
    "    import keras.backend as K\n",
    "\n",
    "    cfg = K.tf.ConfigProto()\n",
    "    cfg.gpu_options.allow_growth = True\n",
    "    K.set_session(K.tf.Session(config=cfg))\n",
    "except (ImportError, AttributeError) as exc:\n",
    "    print('Skipping Keras/TensorFlow session setup: %s' % exc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports and configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "DATA_PATH = \"master_data.csv\"\n",
    "FEATURES = ['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']\n",
    "TARGET = 'target'    # a single column name, so train[TARGET] is a 1-D Series\n",
    "SUBJECT = 'subject'\n",
    "TEST_SIZE = 0.3      # fraction of each subject's rows held out for testing\n",
    "SEED = 42            # makes the split and the forest reproducible\n",
    "N_ESTIMATORS = 20\n",
    "N_JOBS = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Read only the columns used below instead of loading every column and\n",
    "# discarding most of them afterwards.\n",
    "df = pd.read_csv(DATA_PATH, usecols=FEATURES + [TARGET, SUBJECT])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "list_of_subjects = sorted(df[SUBJECT].unique())\n",
    "list_of_subjects"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split each subject into train and test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Collect the per-subject pieces in lists rather than creating one global\n",
    "# variable per subject; this works for whatever subject ids the data holds.\n",
    "train_parts = []\n",
    "test_parts = []\n",
    "for subject_id in list_of_subjects:\n",
    "    subject_rows = df[df[SUBJECT] == subject_id]\n",
    "    subject_train, subject_test = train_test_split(\n",
    "        subject_rows, test_size=TEST_SIZE, random_state=SEED\n",
    "    )\n",
    "    train_parts.append(subject_train)\n",
    "    test_parts.append(subject_test)\n",
    "    print(subject_id, subject_train.shape, subject_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "train = pd.concat(train_parts)\n",
    "test = pd.concat(test_parts)\n",
    "\n",
    "# Every input row must land in exactly one of the two frames.\n",
    "assert len(train) + len(test) == len(df), 'rows were lost or duplicated by the split'\n",
    "print(train.shape)\n",
    "print(test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(train.memory_usage(index=True, deep=False))\n",
    "print(test.memory_usage(index=True, deep=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# DataFrame.info() prints its report and returns None, so it is not wrapped in print().\n",
    "train.info(memory_usage='deep')\n",
    "test.info(memory_usage='deep')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Only the combined frames are needed from here on; drop the source frame and\n",
    "# the per-subject intermediates so their memory can be reclaimed.\n",
    "del df, train_parts, test_parts, subject_rows, subject_train, subject_test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "et = ExtraTreesClassifier(\n",
    "    n_estimators=N_ESTIMATORS, n_jobs=N_JOBS, verbose=2, random_state=SEED\n",
    ")\n",
    "# y is passed as a 1-D Series; a one-column DataFrame makes scikit-learn emit\n",
    "# a DataConversionWarning.\n",
    "et.fit(train[FEATURES], train[TARGET])\n",
    "y_pred = et.predict(test[FEATURES])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "print(classification_report(test[TARGET], y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "del train\n",
    "del test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%who"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}