1314 lines (1313 with data), 45.6 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"\"\"\"\n",
"Created on Sun Aug 16 17:10:53 2020\n",
"\n",
"@author: wanxiang.shen@u.nus.edu\n",
"\"\"\"\n",
"\n",
"import os\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from sklearn.model_selection import KFold, StratifiedKFold\n",
"from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_auc_score\n",
"from sklearn.metrics import auc as calculate_auc\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"from aggmap import AggMap, AggMapNet, loadmap\n",
"\n",
"np.random.seed(666)  # just for repeatable results\n",
"\n",
"def prc_auc_score(y_true, y_score):\n",
"    \"\"\"Return the area under the precision-recall curve.\n",
"\n",
"    The curve is built with ``precision_recall_curve(y_true, y_score)`` and\n",
"    integrated with ``calculate_auc`` (sklearn's ``auc`` under its import\n",
"    alias), using recall as the x axis and precision as the y axis.\n",
"    \"\"\"\n",
"    precisions, recalls, _thresholds = precision_recall_curve(y_true, y_score)\n",
"    return calculate_auc(recalls, precisions)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2021-11-16 12:58:04,252 - \u001b[32mINFO\u001b[0m - [bidd-aggmap]\u001b[0m - Calculating distance ...\u001b[0m\n",
"2021-11-16 12:58:04,253 - \u001b[32mINFO\u001b[0m - [bidd-aggmap]\u001b[0m - the number of process is 16\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 3828/3828 [00:00<00:00, 8574.06it/s] \n",
"100%|##########| 3828/3828 [00:00<00:00, 599029.80it/s]\n",
"100%|##########| 88/88 [00:00<00:00, 1125.91it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2021-11-16 12:58:05,243 - \u001b[32mINFO\u001b[0m - [bidd-aggmap]\u001b[0m - applying hierarchical clustering to obtain group information ...\u001b[0m\n",
"2021-11-16 12:58:05,395 - \u001b[32mINFO\u001b[0m - [bidd-aggmap]\u001b[0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)\u001b[0m\n",
"2021-11-16 12:58:05,407 - \u001b[32mINFO\u001b[0m - [bidd-aggmap]\u001b[0m - Finished\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"['./saved_model/aggmap.mp']"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Both sheets live in the same workbook, so parse the file once and pick the\n",
"# two sheets out of the returned {sheet_name: DataFrame} mapping.\n",
"sheets = pd.read_excel('./COVID19.xlsx', sheet_name=['data', 'sample_info'])\n",
"dfx, dfy = sheets['data'], sheets['sample_info']\n",
"\n",
"# Drop the first column (presumably a sample identifier - verify against the\n",
"# workbook) and prefix every remaining feature name with 'p-'.\n",
"dfx = dfx.iloc[:, 1:]\n",
"cols = [\"p-%s\" % c for c in dfx.columns]\n",
"dfx.columns = cols\n",
"\n",
"# X and Y are sliced with the same row positions during cross-validation, so\n",
"# the two sheets have to contain the same number of rows.\n",
"assert len(dfx) == len(dfy), \"data and sample_info sheets differ in row count\"\n",
"\n",
"X = dfx.values\n",
"# One-hot labels: one float column per distinct value of 'class'.\n",
"Y = pd.get_dummies(dfy['class']).values.astype(float)\n",
"\n",
"mp = AggMap(dfx, metric = 'correlation')\n",
"mp.fit(cluster_channels = 10, verbose = 0)\n",
"\n",
"# Create the output folder first so the save below (and the per-fold model\n",
"# saves later in the notebook) also work on a fresh checkout.\n",
"os.makedirs('./saved_model', exist_ok=True)\n",
"mp.save('./saved_model/aggmap.mp')\n",
"#mp = loadmap('./saved_model/aggmap.mp')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4-fold cross-validation, repeated 5 times"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:02<00:00, 95.68it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 960.49it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1000.26it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 695.32it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1041.22it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 597.11it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1152.91it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 650.94it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1149.39it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 756.82it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1087.79it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 653.68it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1009.65it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 738.31it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1013.17it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 727.41it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1092.84it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 657.90it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1019.78it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 646.78it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1019.86it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 740.29it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 992.44it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 686.55it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1014.65it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 671.59it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 976.02it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 677.21it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1018.80it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 743.82it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1104.03it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 736.50it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 981.88it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 691.56it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 271/271 [00:00<00:00, 1025.32it/s]\n",
"100%|##########| 91/91 [00:00<00:00, 757.39it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (271, 10, 9, 10), (91, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1013.61it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 688.75it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|##########| 272/272 [00:00<00:00, 1075.89it/s]\n",
"100%|##########| 90/90 [00:00<00:00, 741.75it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" input train and test X shape is (272, 10, 9, 10), (90, 10, 9, 10) \n",
"MultiClassEstimator(batch_norm=False, batch_size=4, conv1_kernel_size=11,\n",
" dense_avf='relu', dense_layers=[128], dropout=0.0,\n",
" epochs=50, gpuid='0', last_avf='softmax',\n",
" loss='categorical_crossentropy', lr=0.0001, metric='ACC',\n",
" monitor='val_loss', n_inception=2,\n",
" name='AggMap MultiClass Estimator', patience=10000,\n",
" random_state=32, verbose=0)\n"
]
}
],
"source": [
"outer_fold = 4\n",
"repeat_seeds = [8, 16, 32, 64, 128] #5 repeats random seeds\n",
"\n",
"\n",
"def binary_metrics(y_true, y_pred, y_score):\n",
"    \"\"\"Score one test fold, treating label value 1 as the positive class.\n",
"\n",
"    y_true and y_pred are hard 0/1 labels; y_score is the continuous score\n",
"    used for the ROC and precision-recall areas. Returns a dict keyed by\n",
"    metric name.\n",
"    \"\"\"\n",
"    # Pin the label order so the matrix is always 2x2, even if a fold happens\n",
"    # to contain (or predict) only one of the two classes.\n",
"    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()\n",
"\n",
"    sensitivity = tp / (tp + fn)\n",
"    specificity = tn / (tn + fp)\n",
"    precision = tp / (tp + fp)\n",
"\n",
"    return {'accuracy': (tp + tn) / (tn + fp + fn + tp),\n",
"\n",
"            'prc_auc': prc_auc_score(y_true, y_score),\n",
"            'roc_auc': roc_auc_score(y_true, y_score),\n",
"\n",
"            'sensitivity': sensitivity,\n",
"            'specificity': specificity,\n",
"\n",
"            'precision': precision,\n",
"            'recall': sensitivity, #equals to sensitivity\n",
"\n",
"            'F1': 2*precision*sensitivity/(precision+sensitivity)}\n",
"\n",
"\n",
"run_all = []\n",
"for repeat_seed in repeat_seeds:\n",
"\n",
"    # NOTE(review): KFold does not stratify on the labels passed to split();\n",
"    # StratifiedKFold is imported but unused - confirm which one was intended.\n",
"    outer = KFold(n_splits = outer_fold, shuffle = True, random_state = repeat_seed)\n",
"    outer_idx = outer.split(range(len(Y)), Y[:,0])\n",
"\n",
"    for i, (train_idx, test_idx) in enumerate(outer_idx):\n",
"\n",
"        fold_num = \"fold_%s\" % str(i).zfill(2)\n",
"\n",
"        trainx, trainY = X[train_idx], Y[train_idx]\n",
"        testx, testY = X[test_idx], Y[test_idx]\n",
"\n",
"        trainX = mp.batch_transform(trainx, scale_method = 'standard')\n",
"        testX = mp.batch_transform(testx, scale_method = 'standard')\n",
"\n",
"        print(\"\\n input train and test X shape is %s, %s \" % (trainX.shape, testX.shape))\n",
"\n",
"        # fit the model\n",
"        clf = AggMapNet.MultiClassEstimator(epochs = 50, conv1_kernel_size=11,\n",
"                                            batch_size = 4, gpuid=0, verbose = 0)\n",
"        clf.fit(trainX, trainY)\n",
"\n",
"        # save the model for explanation\n",
"        clf._model.save('./saved_model/seed_%s-%s.h5' % (repeat_seed, fold_num))\n",
"\n",
"        # make prediction: column 0 of the one-hot labels is scored as positive\n",
"        y_true = testY[:,0]\n",
"        # presumably predict() returns the class index (0/1), so 1 - index\n",
"        # flags membership of class 0 - TODO confirm against AggMapNet\n",
"        y_pred = 1-clf.predict(testX)\n",
"        y_score = clf.predict_proba(testX)[:,0]\n",
"\n",
"        res = {'fold': fold_num, 'repeat_seed': repeat_seed}\n",
"        res.update(binary_metrics(y_true, y_pred, y_score))\n",
"\n",
"        run_all.append(res)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fold</th>\n",
" <th>repeat_seed</th>\n",
" <th>accuracy</th>\n",
" <th>prc_auc</th>\n",
" <th>roc_auc</th>\n",
" <th>sensitivity</th>\n",
" <th>specificity</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>fold_00</td>\n",
" <td>8</td>\n",
" <td>0.956044</td>\n",
" <td>0.998491</td>\n",
" <td>0.998028</td>\n",
" <td>1.000000</td>\n",
" <td>0.897436</td>\n",
" <td>0.928571</td>\n",
" <td>1.000000</td>\n",
" <td>0.962963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fold_01</td>\n",
" <td>8</td>\n",
" <td>0.956044</td>\n",
" <td>0.992434</td>\n",
" <td>0.985552</td>\n",
" <td>0.964912</td>\n",
" <td>0.941176</td>\n",
" <td>0.964912</td>\n",
" <td>0.964912</td>\n",
" <td>0.964912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>fold_02</td>\n",
" <td>8</td>\n",
" <td>0.911111</td>\n",
" <td>0.967566</td>\n",
" <td>0.969383</td>\n",
" <td>0.955556</td>\n",
" <td>0.866667</td>\n",
" <td>0.877551</td>\n",
" <td>0.955556</td>\n",
" <td>0.914894</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>fold_03</td>\n",
" <td>8</td>\n",
" <td>0.977778</td>\n",
" <td>0.996747</td>\n",
" <td>0.994684</td>\n",
" <td>0.982456</td>\n",
" <td>0.969697</td>\n",
" <td>0.982456</td>\n",
" <td>0.982456</td>\n",
" <td>0.982456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>fold_00</td>\n",
" <td>16</td>\n",
" <td>0.978022</td>\n",
" <td>0.994168</td>\n",
" <td>0.988776</td>\n",
" <td>0.964286</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.964286</td>\n",
" <td>0.981818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>fold_01</td>\n",
" <td>16</td>\n",
" <td>0.901099</td>\n",
" <td>0.990433</td>\n",
" <td>0.982972</td>\n",
" <td>0.877193</td>\n",
" <td>0.941176</td>\n",
" <td>0.961538</td>\n",
" <td>0.877193</td>\n",
" <td>0.917431</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>fold_02</td>\n",
" <td>16</td>\n",
" <td>0.933333</td>\n",
" <td>0.994449</td>\n",
" <td>0.993552</td>\n",
" <td>1.000000</td>\n",
" <td>0.857143</td>\n",
" <td>0.888889</td>\n",
" <td>1.000000</td>\n",
" <td>0.941176</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>fold_03</td>\n",
" <td>16</td>\n",
" <td>0.955556</td>\n",
" <td>0.976956</td>\n",
" <td>0.981500</td>\n",
" <td>0.980000</td>\n",
" <td>0.925000</td>\n",
" <td>0.942308</td>\n",
" <td>0.980000</td>\n",
" <td>0.960784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>fold_00</td>\n",
" <td>32</td>\n",
" <td>0.901099</td>\n",
" <td>0.988024</td>\n",
" <td>0.976531</td>\n",
" <td>0.946429</td>\n",
" <td>0.828571</td>\n",
" <td>0.898305</td>\n",
" <td>0.946429</td>\n",
" <td>0.921739</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>fold_01</td>\n",
" <td>32</td>\n",
" <td>0.934066</td>\n",
" <td>0.985338</td>\n",
" <td>0.982843</td>\n",
" <td>0.980392</td>\n",
" <td>0.875000</td>\n",
" <td>0.909091</td>\n",
" <td>0.980392</td>\n",
" <td>0.943396</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>fold_02</td>\n",
" <td>32</td>\n",
" <td>0.944444</td>\n",
" <td>0.997285</td>\n",
" <td>0.995885</td>\n",
" <td>0.907407</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.907407</td>\n",
" <td>0.951456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>fold_03</td>\n",
" <td>32</td>\n",
" <td>0.944444</td>\n",
" <td>0.997300</td>\n",
" <td>0.996500</td>\n",
" <td>1.000000</td>\n",
" <td>0.875000</td>\n",
" <td>0.909091</td>\n",
" <td>1.000000</td>\n",
" <td>0.952381</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>fold_00</td>\n",
" <td>64</td>\n",
" <td>0.967033</td>\n",
" <td>0.994111</td>\n",
" <td>0.991220</td>\n",
" <td>0.980000</td>\n",
" <td>0.951220</td>\n",
" <td>0.960784</td>\n",
" <td>0.980000</td>\n",
" <td>0.970297</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>fold_01</td>\n",
" <td>64</td>\n",
" <td>0.912088</td>\n",
" <td>0.972290</td>\n",
" <td>0.937245</td>\n",
" <td>0.857143</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.857143</td>\n",
" <td>0.923077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>fold_02</td>\n",
" <td>64</td>\n",
" <td>0.955556</td>\n",
" <td>0.990493</td>\n",
" <td>0.988636</td>\n",
" <td>0.956522</td>\n",
" <td>0.954545</td>\n",
" <td>0.956522</td>\n",
" <td>0.956522</td>\n",
" <td>0.956522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>fold_03</td>\n",
" <td>64</td>\n",
" <td>0.933333</td>\n",
" <td>0.976174</td>\n",
" <td>0.963368</td>\n",
" <td>1.000000</td>\n",
" <td>0.806452</td>\n",
" <td>0.907692</td>\n",
" <td>1.000000</td>\n",
" <td>0.951613</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>fold_00</td>\n",
" <td>128</td>\n",
" <td>0.923077</td>\n",
" <td>0.985936</td>\n",
" <td>0.979479</td>\n",
" <td>0.925926</td>\n",
" <td>0.918919</td>\n",
" <td>0.943396</td>\n",
" <td>0.925926</td>\n",
" <td>0.934579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>fold_01</td>\n",
" <td>128</td>\n",
" <td>0.945055</td>\n",
" <td>0.994228</td>\n",
" <td>0.991707</td>\n",
" <td>0.960000</td>\n",
" <td>0.926829</td>\n",
" <td>0.941176</td>\n",
" <td>0.960000</td>\n",
" <td>0.950495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>fold_02</td>\n",
" <td>128</td>\n",
" <td>0.966667</td>\n",
" <td>0.997533</td>\n",
" <td>0.996399</td>\n",
" <td>1.000000</td>\n",
" <td>0.916667</td>\n",
" <td>0.947368</td>\n",
" <td>1.000000</td>\n",
" <td>0.972973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>fold_03</td>\n",
" <td>128</td>\n",
" <td>0.911111</td>\n",
" <td>0.988550</td>\n",
" <td>0.980622</td>\n",
" <td>0.924528</td>\n",
" <td>0.891892</td>\n",
" <td>0.924528</td>\n",
" <td>0.924528</td>\n",
" <td>0.924528</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fold repeat_seed accuracy prc_auc roc_auc sensitivity \\\n",
"0 fold_00 8 0.956044 0.998491 0.998028 1.000000 \n",
"1 fold_01 8 0.956044 0.992434 0.985552 0.964912 \n",
"2 fold_02 8 0.911111 0.967566 0.969383 0.955556 \n",
"3 fold_03 8 0.977778 0.996747 0.994684 0.982456 \n",
"4 fold_00 16 0.978022 0.994168 0.988776 0.964286 \n",
"5 fold_01 16 0.901099 0.990433 0.982972 0.877193 \n",
"6 fold_02 16 0.933333 0.994449 0.993552 1.000000 \n",
"7 fold_03 16 0.955556 0.976956 0.981500 0.980000 \n",
"8 fold_00 32 0.901099 0.988024 0.976531 0.946429 \n",
"9 fold_01 32 0.934066 0.985338 0.982843 0.980392 \n",
"10 fold_02 32 0.944444 0.997285 0.995885 0.907407 \n",
"11 fold_03 32 0.944444 0.997300 0.996500 1.000000 \n",
"12 fold_00 64 0.967033 0.994111 0.991220 0.980000 \n",
"13 fold_01 64 0.912088 0.972290 0.937245 0.857143 \n",
"14 fold_02 64 0.955556 0.990493 0.988636 0.956522 \n",
"15 fold_03 64 0.933333 0.976174 0.963368 1.000000 \n",
"16 fold_00 128 0.923077 0.985936 0.979479 0.925926 \n",
"17 fold_01 128 0.945055 0.994228 0.991707 0.960000 \n",
"18 fold_02 128 0.966667 0.997533 0.996399 1.000000 \n",
"19 fold_03 128 0.911111 0.988550 0.980622 0.924528 \n",
"\n",
" specificity precision recall F1 \n",
"0 0.897436 0.928571 1.000000 0.962963 \n",
"1 0.941176 0.964912 0.964912 0.964912 \n",
"2 0.866667 0.877551 0.955556 0.914894 \n",
"3 0.969697 0.982456 0.982456 0.982456 \n",
"4 1.000000 1.000000 0.964286 0.981818 \n",
"5 0.941176 0.961538 0.877193 0.917431 \n",
"6 0.857143 0.888889 1.000000 0.941176 \n",
"7 0.925000 0.942308 0.980000 0.960784 \n",
"8 0.828571 0.898305 0.946429 0.921739 \n",
"9 0.875000 0.909091 0.980392 0.943396 \n",
"10 1.000000 1.000000 0.907407 0.951456 \n",
"11 0.875000 0.909091 1.000000 0.952381 \n",
"12 0.951220 0.960784 0.980000 0.970297 \n",
"13 1.000000 1.000000 0.857143 0.923077 \n",
"14 0.954545 0.956522 0.956522 0.956522 \n",
"15 0.806452 0.907692 1.000000 0.951613 \n",
"16 0.918919 0.943396 0.925926 0.934579 \n",
"17 0.926829 0.941176 0.960000 0.950495 \n",
"18 0.916667 0.947368 1.000000 0.972973 \n",
"19 0.891892 0.924528 0.924528 0.924528 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(run_all)\n",
"df.to_excel('results.xlsx')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>repeat_seed</th>\n",
" <th>accuracy</th>\n",
" <th>prc_auc</th>\n",
" <th>roc_auc</th>\n",
" <th>sensitivity</th>\n",
" <th>specificity</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" <tr>\n",
" <th>repeat_seed</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8.0</td>\n",
" <td>0.950244</td>\n",
" <td>0.988810</td>\n",
" <td>0.986912</td>\n",
" <td>0.975731</td>\n",
" <td>0.918744</td>\n",
" <td>0.938373</td>\n",
" <td>0.975731</td>\n",
" <td>0.956306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>16.0</td>\n",
" <td>0.942002</td>\n",
" <td>0.989001</td>\n",
" <td>0.986700</td>\n",
" <td>0.955370</td>\n",
" <td>0.930830</td>\n",
" <td>0.948184</td>\n",
" <td>0.955370</td>\n",
" <td>0.950303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>32.0</td>\n",
" <td>0.931013</td>\n",
" <td>0.991987</td>\n",
" <td>0.987940</td>\n",
" <td>0.958557</td>\n",
" <td>0.894643</td>\n",
" <td>0.929122</td>\n",
" <td>0.958557</td>\n",
" <td>0.942243</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>64.0</td>\n",
" <td>0.942002</td>\n",
" <td>0.983267</td>\n",
" <td>0.970117</td>\n",
" <td>0.948416</td>\n",
" <td>0.928054</td>\n",
" <td>0.956250</td>\n",
" <td>0.948416</td>\n",
" <td>0.950377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>128.0</td>\n",
" <td>0.936477</td>\n",
" <td>0.991562</td>\n",
" <td>0.987052</td>\n",
" <td>0.952614</td>\n",
" <td>0.913577</td>\n",
" <td>0.939117</td>\n",
" <td>0.952614</td>\n",
" <td>0.945644</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" repeat_seed accuracy prc_auc roc_auc sensitivity \\\n",
"repeat_seed \n",
"8 8.0 0.950244 0.988810 0.986912 0.975731 \n",
"16 16.0 0.942002 0.989001 0.986700 0.955370 \n",
"32 32.0 0.931013 0.991987 0.987940 0.958557 \n",
"64 64.0 0.942002 0.983267 0.970117 0.948416 \n",
"128 128.0 0.936477 0.991562 0.987052 0.952614 \n",
"\n",
" specificity precision recall F1 \n",
"repeat_seed \n",
"8 0.918744 0.938373 0.975731 0.956306 \n",
"16 0.930830 0.948184 0.955370 0.950303 \n",
"32 0.894643 0.929122 0.958557 0.942243 \n",
"64 0.928054 0.956250 0.948416 0.950377 \n",
"128 0.913577 0.939117 0.952614 0.945644 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean of every metric over the folds of each repeat. 'fold' is a text label\n",
"# and repeat_seed becomes the index, so neither of them gets averaged.\n",
"results = df.drop(columns=['fold']).groupby('repeat_seed').mean()\n",
"results"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"repeat_seed 49.600\n",
"accuracy 0.940\n",
"prc_auc 0.989\n",
"roc_auc 0.984\n",
"sensitivity 0.958\n",
"specificity 0.917\n",
"precision 0.942\n",
"recall 0.958\n",
"F1 0.949\n",
"dtype: float64"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Grand mean: average the per-repeat fold means across the repeats.\n",
"df.drop(columns=['fold']).groupby('repeat_seed').mean().mean().round(3)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"repeat_seed 0.000\n",
"accuracy 0.023\n",
"prc_auc 0.008\n",
"roc_auc 0.011\n",
"sensitivity 0.037\n",
"specificity 0.048\n",
"precision 0.033\n",
"recall 0.037\n",
"F1 0.019\n",
"dtype: float64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean within-repeat spread: population std (ddof=0, which is what np.std\n",
"# computes) of each metric over the folds of a repeat, averaged over repeats.\n",
"df.drop(columns=['fold']).groupby('repeat_seed').std(ddof=0).mean().round(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}