{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy\n",
"import joblib"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load ensemble of models\n",
"\n",
"models_dict = joblib.load('models_dict.joblib')\n",
"models_dict.keys()"
]
},
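{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick, illustrative inspection of the ensemble: judging from how `models_dict` is used later in this notebook, each entry is assumed to map a model name to a list of fitted classifiers."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of fitted classifiers per model (assumes each value is a list of estimators)\n",
"{name: len(clfs) for name, clfs in models_dict.items()}"
]
},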
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load data: Data should be formatted as sample cohort. See README for example\n",
"\n",
"df_cohort = pd.read_csv('sample_cohort.csv')\n",
"test_hosp, test_window, test_y = df_cohort['hosp_id'], df_cohort['window_id'], df_cohort['y']\n",
"cohort_IDs = df_cohort.set_index('ID')[[]]"
]
},
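{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell below is an optional, illustrative guard: it checks for the cohort columns this notebook relies on (`ID`, `hosp_id`, `window_id`, `y`). The column set is inferred from the code here, not a formal spec; see the README for the authoritative format."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (illustrative): verify the columns used in this notebook\n",
"expected_cols = {'ID', 'hosp_id', 'window_id', 'y'}\n",
"missing = expected_cols - set(df_cohort.columns)\n",
"assert not missing, 'sample_cohort.csv is missing columns: {}'.format(missing)"
]
},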
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_cohort"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Functions to assist with evaluation \n",
"\n",
"from sklearn import metrics, utils\n",
"from joblib import Parallel, delayed\n",
"\n",
"\n",
"def bootstrap_func(i, y_true, y_score):\n",
" # Bootstrap resample to calculate AUROC\n",
" yte_true_b, yte_pred_b = utils.resample(y_true, y_score, replace=True, random_state=i)\n",
" return metrics.roc_curve(yte_true_b, yte_pred_b), metrics.roc_auc_score(yte_true_b, yte_pred_b)\n",
"\n",
"def get_roc_CI(y_true, y_score):\n",
" # Bootstrap confidence intervals \n",
" roc_curves, auc_scores = zip(*Parallel(n_jobs=4)(delayed(bootstrap_func)(i, y_true, y_score) for i in range(1000)))\n",
" print('Test AUC: ({:.3f}, {:.3f}) percentile 95% CI'.format(np.percentile(auc_scores, 2.5), np.percentile(auc_scores, 97.5)))\n",
"\n",
" tprs = []\n",
" aucs = []\n",
" mean_fpr = np.linspace(0, 1, 100)\n",
" for fpr, tpr, _ in roc_curves:\n",
" tprs.append(np.interp(mean_fpr, fpr, tpr))\n",
" tprs[-1][0] = 0.0\n",
" aucs.append(metrics.auc(fpr, tpr))\n",
"\n",
" mean_tpr = np.mean(tprs, axis=0)\n",
" std_tpr = np.std(tprs, axis=0)\n",
" tprs_upper = np.minimum(mean_tpr + 1.96 * std_tpr, 1)\n",
" tprs_lower = np.maximum(mean_tpr - 1.96 * std_tpr, 0)\n",
" return roc_curves, auc_scores, mean_fpr, tprs_lower, tprs_upper\n",
"\n",
"def eval3():\n",
" # Calculate hospital admission level AUROC for every complete window\n",
" df_Yte = df_Yte_all.copy()\n",
" df_Yte = df_Yte[df_Yte['window_id'] >= 1]\n",
" df_Yte_agg = df_Yte.groupby(['hosp_id']).max()\n",
" return df_Yte_agg"
]
},
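{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick self-test of `bootstrap_func` and `get_roc_CI`, the cell below runs them on synthetic labels and scores. The data here are random and purely illustrative, not study data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Self-test on synthetic data (illustrative only)\n",
"rng = np.random.RandomState(0)\n",
"y_demo = rng.randint(0, 2, size=200)\n",
"s_demo = 0.5 * y_demo + rng.rand(200)\n",
"_ = get_roc_CI(y_demo, s_demo)"
]
},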
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## M-CURES model performance and scores"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load exact model and features of patients. Features should be generated from preprocessing script. \n",
"\n",
"mcures_clfs = models_dict['M-CURES']\n",
"df_mcures = pd.read_csv('../preprocessing/sample_output/mcures.csv').set_index('ID')"
]
},
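{
"cell_type": "markdown",
"metadata": {},
"source": [
"An optional alignment check (illustrative): the evaluation below joins the feature matrix onto the cohort index, so every cohort `ID` should have a feature row; missing rows would silently become NaNs in the join."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: every cohort ID should have a row of features\n",
"missing_ids = cohort_IDs.index.difference(df_mcures.index)\n",
"assert len(missing_ids) == 0, '{} cohort IDs lack feature rows'.format(len(missing_ids))"
]
},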
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_mcures"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### AUROC on given dataset\n",
"\n",
"# Calculate model outputs for all patients, average over all models\n",
"eval_matrix = scipy.sparse.csr_matrix(cohort_IDs.join(df_mcures).values.astype(float))\n",
"all_y = np.array([clf.predict_proba(eval_matrix)[:,1] for clf in mcures_clfs])\n",
"y_scores = all_y.mean(0)\n",
"\n",
"# To evaluate models, take maximum over all windows\n",
"df_Yte_all = pd.DataFrame({'hosp_id': test_hosp, 'window_id': test_window, 'y': test_y, 'y_score': y_scores})\n",
"df_Yte_agg = eval3()\n",
"y_score = df_Yte_agg['y_score']\n",
"y_true = df_Yte_agg['y']\n",
"fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score)\n",
"print('Test AUC: {:.3f}'.format(metrics.roc_auc_score(y_true, y_score)))\n",
"\n",
"# # Optionally: Generate 95% CI\n",
"# try:\n",
"# roc_curves, auc_scores, mean_fpr, tprs_lower, tprs_upper = get_roc_CI(y_true, y_score)\n",
"# except:\n",
"# pass"
]
},
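{
"cell_type": "markdown",
"metadata": {},
"source": [
"An optional plotting sketch: it draws the ROC curve with the bootstrap confidence band from `get_roc_CI`. This assumes `matplotlib` is installed; note that on very small cohorts a bootstrap resample can contain a single class, in which case `roc_auc_score` will raise."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: ROC curve with bootstrap 95% CI band (sketch; assumes matplotlib)\n",
"import matplotlib.pyplot as plt\n",
"\n",
"roc_curves, auc_scores, mean_fpr, tprs_lower, tprs_upper = get_roc_CI(y_true, y_score)\n",
"plt.plot(fpr, tpr, label='AUROC = {:.3f}'.format(metrics.roc_auc_score(y_true, y_score)))\n",
"plt.fill_between(mean_fpr, tprs_lower, tprs_upper, alpha=0.2, label='95% CI band')\n",
"plt.plot([0, 1], [0, 1], linestyle='--', color='grey')\n",
"plt.xlabel('False positive rate')\n",
"plt.ylabel('True positive rate')\n",
"plt.legend()\n",
"plt.show()"
]
},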
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate list of scores for each example \n",
"\n",
"y_score_lst = df_Yte_all.groupby(['hosp_id'])['y_score'].apply(list)\n",
"df1 = pd.DataFrame({'y_scores_mcures_lst': y_score_lst})\n",
"df2 = pd.DataFrame({'id': df_Yte_agg.index, 'y_scores_mcures': y_score})\n",
"\n",
"outcome_outputs = df1.merge(df2, left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"outcome_outputs "
]
},
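{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, persist the per-hospitalization scores. The output filename below is just an example, not a path used elsewhere in this repo."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: save scores to CSV (example filename; adjust as needed)\n",
"outcome_outputs.to_csv('mcures_scores.csv')"
]
}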
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}