189 lines (188 with data), 5.4 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"import numpy as np\n",
"import joblib\n",
"import scipy\n",
"import pickle\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assumptions made by this notebook:\n",
"#  1. The loaded outcome records whether the event happens EVER, unlike the other\n",
"#     evaluation, which focused on the first 5 days.\n",
"#  2. num_windows is the number of hours of data used for the triaging decision,\n",
"#     integer-divided by 4. The default triages on 48 hours of data.\n",
"\n",
"hours_of_data = 48\n",
"num_windows = hours_of_data // 4"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the saved models dictionary and display its keys.\n",
"# NOTE(review): joblib.load unpickles the file, which can run arbitrary code -- only load files from a trusted source.\n",
"\n",
"models_path = 'models_dict.joblib'\n",
"models_dict = joblib.load(models_path)\n",
"models_dict.keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### Read in sample cohort\n",
"# The cohort should include those who did not have the outcome within the first two days.\n",
"# Each individual should have exactly 12 windows.\n",
"# The outcome should be whether deterioration occurred ever (not just within the first five days).\n",
"\n",
"cohort_raw = pd.read_csv('sample_cohort_outcome_ever_past_2days.csv')\n",
"\n",
"# Keep a row only if it passes both window filters\n",
"window_ids = cohort_raw['window_id']\n",
"within_two_days = window_ids < num_windows  # remove windows after 2 days\n",
"is_complete = window_ids >= 1  # remove incomplete windows\n",
"df_cohort = cohort_raw[within_two_days & is_complete]\n",
"\n",
"test_hosp = df_cohort['hosp_id']\n",
"test_window = df_cohort['window_id']\n",
"test_y = df_cohort['y']\n",
"\n",
"# Column-less frame indexed by ID; only the row order / index of the cohort is kept\n",
"cohort_IDs = df_cohort.set_index('ID')[[]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of distinct hosp_id values in the filtered cohort\n",
"np.unique(test_hosp).size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## M-CURES Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Models stored under the 'M-CURES' key of the loaded dictionary\n",
"mcures_clfs = models_dict['M-CURES']\n",
"\n",
"# Preprocessed M-CURES table, re-indexed by ID\n",
"mcures_csv_path = '../preprocessing/sample_output/mcures.csv'\n",
"df_mcures = pd.read_csv(mcures_csv_path)\n",
"df_mcures = df_mcures.set_index('ID')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate aggregated scores for all examples\n",
"\n",
"# Align the M-CURES rows to the cohort IDs and convert them to a sparse float matrix\n",
"cohort_features = cohort_IDs.join(df_mcures)\n",
"eval_matrix = scipy.sparse.csr_matrix(cohort_features.values.astype(float))\n",
"\n",
"# Column 1 of predict_proba from every model (presumably the positive-class probability -- confirm),\n",
"# then averaged across models to give one score per row\n",
"per_model_scores = []\n",
"for clf in mcures_clfs:\n",
"    per_model_scores.append(clf.predict_proba(eval_matrix)[:, 1])\n",
"all_y = np.array(per_model_scores)\n",
"y_scores = all_y.mean(axis=0)\n",
"\n",
"df_Yte_all = pd.DataFrame({\n",
"    'hosp_id': test_hosp,\n",
"    'window_id': test_window,\n",
"    'y': test_y,\n",
"    'y_score': y_scores,\n",
"})\n",
"\n",
"# One row per hosp_id; mean can be changed to max, depending on how you want to aggregate scores\n",
"df_Yte_agg = df_Yte_all.groupby('hosp_id').mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# How many people do we correctly flag as low-risk with at least an NPV of npv_target\n",
"# (i.e. at most 5% of the people we flagged have the event)?\n",
"# Every aggregated score is tried as a threshold; rows scoring at or below it are the flagged group.\n",
"npv_target = 0.95\n",
"\n",
"scores = np.sort(df_Yte_agg['y_score'])\n",
"total_negs = df_Yte_agg['y']  # NOTE(review): not used in this cell; kept in case another cell reads it\n",
"\n",
"# Flagged group at the highest threshold that still meets npv_target (None if no threshold does)\n",
"latest = None\n",
"for s in scores:\n",
"    curr = df_Yte_agg[df_Yte_agg['y_score'] <= s]\n",
"    # 1 - mean(y) is the share of the flagged group without the event (assumes y is 0/1 -- confirm).\n",
"    # Compare with >= instead of ==: the goal is an NPV of at least npv_target, and an exact\n",
"    # floating-point match only happens when the event rate is exactly 5%.\n",
"    if 1 - curr['y'].mean() >= npv_target:\n",
"        latest = curr\n",
"\n",
"if latest is None:\n",
"    print('No score threshold reaches an NPV of {:.2f}'.format(npv_target))\n",
"else:\n",
"    curr_no_outcome = latest[latest['y'] == 0]\n",
"    print('NPV: {:.2f}, Population % Flagged Correctly as Low-Risk {:.2%}'.format(1 - latest['y'].mean(), curr_no_outcome.shape[0] / len(scores)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sweep over NPV"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sweep every aggregated score as a threshold and plot NPV against the share of the\n",
"# population correctly flagged as low-risk (true negatives)\n",
"scores = np.sort(df_Yte_agg['y_score'])\n",
"n_scored = len(scores)\n",
"mcures_npvs, mcures_flagged = [], []\n",
"\n",
"for s in scores:\n",
"    # Rows at or below the threshold are the ones flagged as low-risk\n",
"    curr = df_Yte_agg[df_Yte_agg['y_score'] <= s]\n",
"    curr_no_outcome = curr[curr['y'] == 0]\n",
"    mcures_flagged.append(len(curr_no_outcome) / n_scored)\n",
"    mcures_npvs.append(1 - curr['y'].mean())\n",
"\n",
"fig, ax = plt.subplots(figsize=(3.5, 3.5))\n",
"ax.plot(mcures_flagged, mcures_npvs, linewidth=1.25, label='M-CURES Model')\n",
"ax.set_xlabel('Percentage Correctly Flagged as Low-Risk')\n",
"ax.set_ylabel('Negative Predictive Value')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}