--- a +++ b/evaluation/Evaluation_Secondary.ipynb @@ -0,0 +1,188 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import pandas as pd\n", + "import numpy as np\n", + "import joblib\n", + "import scipy\n", + "import pickle\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## This notebook assumes: \n", + "#1: The loaded outcome is if the outcome happens ever, as opposed to the other evaluation, which was focused on the first 5 days\n", + "#2: num_windows is the number of hours // 4 in which we want to make the triaging decision. Our default is making predictions using 48 hours of data to triage\n", + "\n", + "num_windows = 12" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load models\n", + "\n", + "models_dict = joblib.load('models_dict.joblib')\n", + "models_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Read in sample cohort\n", + "# Cohort should include those who did not have outcome within the first two days\n", + "# Each individual should have exactly 12 windows\n", + "# The outcome shoudl be whether deterioation occurred ever (not just within the first five days)\n", + "\n", + "df_cohort = pd.read_csv('sample_cohort_outcome_ever_past_2days.csv')\n", + "\n", + "# Remove windows after 2 days\n", + "df_cohort = df_cohort[df_cohort['window_id'] < num_windows]\n", + "\n", + "# Remove incomplete windows\n", + "df_cohort = df_cohort[df_cohort['window_id'] >= 1]\n", + "\n", + "test_hosp, test_window, test_y = df_cohort['hosp_id'], df_cohort['window_id'], df_cohort['y']\n", + "\n", + "cohort_IDs = df_cohort.set_index('ID')[[]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(np.unique(test_hosp))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## M-CURES Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mcures_clfs = models_dict['M-CURES']\n", + "df_mcures = pd.read_csv('../preprocessing/sample_output/mcures.csv').set_index('ID')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate aggregated scores for all examples\n", + "\n", + "eval_matrix = scipy.sparse.csr_matrix(cohort_IDs.join(df_mcures).values.astype(float))\n", + "all_y = np.array([clf.predict_proba(eval_matrix)[:,1] for clf in mcures_clfs])\n", + "y_scores = all_y.mean(0)\n", + "\n", + "df_Yte_all = pd.DataFrame({'hosp_id': test_hosp, 'window_id': test_window, 'y': test_y, 'y_score': y_scores})\n", + "df_Yte_agg = df_Yte_all.groupby('hosp_id').mean() #Can be changed to max, depending on how you want to aggregate scores" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scores = np.sort(df_Yte_agg['y_score'])\n", + "total_negs = df_Yte_agg['y']\n", + "for s in scores: \n", + " curr = df_Yte_agg[df_Yte_agg['y_score'] <= s]\n", + " # How many people do we correctly flag with atleast an NPV of 0.95 (i.e. At most 5% of people we flagged have the event)\n", + " if 1 - curr['y'].mean() == 0.95: \n", + " curr_no_outcome = curr[curr['y'] == 0]\n", + " print('NPV: {:.2f}, Population % Flagged Correctly as Low-Risk {:.2%}'.format(1 - curr['y'].mean(), curr_no_outcome.shape[0] / len(scores)))\n", + " latest = curr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sweep over NPV" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the percentage of correctly flagged low-risk patients (true negatives) as NPV varies\n", + "scores = np.sort(df_Yte_agg['y_score'])\n", + "mcures_npvs = []\n", + "mcures_flagged = []\n", + "\n", + "for s in scores: \n", + " curr = df_Yte_agg[df_Yte_agg['y_score'] <= s]\n", + " curr_no_outcome = curr[curr['y'] == 0]\n", + " mcures_npvs.append(1 - curr['y'].mean())\n", + " mcures_flagged.append(curr_no_outcome.shape[0] / len(scores))\n", + " \n", + "fig, ax = plt.subplots(figsize=(3.5, 3.5))\n", + "\n", + "plt.plot(mcures_flagged, mcures_npvs, label = 'M-CURES Model', lw = 1.25)\n", + "\n", + "plt.xlabel('Percentage Correctly Flagged as Low-Risk')\n", + "plt.ylabel('Negative Predictive Value')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}