In [None]:
import json
import pandas as pd
import numpy as np
import joblib
import scipy
import pickle
import matplotlib.pyplot as plt

In [None]:
## This notebook assumes:
# 1: The loaded outcome is whether the outcome ever happens, as opposed to the other
#    evaluation, which focused on the first 5 days.
# 2: num_windows is the number of hours of data used to make the triaging decision,
#    integer-divided by 4. The default triages using 48 hours of data.

num_windows = 48 // 4

In [None]:
# Load the trained models, stored as a single mapping keyed by model name
# (the 'M-CURES' entry is read from it further down).
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code -- only load a models_dict.joblib obtained from a trusted source.

models_dict = joblib.load('models_dict.joblib')
# Display the available keys as the cell output.
models_dict.keys()

In [None]:
### Read in sample cohort
# The cohort should include only those who did not have the outcome within the first two days.
# Each individual should have exactly 12 windows.
# The outcome should be whether deterioration occurred ever (not just within the first five days).
# NOTE(review): the filter below keeps window_id 1 .. num_windows - 1; confirm this matches
# the "exactly 12 windows" expectation above.

# Keep only complete windows that fall inside the first two days:
#   window_id >= 1           drops the incomplete window
#   window_id < num_windows  drops windows after 2 days
df_cohort = pd.read_csv('sample_cohort_outcome_ever_past_2days.csv').loc[
    lambda frame: (frame['window_id'] >= 1) & (frame['window_id'] < num_windows)
]

# Per-row columns reused later when assembling the score table.
test_hosp = df_cohort['hosp_id']
test_window = df_cohort['window_id']
test_y = df_cohort['y']

# Zero-column frame indexed by 'ID'; feature tables are joined onto it so their
# rows line up with the cohort rows.
cohort_IDs = df_cohort.set_index('ID')[[]]

In [None]:
# Number of distinct hosp_id values left in the filtered cohort.
np.unique(test_hosp).size

## M-CURES Model

In [None]:
# Entry stored under 'M-CURES' in models_dict; it is iterated over as a collection
# of classifiers when scoring.
mcures_clfs = models_dict['M-CURES']
# M-CURES feature table (presumably the preprocessing step's output -- confirm),
# indexed by 'ID' so it can be joined onto the cohort's IDs.
df_mcures = pd.read_csv('../preprocessing/sample_output/mcures.csv').set_index('ID')

In [None]:
# Score every cohort row with each M-CURES classifier, average across the
# classifiers, then aggregate the per-window scores per hosp_id.

# Features for the cohort rows (joined on ID) as a sparse float matrix.
eval_matrix = scipy.sparse.csr_matrix(
    cohort_IDs.join(df_mcures).values.astype(float)
)

# One row per classifier: column 1 of predict_proba for every cohort row.
all_y = np.array(
    [member.predict_proba(eval_matrix)[:, 1] for member in mcures_clfs]
)
# Ensemble score = mean over classifiers.
y_scores = all_y.mean(axis=0)

df_Yte_all = pd.DataFrame(
    {
        'hosp_id': test_hosp,
        'window_id': test_window,
        'y': test_y,
        'y_score': y_scores,
    }
)
# Can be changed to max, depending on how you want to aggregate scores
df_Yte_agg = df_Yte_all.groupby('hosp_id').mean()

In [None]:
# How much of the population can be flagged as low-risk while the flagged group keeps
# an NPV of at least `npv_target` (for 0.95: at most 5% of the people flagged have the event)?
npv_target = 0.95

scores = np.sort(df_Yte_agg['y_score'])
total_negs = df_Yte_agg['y']  # not used below; name kept in case other cells read it
latest = None  # flagged group at the highest qualifying threshold (None if none qualifies)

for s in scores:
    # Flag everyone whose aggregated score is at or below the candidate threshold.
    curr = df_Yte_agg[df_Yte_agg['y_score'] <= s]
    npv = 1 - curr['y'].mean()
    # "At least" needs >=; the previous exact float comparison (==) almost never held,
    # so nothing was reported. np.isclose also accepts values that land a rounding
    # error below the target.
    if npv >= npv_target or np.isclose(npv, npv_target):
        latest = curr

# Report once, for the highest threshold that still meets the target (scores are
# visited in ascending order, so `latest` holds the last qualifying group).
if latest is None:
    print('No score threshold reaches an NPV of {:.2f}'.format(npv_target))
else:
    curr_no_outcome = latest[latest['y'] == 0]
    print('NPV: {:.2f}, Population % Flagged Correctly as Low-Risk {:.2%}'.format(1 - latest['y'].mean(), curr_no_outcome.shape[0] / len(scores)))

## Sweep over NPV

In [None]:
# Sweep the score threshold and plot, for each threshold, the share of the population
# correctly flagged as low-risk (true negatives) against the resulting NPV.
scores = np.sort(df_Yte_agg['y_score'])
mcures_npvs, mcures_flagged = [], []

for s in scores:
    # Group flagged as low-risk at this threshold, and its members without the outcome.
    curr = df_Yte_agg.loc[df_Yte_agg['y_score'] <= s]
    curr_no_outcome = curr.loc[curr['y'] == 0]
    mcures_npvs.append(1 - curr['y'].mean())
    mcures_flagged.append(len(curr_no_outcome) / len(scores))

fig, ax = plt.subplots(figsize=(3.5, 3.5))

ax.plot(mcures_flagged, mcures_npvs, label='M-CURES Model', lw=1.25)

ax.set_xlabel('Percentage Correctly Flagged as Low-Risk')
ax.set_ylabel('Negative Predictive Value')
plt.show()