--- a
+++ b/1. Applying AI to 2D Medical Imaging Data/14. Translate Performance into Clinical Utility Exercise/solution.ipynb
@@ -0,0 +1,329 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.preprocessing import binarize\n",
+    "\n",
+    "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score, plot_precision_recall_curve, f1_score, confusion_matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "performances = pd.read_csv('performances.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ground_truth</th>\n",
+       "      <th>probability</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0.99</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0.98</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0.97</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0.96</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0.95</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   ground_truth  probability\n",
+       "0             1         0.99\n",
+       "1             1         0.98\n",
+       "2             1         0.97\n",
+       "3             1         0.96\n",
+       "4             1         0.95"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "performances.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_auc(t_y, p_y):\n",
+    "    \"\"\"Plot the ROC curve of the scores `p_y` against the true labels `t_y`.\n",
+    "\n",
+    "    A new 9x9-inch figure is created; the legend entry reports the area\n",
+    "    under the curve.\n",
+    "    \"\"\"\n",
+    "    _, roc_ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 9))\n",
+    "    false_pos_rate, true_pos_rate, _ = roc_curve(t_y, p_y)\n",
+    "    roc_area = auc(false_pos_rate, true_pos_rate)\n",
+    "    legend_text = '%s (AUC:%0.2f)' % ('Pneumonia', roc_area)\n",
+    "    roc_ax.plot(false_pos_rate, true_pos_rate, label=legend_text)\n",
+    "    roc_ax.legend()\n",
+    "    roc_ax.set_xlabel('False Positive Rate')\n",
+    "    roc_ax.set_ylabel('True Positive Rate')\n",
+    "    \n",
+    "def plot_pr(t_y, p_y):\n",
+    "    \"\"\"Plot the precision-recall curve of the scores `p_y` against the labels `t_y`.\n",
+    "\n",
+    "    Recall is drawn on the x-axis and precision on the y-axis, matching the\n",
+    "    axis labels; the legend entry reports the average precision score.\n",
+    "    \"\"\"\n",
+    "    _, c_ax = plt.subplots(1, 1, figsize=(9, 9))\n",
+    "    precision, recall, _ = precision_recall_curve(t_y, p_y)\n",
+    "    ap_label = '%s (AP Score:%0.2f)' % ('Pneumonia', average_precision_score(t_y, p_y))\n",
+    "    # x must be recall and y precision; passing them the other way round draws\n",
+    "    # the curve transposed relative to the axis labels set below.\n",
+    "    c_ax.plot(recall, precision, label=ap_label)\n",
+    "    c_ax.legend()\n",
+    "    c_ax.set_xlabel('Recall')\n",
+    "    c_ax.set_ylabel('Precision')\n",
+    "\n",
+    "def calc_f1(prec, recall):\n",
+    "    \"\"\"Return the F1 score, i.e. the harmonic mean of precision and recall.\n",
+    "\n",
+    "    Accepts scalars or equally-shaped NumPy arrays. Where precision and recall\n",
+    "    are both zero the harmonic mean is 0/0; F1 is reported as 0.0 there instead\n",
+    "    of producing NaN or raising ZeroDivisionError.\n",
+    "    \"\"\"\n",
+    "    total = np.asarray(prec + recall, dtype=float)\n",
+    "    # Swap in a dummy denominator where total == 0 so no 0/0 is ever evaluated.\n",
+    "    safe_total = np.where(total == 0, 1.0, total)\n",
+    "    f1 = np.where(total == 0, 0.0, 2 * (prec * recall) / safe_total)\n",
+    "    # Scalar inputs yield a plain float; array inputs yield an array.\n",
+    "    return f1.item() if f1.ndim == 0 else f1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 648x648 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plot_auc(performances['ground_truth'],performances['probability'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 648x648 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plot_pr(performances['ground_truth'],performances['probability'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "I'm going to calculate the F1 score for two different scenarios here: one where I choose a threshold that favors precision, and one where the threshold favors recall."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "precision, recall, thresholds = precision_recall_curve(performances['ground_truth'],performances['probability'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Precision is: 0.8043478260869565\n",
+      "Recall is: 0.37373737373737376\n",
+      "Threshold is: 0.79\n",
+      "F1 Score is: 0.5103448275862069\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Look at the threshold where precision is closest to 0.8\n",
+    "precision_value = 0.8\n",
+    "# precision/recall hold one more entry than thresholds (the final curve point\n",
+    "# has no threshold), so search only the first len(thresholds) entries to keep\n",
+    "# idx valid for all three arrays.\n",
+    "idx = np.abs(precision[:len(thresholds)] - precision_value).argmin()\n",
+    "print('Precision is: '+ str(precision[idx]))\n",
+    "print('Recall is: '+ str(recall[idx]))\n",
+    "print('Threshold is: '+ str(thresholds[idx]))\n",
+    "print('F1 Score is: ' + str(calc_f1(precision[idx],recall[idx])))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Precision is: 0.398989898989899\n",
+      "Recall is: 0.797979797979798\n",
+      "Threshold is: 0.39\n",
+      "F1 Score is: 0.531986531986532\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Look at the threshold where recall is closest to 0.8\n",
+    "recall_value = 0.8\n",
+    "# precision/recall hold one more entry than thresholds (the final curve point\n",
+    "# has no threshold), so search only the first len(thresholds) entries to keep\n",
+    "# idx valid for all three arrays.\n",
+    "idx = np.abs(recall[:len(thresholds)] - recall_value).argmin()\n",
+    "print('Precision is: '+ str(precision[idx]))\n",
+    "print('Recall is: '+ str(recall[idx]))\n",
+    "print('Threshold is: '+ str(thresholds[idx]))\n",
+    "print('F1 Score is: ' + str(calc_f1(precision[idx],recall[idx])))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As we can see, the thresholds are really different for the two cases. Let's look at what the _accuracy_ of our model would be using those two values, and think about why accuracy really isn't a great performance statistic when evaluating and interpreting the utility of our models. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "probs = performances['probability']\n",
+    "# precision_recall_curve treats a case as positive when score >= threshold, so\n",
+    "# use >= here to reproduce the operating points printed above.\n",
+    "t1 = (probs >= 0.79)  # threshold printed for precision ~0.8\n",
+    "t2 = (probs >= 0.39)  # threshold printed for recall ~0.8"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cast the boolean masks to 0/1 integer labels (True -> 1, False -> 0).\n",
+    "t1 = t1.astype(int)\n",
+    "t2 = t2.astype(int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Element-wise comparison of each thresholded prediction with its label;\n",
+    "# True marks a row where the prediction equals ground_truth.\n",
+    "compare_t1 = (t1 == performances['ground_truth'])\n",
+    "compare_t2 = (t2 == performances['ground_truth'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Accuracy = number of matching rows / total number of rows.\n",
+    "n_correct_t1 = int(compare_t1.sum())\n",
+    "print('Accuracy at threshold 1: ' + str(n_correct_t1 / len(performances)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Accuracy = number of matching rows / total number of rows.\n",
+    "n_correct_t2 = int(compare_t2.sum())\n",
+    "print('Accuracy at threshold 2: ' + str(n_correct_t2 / len(performances)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}