--- a +++ b/1. Applying AI to 2D Medical Imaging Data/13. Algorithmic Limitations Exercise/solution.ipynb @@ -0,0 +1,300 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import sklearn.metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read in labels and performance data:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Pneumonia</th>\n", + " <th>Atelectasis</th>\n", + " <th>Effusion</th>\n", + " <th>Pneumothorax</th>\n", + " <th>Infiltration</th>\n", + " <th>Cardiomegaly</th>\n", + " <th>Mass</th>\n", + " <th>Nodule</th>\n", + " <th>algorithm_output</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + 
" <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Pneumonia Atelectasis Effusion Pneumothorax Infiltration Cardiomegaly \\\n", + "0 1 1 1 0 0 0 \n", + "1 1 1 0 0 0 1 \n", + "2 1 0 1 0 0 0 \n", + "3 1 1 1 0 0 1 \n", + "4 0 1 0 0 0 0 \n", + "\n", + " Mass Nodule algorithm_output \n", + "0 0 0 1 \n", + "1 0 0 1 \n", + "2 0 0 1 \n", + "3 0 0 1 \n", + "4 0 0 0 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('labels_and_performance.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, look at the overall performance of the algorithm for the detection of pneumonia:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "tn, fp, fn, tp = sklearn.metrics.confusion_matrix(data.Pneumonia.values,\n", + "                                                  data.algorithm_output.values,labels=[0,1]).ravel()  # labels=[0,1] lays the matrix out as [[tn, fp], [fn, tp]], matching this unpacking" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8166666666666667" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sens = tp/(tp+fn)\n", + "sens" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8235294117647058" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spec = tn/(tn+fp)\n", + "spec" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, 
look at the algorithm's performance in the presence of the other diseases: " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Atelectasis\n", + "Sensitivity: 0.782608695652174\n", + "Specificity: 0.8333333333333334\n", + "\n", + "Effusion\n", + "Sensitivity: 0.6521739130434783\n", + "Specificity: 0.8571428571428571\n", + "\n", + "Pneumothorax\n", + "Sensitivity: 0.8571428571428571\n", + "Specificity: 0.6666666666666666\n", + "\n", + "Infiltration\n", + "Sensitivity: 0.3888888888888889\n", + "Specificity: 0.0\n", + "\n", + "Cardiomegaly\n", + "Sensitivity: 0.8888888888888888\n", + "Specificity: 1.0\n", + "\n", + "Mass\n", + "Sensitivity: 0.9285714285714286\n", + "Specificity: 0.8666666666666667\n", + "\n", + "Nodule\n", + "Sensitivity: 1.0\n", + "Specificity: 0.5384615384615384\n", + "\n" + ] + } + ], + "source": [ + "for disease in ['Atelectasis','Effusion','Pneumothorax','Infiltration','Cardiomegaly','Mass','Nodule']:\n", + "    # Evaluate pneumonia detection only on the studies where this other finding is present.\n", + "    subset = data[data[disease]==1]\n", + "    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(subset.Pneumonia.values, subset.algorithm_output.values, labels=[0,1]).ravel()  # labels=[0,1] -> ravel() yields tn, fp, fn, tp\n", + "    sens = tp/(tp+fn)\n", + "    spec = tn/(tn+fp)\n", + "\n", + "    print(disease)\n", + "    print('Sensitivity: '+ str(sens))\n", + "    print('Specificity: ' +str(spec))\n", + "    print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Statement on algorithmic limitations:\n", + "\n", + "The results above indicate that the presence of infiltration in a chest x-ray is a limitation of this algorithm: in the presence of infiltration, the algorithm performs very poorly on the detection of pneumonia (sensitivity of about 0.39 and specificity of 0.0). 
The presence of nodules and pneumothorax reduces the algorithm's specificity and may increase the number of false positive pneumonia classifications, while the presence of effusion reduces the algorithm's sensitivity and may reduce the ability to detect pneumonia." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}