--- a
+++ b/1. Applying AI to 2D Medical Imaging Data/8. Obtaining a Gold Standard Exercise/solution.ipynb
@@ -0,0 +1,583 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read in your label data:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>rad1</th>\n",
+       "      <th>rad2</th>\n",
+       "      <th>rad3</th>\n",
+       "      <th>biopsy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>cancer</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>cancer</td>\n",
+       "      <td>cancer</td>\n",
+       "      <td>cancer</td>\n",
+       "      <td>cancer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>cancer</td>\n",
+       "      <td>cancer</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>cancer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>benign</td>\n",
+       "      <td>benign</td>\n",
+       "      <td>cancer</td>\n",
+       "      <td>benign</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     rad1    rad2    rad3  biopsy\n",
+       "0  benign  benign  benign  benign\n",
+       "1  benign  benign  benign  benign\n",
+       "2  benign  benign  benign  benign\n",
+       "3  benign  benign  benign  benign\n",
+       "4  benign  benign  cancer  benign\n",
+       "5  cancer  cancer  cancer  cancer\n",
+       "6  benign  benign  benign  benign\n",
+       "7  benign  benign  benign  benign\n",
+       "8  cancer  cancer  benign  cancer\n",
+       "9  benign  benign  cancer  benign"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the label table; the preview shows the rad1, rad2, rad3 and biopsy columns.\n",
+    "labels = pd.read_csv('labels.csv')\n",
+    "labels.head(n=10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create your first ground truth as derived from biopsy labels: "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>rad1</th>\n",
+       "      <th>rad2</th>\n",
+       "      <th>rad3</th>\n",
+       "      <th>biopsy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   rad1  rad2  rad3  biopsy\n",
+       "0     1     1     1       1\n",
+       "1     1     1     1       1\n",
+       "2     1     1     1       1\n",
+       "3     1     1     1       1\n",
+       "4     1     1     0       1\n",
+       "5     0     0     0       0\n",
+       "6     1     1     1       1\n",
+       "7     1     1     1       1\n",
+       "8     0     0     1       0\n",
+       "9     1     1     0       1"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Encode the text labels as integers for easier processing later.\n",
+    "# NOTE: this notebook uses benign -> 1 and cancer -> 0.\n",
+    "LABEL_CODES = {'benign': 1, 'cancer': 0}\n",
+    "\n",
+    "# A single dict-based replace maps both labels in one pass. The explicit cast\n",
+    "# guarantees integer columns instead of relying on replace() dtype inference,\n",
+    "# and raises if any value other than benign/cancer is present.\n",
+    "labels2 = labels.replace(LABEL_CODES).astype('int64')\n",
+    "labels2.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    1\n",
+       "1    1\n",
+       "2    1\n",
+       "3    1\n",
+       "4    1\n",
+       "Name: biopsy, dtype: int64"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Ground truth 1: the biopsy column of the integer-encoded table, used as-is.\n",
+    "gt1 = labels2.loc[:, 'biopsy']\n",
+    "gt1.head(n=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create your second ground truth using a majority vote of the three radiologists:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    1.0\n",
+       "1    1.0\n",
+       "2    1.0\n",
+       "3    1.0\n",
+       "4    1.0\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Count how many of the three radiologists voted 1 for each row.\n",
+    "rad_votes = labels2[['rad1', 'rad2', 'rad3']].sum(axis=1)\n",
+    "\n",
+    "# Majority vote: the label is 1 when more than one of the three readers voted 1.\n",
+    "# astype converts the boolean mask directly to 0/1 integers (the same dtype as gt1)\n",
+    "# instead of chaining replace(True, 1).replace(False, 0), which produced floats.\n",
+    "gt2 = (rad_votes > 1).astype('int64')\n",
+    "gt2.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create your third ground truth by weighting the three radiologists:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>rad1</th>\n",
+       "      <th>rad2</th>\n",
+       "      <th>rad3</th>\n",
+       "      <th>biopsy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.33</td>\n",
+       "      <td>0.67</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.33</td>\n",
+       "      <td>0.67</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.33</td>\n",
+       "      <td>0.67</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.33</td>\n",
+       "      <td>0.67</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.33</td>\n",
+       "      <td>0.67</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   rad1  rad2  rad3  biopsy\n",
+       "0  0.33  0.67     1       1\n",
+       "1  0.33  0.67     1       1\n",
+       "2  0.33  0.67     1       1\n",
+       "3  0.33  0.67     1       1\n",
+       "4  0.33  0.67     0       1"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Scale rad1 and rad2 by their weights; rad3 is left unscaled.\n",
+    "# assign() returns a new frame, so labels2 itself is not modified.\n",
+    "weighted_labels = labels2.assign(\n",
+    "    rad1=labels2['rad1'] * 0.33,\n",
+    "    rad2=labels2['rad2'] * 0.67,\n",
+    ")\n",
+    "weighted_labels.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    1.0\n",
+       "1    1.0\n",
+       "2    1.0\n",
+       "3    1.0\n",
+       "4    0.0\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Weighted score per row: the sum of the already-weighted rad1 and rad2 columns\n",
+    "# and the unscaled rad3 column.\n",
+    "weighted_score = weighted_labels[['rad1', 'rad2', 'rad3']].sum(axis=1)\n",
+    "\n",
+    "# The label is 1 only when the score is strictly greater than 1. A tied score\n",
+    "# (rad3 alone against rad1 + rad2) does not exceed 1, so it is labelled 0.\n",
+    "# astype gives 0/1 integers directly instead of the chained replace(), which produced floats.\n",
+    "gt3 = (weighted_score > 1).astype('int64')\n",
+    "gt3.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compare the three ground truths:\n",
+    "\n",
+    "Here, explore the three sets of labels you created and see how often they agree."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "12    False\n",
+       "14    False\n",
+       "22    False\n",
+       "29    False\n",
+       "30    False\n",
+       "34    False\n",
+       "37    False\n",
+       "52    False\n",
+       "57    False\n",
+       "dtype: bool"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Row-wise agreement between the biopsy labels (gt1) and the majority vote (gt2).\n",
+    "biopsy_to_votes = gt1 == gt2\n",
+    "\n",
+    "# Show only the rows where the two disagree.\n",
+    "biopsy_to_votes.loc[~biopsy_to_votes]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4     False\n",
+       "9     False\n",
+       "12    False\n",
+       "14    False\n",
+       "17    False\n",
+       "20    False\n",
+       "22    False\n",
+       "29    False\n",
+       "30    False\n",
+       "34    False\n",
+       "37    False\n",
+       "52    False\n",
+       "56    False\n",
+       "57    False\n",
+       "58    False\n",
+       "dtype: bool"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Row-wise agreement between the biopsy labels (gt1) and the weighted labels (gt3).\n",
+    "biopsy_to_weights = gt1 == gt3\n",
+    "\n",
+    "# Show only the rows where the two disagree.\n",
+    "biopsy_to_weights.loc[~biopsy_to_weights]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Interestingly, in the example above the weighted scheme agrees with the biopsy labels less often than simple voting does (15 disagreements versus 9). This may be an artefact of the particular weights we chose; a weighted scheme is not always inferior to simple voting."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}