Switch to unified view

a b/notebooks/USPSTF_recommendations.ipynb
1
{
2
 "cells": [
3
  {
4
   "attachments": {},
5
   "cell_type": "markdown",
6
   "metadata": {},
7
   "source": [
8
    "# USPSTF recommendations notebook\n",
9
    "\n",
10
    "P. Benveniste $^1$, J. Alberge $^1$\n",
11
    "\n",
12
    "$^1$ Ecole Normale Supérieure Paris-Saclay\n",
13
    "\n",
14
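    "In this notebook, we apply the USPSTF recommendation criteria to the preprocessed PLCO and NLST datasets and report the resulting confusion-matrix counts (TP, FN, TN, FP), precision and recall."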
15
   ]
16
  },
17
  {
18
   "cell_type": "code",
19
   "execution_count": 1,
20
   "metadata": {},
21
   "outputs": [],
22
   "source": [
23
    "# Import the libraries\n",
24
    "import pandas as pd\n",
25
    "import numpy as np\n",
26
    "import matplotlib.pyplot as plt\n",
27
    "from tabulate import tabulate"
28
   ]
29
  },
30
  {
31
   "attachments": {},
32
   "cell_type": "markdown",
33
   "metadata": {},
34
   "source": [
35
    "We now import both datasets."
36
   ]
37
  },
38
  {
39
   "cell_type": "code",
40
   "execution_count": 2,
41
   "metadata": {},
42
   "outputs": [
43
    {
44
     "name": "stdout",
45
     "output_type": "stream",
46
     "text": [
47
      "(55161, 10)\n",
48
      "(48595, 10)\n"
49
     ]
50
    }
51
   ],
52
   "source": [
53
    "# Load both preprocessed datasets\n",
54
    "plco_file = './preprocessed_plco.csv'\n",
55
    "plco = pd.read_csv(plco_file)\n",
56
    "nlst_file = './preprocessed_nlst.csv'\n",
57
    "nlst = pd.read_csv(nlst_file)\n",
58
    "\n",
59
    "total_plco = len(plco)\n",
60
    "print(plco.shape)\n",
61
    "total_nlst = len(nlst)\n",
62
    "print(nlst.shape)"
63
   ]
64
  },
65
  {
66
   "attachments": {},
67
   "cell_type": "markdown",
68
   "metadata": {},
69
   "source": [
70
    "### USPSTF recommendation tool\n",
71
    "\n",
72
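    "We now apply the USPSTF recommendation tool to PLCO and NLST. A patient fits the recommendation when `age` is between 50 and 80, `pack_years` is at least 20, and either `cig_stat == 1` or `age - ssmokea_f <= 15`."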
73
   ]
74
  },
75
  {
76
   "cell_type": "code",
77
   "execution_count": 3,
78
   "metadata": {},
79
   "outputs": [
80
    {
81
     "name": "stdout",
82
     "output_type": "stream",
83
     "text": [
84
      "Pre-processed PLCO size: 55161\n",
85
      "Pre-processed PLCO with lung cancer: 2752\n",
86
      "Patients from PLCO who fit into US recommendation: 22609\n",
87
      "Patients from PLCO who fit into US recommendation with lung cancer: 2105\n",
88
      "------- USPSTF RECOMMENDATION ON PLCO --------\n",
89
      "TP :  2105\n",
90
      "FN :  647\n",
91
      "TN :  31905\n",
92
      "FP :  20504\n",
93
      "Precision :  0.093\n",
94
      "Recall :  0.765\n"
95
     ]
96
    }
97
   ],
98
   "source": [
99
    "print(\"Pre-processed PLCO size:\", len(plco))\n",
100
    "print(\"Pre-processed PLCO with lung cancer:\", len(plco[plco[\"lung_cancer\"]==1]))\n",
101
    "\n",
102
    "plco_criteria = plco.copy()\n",
103
    "plco_criteria = plco_criteria[plco_criteria[\"age\"]>=50]\n",
104
    "plco_criteria = plco_criteria[plco_criteria[\"age\"]<=80]\n",
105
    "plco_criteria = plco_criteria[plco_criteria[\"pack_years\"]>=20]\n",
106
    "plco_criteria = plco_criteria[ (plco_criteria[\"cig_stat\"]==1) | (plco_criteria[\"age\"] - plco_criteria[\"ssmokea_f\"] <=15) ]\n",
107
    "\n",
108
    "print(\"Patients from PLCO who fit into US recommendation:\", len(plco_criteria))\n",
109
    "print(\"Patients from PLCO who fit into US recommendation with lung cancer:\", len(plco_criteria[plco_criteria[\"lung_cancer\"]==1]))\n",
110
    "\n",
111
    "TP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])\n",
112
    "FN_plco = len(plco[plco[\"lung_cancer\"]==1])-TP_plco\n",
113
    "TN_plco = len(plco[plco[\"lung_cancer\"]==0]) - len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n",
114
    "FP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n",
115
    "\n",
116
    "print(\"------- USPSTF RECOMMENDATION ON PLCO --------\")\n",
117
    "print(\"TP : \", TP_plco)\n",
118
    "print(\"FN : \", FN_plco)\n",
119
    "print(\"TN : \", TN_plco)\n",
120
    "print(\"FP : \", FP_plco)\n",
121
    "print(\"Precision : \",  round(TP_plco/(TP_plco+FP_plco),3))\n",
122
    "print(\"Recall : \", round(TP_plco/(TP_plco+FN_plco),3) )"
123
   ]
124
  },
125
  {
126
   "cell_type": "code",
127
   "execution_count": 4,
128
   "metadata": {},
129
   "outputs": [
130
    {
131
     "name": "stdout",
132
     "output_type": "stream",
133
     "text": [
134
      "Pre-processed NLST size: 48595\n",
135
      "Pre-processed NLST with cancer: 1511\n",
136
      "Patients from NLST who fit into US recommendation: 48034\n",
137
      "Patients from NLST who fit into US recommendation with cancer: 1495\n",
138
      "------- USPSTF RECOMMENDATION ON NLST --------\n",
139
      "TP :  1495\n",
140
      "FN :  16\n",
141
      "TN :  545\n",
142
      "FP :  46539\n",
143
      "Precision :  0.031\n",
144
      "Recall :  0.989\n"
145
     ]
146
    }
147
   ],
148
   "source": [
149
    "print(\"Pre-processed NLST size:\", len(nlst))\n",
150
    "print(\"Pre-processed NLST with cancer:\", len(nlst[nlst[\"lung_cancer\"]==1]))\n",
151
    "\n",
152
    "nlst_criteria = nlst.copy()\n",
153
    "nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]>=50]\n",
154
    "nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]<=80]\n",
155
    "nlst_criteria = nlst_criteria[nlst_criteria[\"pack_years\"]>=20]\n",
156
    "nlst_criteria = nlst_criteria[ (nlst_criteria[\"cig_stat\"]==1) | (nlst_criteria[\"age\"] - nlst_criteria[\"ssmokea_f\"] <=15) ]\n",
157
    "\n",
158
    "print(\"Patients from NLST who fit into US recommendation:\", len(nlst_criteria))\n",
159
    "print(\"Patients from NLST who fit into US recommendation with cancer:\", len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1]))\n",
160
    "\n",
161
    "TP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])\n",
162
    "FN_nlst = len(nlst[nlst[\"lung_cancer\"]==1])-TP_nlst\n",
163
    "TN_nlst = len(nlst[nlst[\"lung_cancer\"]==0]) - len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n",
164
    "FP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n",
165
    "\n",
166
    "print(\"------- USPSTF RECOMMENDATION ON NLST --------\")\n",
167
    "print(\"TP : \", TP_nlst)\n",
168
    "print(\"FN : \", FN_nlst)\n",
169
    "print(\"TN : \", TN_nlst)\n",
170
    "print(\"FP : \", FP_nlst)\n",
171
    "print(\"Precision : \",  round(TP_nlst/(TP_nlst+FP_nlst),3))\n",
172
    "print(\"Recall : \", round(TP_nlst/(TP_nlst+FN_nlst),3) )"
173
   ]
174
  },
175
  {
176
   "attachments": {},
177
   "cell_type": "markdown",
178
   "metadata": {},
179
   "source": [
180
    "### Saving a txt file\n",
181
    "\n",
182
    "We now write the PLCO and NLST results to a single text file, `./USPSTF_recommendations.txt`."
183
   ]
184
  },
185
  {
186
   "cell_type": "code",
187
   "execution_count": 5,
188
   "metadata": {},
189
   "outputs": [
190
    {
191
     "name": "stdout",
192
     "output_type": "stream",
193
     "text": [
194
      "File edited\n"
195
     ]
196
    }
197
   ],
198
   "source": [
199
    "with open('./USPSTF_recommendations.txt', 'w') as f:\n",
200
    "    f.write('------------ COMPARISON WITH USPSTF ON PLCO------------ \\n \\n')\n",
201
    "    f.write(\"Pre-processed PLCO size: \" +str(len(plco)) + '\\n')\n",
202
    "    f.write(\"Pre-processed PLCO with lung cancer: \" + str(len(plco[plco[\"lung_cancer\"]==1])) + '\\n')\n",
203
    "    f.write(\"Patients from PLCO who fit into US recommendation: \"+ str(len(plco_criteria))+ '\\n')\n",
204
    "    f.write(\"Patients from PLCO who fit into US recommendation with lung cancer: \"+ str(len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n",
205
    "    f.write(\"------- USPSTF RECOMMENDATION ON PLCO -------- \\n\")\n",
206
    "    f.write(\"TP : \" + str(TP_plco) + '\\n')\n",
207
    "    f.write(\"FN : \" + str(FN_plco) + '\\n')\n",
208
    "    f.write(\"TN : \" + str(TN_plco) + '\\n')\n",
209
    "    f.write(\"FP : \" + str(FP_plco) + '\\n')\n",
210
    "    f.write(\"Precision : \" +  str(round(TP_plco/(TP_plco+FP_plco),3)) + '\\n')\n",
211
    "    f.write(\"Recall : \" + str(round(TP_plco/(TP_plco+FN_plco),3)) + '\\n\\n\\n')\n",
212
    "    f.write('------------ COMPARISON WITH USPSTF ON NLST------------ \\n \\n')\n",
213
    "    f.write(\"Pre-processed NLST size: \" +str(len(nlst)) + '\\n')\n",
214
    "    f.write(\"Pre-processed NLST with lung cancer: \" + str(len(nlst[nlst[\"lung_cancer\"]==1])) + '\\n')\n",
215
    "    f.write(\"Patients from NLST who fit into US recommendation: \"+ str(len(nlst_criteria))+ '\\n')\n",
216
    "    f.write(\"Patients from NLST who fit into US recommendation with lung cancer: \"+ str(len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n",
217
    "    f.write(\"------- USPSTF RECOMMENDATION ON NLST -------- \\n\")\n",
218
    "    f.write(\"TP : \" + str(TP_nlst) + '\\n')\n",
219
    "    f.write(\"FN : \" + str(FN_nlst) + '\\n')\n",
220
    "    f.write(\"TN : \" + str(TN_nlst) + '\\n')\n",
221
    "    f.write(\"FP : \" + str(FP_nlst) + '\\n')\n",
222
    "    f.write(\"Precision : \" +  str(round(TP_nlst/(TP_nlst+FP_nlst),3)) + '\\n')\n",
223
    "    f.write(\"Recall : \" + str(round(TP_nlst/(TP_nlst+FN_nlst),3)) + '\\n\\n\\n')\n",
224
    "print(\"File edited\")"
225
   ]
226
  }
227
 ],
228
 "metadata": {
229
  "kernelspec": {
230
   "display_name": ".venv",
231
   "language": "python",
232
   "name": "python3"
233
  },
234
  "language_info": {
235
   "codemirror_mode": {
236
    "name": "ipython",
237
    "version": 3
238
   },
239
   "file_extension": ".py",
240
   "mimetype": "text/x-python",
241
   "name": "python",
242
   "nbconvert_exporter": "python",
243
   "pygments_lexer": "ipython3",
244
   "version": "3.8.9"
245
  },
246
  "orig_nbformat": 4
247
 },
248
 "nbformat": 4,
249
 "nbformat_minor": 2
250
}