--- a +++ b/notebooks/USPSTF_recommendations.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# USPSTF recommendations notebook\n", + "\n", + "P. Benveniste $^1$, J. Alberge $^1$\n", + "\n", + "$^1$ Ecole Normale Supérieure Paris-Saclay\n", + "\n", + "In this Notebook, we look at the results of the USPSTF recommendations on PLCO and NLST. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#Import of the librairies\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from tabulate import tabulate" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now import both datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(55161, 10)\n", + "(48595, 10)\n" + ] + } + ], + "source": [ + "#Loading of both datasets\n", + "plco_file = './preprocessed_plco.csv'\n", + "plco = pd.read_csv(plco_file)\n", + "nlst_file = './preprocessed_nlst.csv'\n", + "nlst = pd.read_csv(nlst_file)\n", + "\n", + "total_plco = len(plco)\n", + "print(plco.shape)\n", + "total_nlst = len(nlst)\n", + "print(nlst.shape)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### US RECOMMENDATION TOOL\n", + "\n", + "Now we look into the USPSTF recommendation tool on PLCO and NLST." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pre-processed PLCO size: 55161\n", + "Pre-processed PLCO with lung cancer: 2752\n", + "Patients from PLCO who fit into US recommendation: 22609\n", + "Patients from PLCO who fit into US recommendation with lung cancer: 2105\n", + "------- USPSTF RECOMMENDATION ON PLCO --------\n", + "TP : 2105\n", + "FN : 647\n", + "TN : 31905\n", + "FP : 20504\n", + "Precision : 0.093\n", + "Recall : 0.765\n" + ] + } + ], + "source": [ + "print(\"Pre-processed PLCO size:\", len(plco))\n", + "print(\"Pre-processed PLCO with lung cancer:\", len(plco[plco[\"lung_cancer\"]==1]))\n", + "\n", + "plco_criteria = plco.copy()\n", + "plco_criteria = plco_criteria[plco_criteria[\"age\"]>=50]\n", + "plco_criteria = plco_criteria[plco_criteria[\"age\"]<=80]\n", + "plco_criteria = plco_criteria[plco_criteria[\"pack_years\"]>=20]\n", + "plco_criteria = plco_criteria[ (plco_criteria[\"cig_stat\"]==1) | (plco_criteria[\"age\"] - plco_criteria[\"ssmokea_f\"] <=15) ]\n", + "\n", + "print(\"Patients from PLCO who fit into US recommendation:\", len(plco_criteria))\n", + "print(\"Patients from PLCO who fit into US recommendation with lung cancer:\", len(plco_criteria[plco_criteria[\"lung_cancer\"]==1]))\n", + "\n", + "TP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])\n", + "FN_plco = len(plco[plco[\"lung_cancer\"]==1])-TP_plco\n", + "TN_plco = len(plco[plco[\"lung_cancer\"]==0]) - len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n", + "FP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n", + "\n", + "print(\"------- USPSTF RECOMMENDATION ON PLCO --------\")\n", + "print(\"TP : \", TP_plco)\n", + "print(\"FN : \", FN_plco)\n", + "print(\"TN : \", TN_plco)\n", + "print(\"FP : \", FP_plco)\n", + "print(\"Precision : \", round(TP_plco/(TP_plco+FP_plco),3))\n", + "print(\"Recall : \", round(TP_plco/(TP_plco+FN_plco),3) )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pre-processed NLST size: 48595\n", + "Pre-processed NLST with cancer: 1511\n", + "Patients from NLST who fit into US recommendation: 48034\n", + "Patients from NLST who fit into US recommendation with cancer: 1495\n", + "------- USPSTF RECOMMENDATION ON NLST --------\n", + "TP : 1495\n", + "FN : 16\n", + "TN : 545\n", + "FP : 46539\n", + "Precision : 0.031\n", + "Recall : 0.989\n" + ] + } + ], + "source": [ + "print(\"Pre-processed NLST size:\", len(nlst))\n", + "print(\"Pre-processed NLST with cancer:\", len(nlst[nlst[\"lung_cancer\"]==1]))\n", + "\n", + "nlst_criteria = nlst.copy()\n", + "nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]>=50]\n", + "nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]<=80]\n", + "nlst_criteria = nlst_criteria[nlst_criteria[\"pack_years\"]>=20]\n", + "nlst_criteria = nlst_criteria[ (nlst_criteria[\"cig_stat\"]==1) | (nlst_criteria[\"age\"] - nlst_criteria[\"ssmokea_f\"] <=15) ]\n", + "\n", + "print(\"Patients from NLST who fit into US recommendation:\", len(nlst_criteria))\n", + "print(\"Patients from NLST who fit into US recommendation with cancer:\", len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1]))\n", + "\n", + "TP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])\n", + "FN_nlst = len(nlst[nlst[\"lung_cancer\"]==1])-TP_nlst\n", + "TN_nlst = len(nlst[nlst[\"lung_cancer\"]==0]) - len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n", + "FP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n", + "\n", + "print(\"------- USPSTF RECOMMENDATION ON NLST --------\")\n", + "print(\"TP : \", TP_nlst)\n", + "print(\"FN : \", FN_nlst)\n", + "print(\"TN : \", TN_nlst)\n", + "print(\"FP : \", FP_nlst)\n", + "print(\"Precision : \", round(TP_nlst/(TP_nlst+FP_nlst),3))\n", + "print(\"Recall : \", round(TP_nlst/(TP_nlst+FN_nlst),3) )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Saving a txt file\n", + "\n", + "Now we write a text file to concatenate these analyses. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File edited\n" + ] + } + ], + "source": [ + "with open('./USPSTF_recommendations.txt', 'w') as f:\n", + " f.write('------------ COMPARISON WITH USPSTF ON PLCO------------ \\n \\n')\n", + " f.write(\"Pre-processed PLCO size: \" +str(len(plco)) + '\\n')\n", + " f.write(\"Pre-processed PLCO with lung cancer: \" + str(len(plco[plco[\"lung_cancer\"]==1])) + '\\n')\n", + " f.write(\"Patients from PLCO who fit into US recommendation: \"+ str(len(plco_criteria))+ '\\n')\n", + " f.write(\"Patients from PLCO who fit into US recommendation with lung cancer: \"+ str(len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n", + " f.write(\"------- USPSTF RECOMMENDATION ON PLCO -------- \\n\")\n", + " f.write(\"TP : \" + str(TP_plco) + '\\n')\n", + " f.write(\"FN : \" + str(FN_plco) + '\\n')\n", + " f.write(\"TN : \" + str(TN_plco) + '\\n')\n", + " f.write(\"FP : \" + str(FP_plco) + '\\n')\n", + " f.write(\"Precision : \" + str(round(TP_plco/(TP_plco+FP_plco),3)) + '\\n')\n", + " f.write(\"Recall : \" + str(round(TP_plco/(TP_plco+FN_plco),3)) + '\\n\\n\\n')\n", + " f.write('------------ COMPARISON WITH USPSTF ON NLST------------ \\n \\n')\n", + " f.write(\"Pre-processed NLST size: \" +str(len(nlst)) + '\\n')\n", + " f.write(\"Pre-processed NLST with lung cancer: \" + str(len(nlst[nlst[\"lung_cancer\"]==1])) + '\\n')\n", + " f.write(\"Patients from NLST who fit into US recommendation: \"+ str(len(nlst_criteria))+ '\\n')\n", + " f.write(\"Patients from NLST who fit into US recommendation with lung cancer: \"+ str(len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n", + " f.write(\"------- USPSTF RECOMMENDATION ON NLST -------- \\n\")\n", + " f.write(\"TP : \" + str(TP_nlst) + '\\n')\n", + " f.write(\"FN : \" + str(FN_nlst) + '\\n')\n", + " f.write(\"TN : \" + str(TN_nlst) + '\\n')\n", + " f.write(\"FP : \" + str(FP_nlst) + '\\n')\n", + " f.write(\"Precision : \" + str(round(TP_nlst/(TP_nlst+FP_nlst),3)) + '\\n')\n", + " f.write(\"Recall : \" + str(round(TP_nlst/(TP_nlst+FN_nlst),3)) + '\\n\\n\\n')\n", + "print(\"File edited\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}