251 lines (250 with data), 8.9 kB
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# USPSTF recommendations notebook\n",
"\n",
"P. Benveniste $^1$, J. Alberge $^1$\n",
"\n",
"$^1$ Ecole Normale Supérieure Paris-Saclay\n",
"\n",
"In this Notebook, we look at the results of the USPSTF recommendations on PLCO and NLST. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#Import of the librairies\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from tabulate import tabulate"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We now import both datasets."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(55161, 10)\n",
"(48595, 10)\n"
]
}
],
"source": [
"#Loading of both datasets\n",
"plco_file = './preprocessed_plco.csv'\n",
"plco = pd.read_csv(plco_file)\n",
"nlst_file = './preprocessed_nlst.csv'\n",
"nlst = pd.read_csv(nlst_file)\n",
"\n",
"total_plco = len(plco)\n",
"print(plco.shape)\n",
"total_nlst = len(nlst)\n",
"print(nlst.shape)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"##### US RECOMMENDATION TOOL\n",
"\n",
"Now we look into the USPSTF recommendation tool on PLCO and NLST."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pre-processed PLCO size: 55161\n",
"Pre-processed PLCO with lung cancer: 2752\n",
"Patients from PLCO who fit into US recommendation: 22609\n",
"Patients from PLCO who fit into US recommendation with lung cancer: 2105\n",
"------- USPSTF RECOMMENDATION ON PLCO --------\n",
"TP : 2105\n",
"FN : 647\n",
"TN : 31905\n",
"FP : 20504\n",
"Precision : 0.093\n",
"Recall : 0.765\n"
]
}
],
"source": [
"print(\"Pre-processed PLCO size:\", len(plco))\n",
"print(\"Pre-processed PLCO with lung cancer:\", len(plco[plco[\"lung_cancer\"]==1]))\n",
"\n",
"plco_criteria = plco.copy()\n",
"plco_criteria = plco_criteria[plco_criteria[\"age\"]>=50]\n",
"plco_criteria = plco_criteria[plco_criteria[\"age\"]<=80]\n",
"plco_criteria = plco_criteria[plco_criteria[\"pack_years\"]>=20]\n",
"plco_criteria = plco_criteria[ (plco_criteria[\"cig_stat\"]==1) | (plco_criteria[\"age\"] - plco_criteria[\"ssmokea_f\"] <=15) ]\n",
"\n",
"print(\"Patients from PLCO who fit into US recommendation:\", len(plco_criteria))\n",
"print(\"Patients from PLCO who fit into US recommendation with lung cancer:\", len(plco_criteria[plco_criteria[\"lung_cancer\"]==1]))\n",
"\n",
"TP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])\n",
"FN_plco = len(plco[plco[\"lung_cancer\"]==1])-TP_plco\n",
"TN_plco = len(plco[plco[\"lung_cancer\"]==0]) - len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n",
"FP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n",
"\n",
"print(\"------- USPSTF RECOMMENDATION ON PLCO --------\")\n",
"print(\"TP : \", TP_plco)\n",
"print(\"FN : \", FN_plco)\n",
"print(\"TN : \", TN_plco)\n",
"print(\"FP : \", FP_plco)\n",
"print(\"Precision : \", round(TP_plco/(TP_plco+FP_plco),3))\n",
"print(\"Recall : \", round(TP_plco/(TP_plco+FN_plco),3) )"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pre-processed NLST size: 48595\n",
"Pre-processed NLST with cancer: 1511\n",
"Patients from NLST who fit into US recommendation: 48034\n",
"Patients from NLST who fit into US recommendation with cancer: 1495\n",
"------- USPSTF RECOMMENDATION ON NLST --------\n",
"TP : 1495\n",
"FN : 16\n",
"TN : 545\n",
"FP : 46539\n",
"Precision : 0.031\n",
"Recall : 0.989\n"
]
}
],
"source": [
"print(\"Pre-processed NLST size:\", len(nlst))\n",
"print(\"Pre-processed NLST with cancer:\", len(nlst[nlst[\"lung_cancer\"]==1]))\n",
"\n",
"nlst_criteria = nlst.copy()\n",
"nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]>=50]\n",
"nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]<=80]\n",
"nlst_criteria = nlst_criteria[nlst_criteria[\"pack_years\"]>=20]\n",
"nlst_criteria = nlst_criteria[ (nlst_criteria[\"cig_stat\"]==1) | (nlst_criteria[\"age\"] - nlst_criteria[\"ssmokea_f\"] <=15) ]\n",
"\n",
"print(\"Patients from NLST who fit into US recommendation:\", len(nlst_criteria))\n",
"print(\"Patients from NLST who fit into US recommendation with cancer:\", len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1]))\n",
"\n",
"TP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])\n",
"FN_nlst = len(nlst[nlst[\"lung_cancer\"]==1])-TP_nlst\n",
"TN_nlst = len(nlst[nlst[\"lung_cancer\"]==0]) - len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n",
"FP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n",
"\n",
"print(\"------- USPSTF RECOMMENDATION ON NLST --------\")\n",
"print(\"TP : \", TP_nlst)\n",
"print(\"FN : \", FN_nlst)\n",
"print(\"TN : \", TN_nlst)\n",
"print(\"FP : \", FP_nlst)\n",
"print(\"Precision : \", round(TP_nlst/(TP_nlst+FP_nlst),3))\n",
"print(\"Recall : \", round(TP_nlst/(TP_nlst+FN_nlst),3) )"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Saving a txt file\n",
"\n",
"Now we write a text file to concatenate these analyses. "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File edited\n"
]
}
],
"source": [
"with open('./USPSTF_recommendations.txt', 'w') as f:\n",
" f.write('------------ COMPARISON WITH USPSTF ON PLCO------------ \\n \\n')\n",
" f.write(\"Pre-processed PLCO size: \" +str(len(plco)) + '\\n')\n",
" f.write(\"Pre-processed PLCO with lung cancer: \" + str(len(plco[plco[\"lung_cancer\"]==1])) + '\\n')\n",
" f.write(\"Patients from PLCO who fit into US recommendation: \"+ str(len(plco_criteria))+ '\\n')\n",
" f.write(\"Patients from PLCO who fit into US recommendation with lung cancer: \"+ str(len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n",
" f.write(\"------- USPSTF RECOMMENDATION ON PLCO -------- \\n\")\n",
" f.write(\"TP : \" + str(TP_plco) + '\\n')\n",
" f.write(\"FN : \" + str(FN_plco) + '\\n')\n",
" f.write(\"TN : \" + str(TN_plco) + '\\n')\n",
" f.write(\"FP : \" + str(FP_plco) + '\\n')\n",
" f.write(\"Precision : \" + str(round(TP_plco/(TP_plco+FP_plco),3)) + '\\n')\n",
" f.write(\"Recall : \" + str(round(TP_plco/(TP_plco+FN_plco),3)) + '\\n\\n\\n')\n",
" f.write('------------ COMPARISON WITH USPSTF ON NLST------------ \\n \\n')\n",
" f.write(\"Pre-processed NLST size: \" +str(len(nlst)) + '\\n')\n",
" f.write(\"Pre-processed NLST with lung cancer: \" + str(len(nlst[nlst[\"lung_cancer\"]==1])) + '\\n')\n",
" f.write(\"Patients from NLST who fit into US recommendation: \"+ str(len(nlst_criteria))+ '\\n')\n",
" f.write(\"Patients from NLST who fit into US recommendation with lung cancer: \"+ str(len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n",
" f.write(\"------- USPSTF RECOMMENDATION ON NLST -------- \\n\")\n",
" f.write(\"TP : \" + str(TP_nlst) + '\\n')\n",
" f.write(\"FN : \" + str(FN_nlst) + '\\n')\n",
" f.write(\"TN : \" + str(TN_nlst) + '\\n')\n",
" f.write(\"FP : \" + str(FP_nlst) + '\\n')\n",
" f.write(\"Precision : \" + str(round(TP_nlst/(TP_nlst+FP_nlst),3)) + '\\n')\n",
" f.write(\"Recall : \" + str(round(TP_nlst/(TP_nlst+FN_nlst),3)) + '\\n\\n\\n')\n",
"print(\"File edited\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}