[9ab7c1]: / notebooks / USPSTF_recommendations.ipynb

Download this file

251 lines (250 with data), 8.9 kB

{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# USPSTF recommendations notebook\n",
    "\n",
    "P. Benveniste $^1$, J. Alberge $^1$\n",
    "\n",
    "$^1$ Ecole Normale Supérieure Paris-Saclay\n",
    "\n",
    "In this Notebook, we look at the results of the USPSTF recommendations on PLCO and NLST. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Import of the librairies\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from tabulate import tabulate"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We now import both datasets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(55161, 10)\n",
      "(48595, 10)\n"
     ]
    }
   ],
   "source": [
    "#Loading of both datasets\n",
    "plco_file = './preprocessed_plco.csv'\n",
    "plco = pd.read_csv(plco_file)\n",
    "nlst_file = './preprocessed_nlst.csv'\n",
    "nlst = pd.read_csv(nlst_file)\n",
    "\n",
    "total_plco = len(plco)\n",
    "print(plco.shape)\n",
    "total_nlst = len(nlst)\n",
    "print(nlst.shape)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### US RECOMMENDATION TOOL\n",
    "\n",
    "Now we look into the USPSTF recommendation tool on PLCO and NLST."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pre-processed PLCO size: 55161\n",
      "Pre-processed PLCO with lung cancer: 2752\n",
      "Patients from PLCO who fit into US recommendation: 22609\n",
      "Patients from PLCO who fit into US recommendation with lung cancer: 2105\n",
      "------- USPSTF RECOMMENDATION ON PLCO --------\n",
      "TP :  2105\n",
      "FN :  647\n",
      "TN :  31905\n",
      "FP :  20504\n",
      "Precision :  0.093\n",
      "Recall :  0.765\n"
     ]
    }
   ],
   "source": [
    "print(\"Pre-processed PLCO size:\", len(plco))\n",
    "print(\"Pre-processed PLCO with lung cancer:\", len(plco[plco[\"lung_cancer\"]==1]))\n",
    "\n",
    "plco_criteria = plco.copy()\n",
    "plco_criteria = plco_criteria[plco_criteria[\"age\"]>=50]\n",
    "plco_criteria = plco_criteria[plco_criteria[\"age\"]<=80]\n",
    "plco_criteria = plco_criteria[plco_criteria[\"pack_years\"]>=20]\n",
    "plco_criteria = plco_criteria[ (plco_criteria[\"cig_stat\"]==1) | (plco_criteria[\"age\"] - plco_criteria[\"ssmokea_f\"] <=15) ]\n",
    "\n",
    "print(\"Patients from PLCO who fit into US recommendation:\", len(plco_criteria))\n",
    "print(\"Patients from PLCO who fit into US recommendation with lung cancer:\", len(plco_criteria[plco_criteria[\"lung_cancer\"]==1]))\n",
    "\n",
    "TP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])\n",
    "FN_plco = len(plco[plco[\"lung_cancer\"]==1])-TP_plco\n",
    "TN_plco = len(plco[plco[\"lung_cancer\"]==0]) - len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n",
    "FP_plco = len(plco_criteria[plco_criteria[\"lung_cancer\"]==0])\n",
    "\n",
    "print(\"------- USPSTF RECOMMENDATION ON PLCO --------\")\n",
    "print(\"TP : \", TP_plco)\n",
    "print(\"FN : \", FN_plco)\n",
    "print(\"TN : \", TN_plco)\n",
    "print(\"FP : \", FP_plco)\n",
    "print(\"Precision : \",  round(TP_plco/(TP_plco+FP_plco),3))\n",
    "print(\"Recall : \", round(TP_plco/(TP_plco+FN_plco),3) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pre-processed NLST size: 48595\n",
      "Pre-processed NLST with cancer: 1511\n",
      "Patients from NLST who fit into US recommendation: 48034\n",
      "Patients from NLST who fit into US recommendation with cancer: 1495\n",
      "------- USPSTF RECOMMENDATION ON NLST --------\n",
      "TP :  1495\n",
      "FN :  16\n",
      "TN :  545\n",
      "FP :  46539\n",
      "Precision :  0.031\n",
      "Recall :  0.989\n"
     ]
    }
   ],
   "source": [
    "print(\"Pre-processed NLST size:\", len(nlst))\n",
    "print(\"Pre-processed NLST with cancer:\", len(nlst[nlst[\"lung_cancer\"]==1]))\n",
    "\n",
    "nlst_criteria = nlst.copy()\n",
    "nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]>=50]\n",
    "nlst_criteria = nlst_criteria[nlst_criteria[\"age\"]<=80]\n",
    "nlst_criteria = nlst_criteria[nlst_criteria[\"pack_years\"]>=20]\n",
    "nlst_criteria = nlst_criteria[ (nlst_criteria[\"cig_stat\"]==1) | (nlst_criteria[\"age\"] - nlst_criteria[\"ssmokea_f\"] <=15) ]\n",
    "\n",
    "print(\"Patients from NLST who fit into US recommendation:\", len(nlst_criteria))\n",
    "print(\"Patients from NLST who fit into US recommendation with cancer:\", len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1]))\n",
    "\n",
    "TP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])\n",
    "FN_nlst = len(nlst[nlst[\"lung_cancer\"]==1])-TP_nlst\n",
    "TN_nlst = len(nlst[nlst[\"lung_cancer\"]==0]) - len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n",
    "FP_nlst = len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==0])\n",
    "\n",
    "print(\"------- USPSTF RECOMMENDATION ON NLST --------\")\n",
    "print(\"TP : \", TP_nlst)\n",
    "print(\"FN : \", FN_nlst)\n",
    "print(\"TN : \", TN_nlst)\n",
    "print(\"FP : \", FP_nlst)\n",
    "print(\"Precision : \",  round(TP_nlst/(TP_nlst+FP_nlst),3))\n",
    "print(\"Recall : \", round(TP_nlst/(TP_nlst+FN_nlst),3) )"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Saving a txt file\n",
    "\n",
    "Now we write a text file to concatenate these analyses. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File edited\n"
     ]
    }
   ],
   "source": [
    "with open('./USPSTF_recommendations.txt', 'w') as f:\n",
    "    f.write('------------ COMPARISON WITH USPSTF ON PLCO------------ \\n \\n')\n",
    "    f.write(\"Pre-processed PLCO size: \" +str(len(plco)) + '\\n')\n",
    "    f.write(\"Pre-processed PLCO with lung cancer: \" + str(len(plco[plco[\"lung_cancer\"]==1])) + '\\n')\n",
    "    f.write(\"Patients from PLCO who fit into US recommendation: \"+ str(len(plco_criteria))+ '\\n')\n",
    "    f.write(\"Patients from PLCO who fit into US recommendation with lung cancer: \"+ str(len(plco_criteria[plco_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n",
    "    f.write(\"------- USPSTF RECOMMENDATION ON PLCO -------- \\n\")\n",
    "    f.write(\"TP : \" + str(TP_plco) + '\\n')\n",
    "    f.write(\"FN : \" + str(FN_plco) + '\\n')\n",
    "    f.write(\"TN : \" + str(TN_plco) + '\\n')\n",
    "    f.write(\"FP : \" + str(FP_plco) + '\\n')\n",
    "    f.write(\"Precision : \" +  str(round(TP_plco/(TP_plco+FP_plco),3)) + '\\n')\n",
    "    f.write(\"Recall : \" + str(round(TP_plco/(TP_plco+FN_plco),3)) + '\\n\\n\\n')\n",
    "    f.write('------------ COMPARISON WITH USPSTF ON NLST------------ \\n \\n')\n",
    "    f.write(\"Pre-processed NLST size: \" +str(len(nlst)) + '\\n')\n",
    "    f.write(\"Pre-processed NLST with lung cancer: \" + str(len(nlst[nlst[\"lung_cancer\"]==1])) + '\\n')\n",
    "    f.write(\"Patients from NLST who fit into US recommendation: \"+ str(len(nlst_criteria))+ '\\n')\n",
    "    f.write(\"Patients from NLST who fit into US recommendation with lung cancer: \"+ str(len(nlst_criteria[nlst_criteria[\"lung_cancer\"]==1])) + '\\n\\n')\n",
    "    f.write(\"------- USPSTF RECOMMENDATION ON NLST -------- \\n\")\n",
    "    f.write(\"TP : \" + str(TP_nlst) + '\\n')\n",
    "    f.write(\"FN : \" + str(FN_nlst) + '\\n')\n",
    "    f.write(\"TN : \" + str(TN_nlst) + '\\n')\n",
    "    f.write(\"FP : \" + str(FP_nlst) + '\\n')\n",
    "    f.write(\"Precision : \" +  str(round(TP_nlst/(TP_nlst+FP_nlst),3)) + '\\n')\n",
    "    f.write(\"Recall : \" + str(round(TP_nlst/(TP_nlst+FN_nlst),3)) + '\\n\\n\\n')\n",
    "print(\"File edited\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}