# USPSTF recommendations notebook

P. Benveniste $^1$, J. Alberge $^1$

$^1$ Ecole Normale Supérieure Paris-Saclay

In this notebook, we evaluate the USPSTF lung cancer screening recommendations on the PLCO and NLST datasets.

In [1]:
#Import of the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

We now import both datasets.

In [2]:
# Read the two pre-processed cohorts (PLCO and NLST) from CSV files located
# next to the notebook, then report how many rows and columns each one has.
plco_file = './preprocessed_plco.csv'
nlst_file = './preprocessed_nlst.csv'

plco = pd.read_csv(plco_file)
nlst = pd.read_csv(nlst_file)

# Row counts are kept under their own names in addition to the printed shapes.
total_plco = plco.shape[0]
total_nlst = nlst.shape[0]

print(plco.shape)
print(nlst.shape)

(55161, 10)
(48595, 10)


##### US RECOMMENDATION TOOL

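Now we apply the USPSTF screening eligibility criteria to PLCO and NLST: age between 50 and 80, a smoking history of at least 20 pack-years, and either current smoking or having quit within the past 15 years. Eligibility is treated as a binary prediction of lung cancer, and we report the resulting confusion matrix, precision and recall.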

In [3]:
# Apply the USPSTF eligibility rule to PLCO and score it as a binary
# classifier against the observed "lung_cancer" outcome.
print("Pre-processed PLCO size:", len(plco))
plco_cases = plco[plco["lung_cancer"]==1]
print("Pre-processed PLCO with lung cancer:", len(plco_cases))

# Eligibility masks, all evaluated on the full cohort:
#   - age within [50, 80]
#   - at least 20 pack-years
#   - cig_stat == 1, or at most 15 years between ssmokea_f and age
# NOTE(review): presumably cig_stat == 1 flags current smokers and ssmokea_f is
# the age at which smoking stopped -- confirm against the preprocessing step.
# Comparisons involving missing values evaluate to False, so such rows are
# only kept by the last mask when cig_stat == 1.
plco_age_ok = (plco["age"]>=50) & (plco["age"]<=80)
plco_exposure_ok = plco["pack_years"]>=20
plco_recency_ok = (plco["cig_stat"]==1) | (plco["age"] - plco["ssmokea_f"] <=15)
plco_criteria = plco[plco_age_ok & plco_exposure_ok & plco_recency_ok]

# Split the eligible participants by outcome once and reuse the two subsets.
plco_eligible_cases = plco_criteria[plco_criteria["lung_cancer"]==1]
plco_eligible_controls = plco_criteria[plco_criteria["lung_cancer"]==0]

print("Patients from PLCO who fit into US recommendation:", len(plco_criteria))
print("Patients from PLCO who fit into US recommendation with lung cancer:", len(plco_eligible_cases))

# Confusion matrix: "predicted positive" means eligible under the rule.
TP_plco = len(plco_eligible_cases)
FP_plco = len(plco_eligible_controls)
FN_plco = len(plco_cases) - TP_plco
TN_plco = len(plco[plco["lung_cancer"]==0]) - FP_plco

print("------- USPSTF RECOMMENDATION ON PLCO --------")
print("TP : ", TP_plco)
print("FN : ", FN_plco)
print("TN : ", TN_plco)
print("FP : ", FP_plco)
# Precision = TP / (TP + FP); recall = TP / (TP + FN); both rounded to 3 decimals.
print("Precision : ", round(TP_plco/(TP_plco+FP_plco),3))
print("Recall : ", round(TP_plco/(TP_plco+FN_plco),3))

Pre-processed PLCO size: 55161
Pre-processed PLCO with lung cancer: 2752
Patients from PLCO who fit into US recommendation: 22609
Patients from PLCO who fit into US recommendation with lung cancer: 2105
------- USPSTF RECOMMENDATION ON PLCO --------
TP :  2105
FN :  647
TN :  31905
FP :  20504
Precision :  0.093
Recall :  0.765


In [4]:
# Apply the USPSTF eligibility rule to NLST and score it as a binary
# classifier against the observed "lung_cancer" outcome.
print("Pre-processed NLST size:", len(nlst))
nlst_cases = nlst[nlst["lung_cancer"]==1]
print("Pre-processed NLST with cancer:", len(nlst_cases))

# Eligibility masks, all evaluated on the full cohort:
#   - age within [50, 80]
#   - at least 20 pack-years
#   - cig_stat == 1, or at most 15 years between ssmokea_f and age
# NOTE(review): presumably cig_stat == 1 flags current smokers and ssmokea_f is
# the age at which smoking stopped -- confirm against the preprocessing step.
# Comparisons involving missing values evaluate to False, so such rows are
# only kept by the last mask when cig_stat == 1.
nlst_age_ok = (nlst["age"]>=50) & (nlst["age"]<=80)
nlst_exposure_ok = nlst["pack_years"]>=20
nlst_recency_ok = (nlst["cig_stat"]==1) | (nlst["age"] - nlst["ssmokea_f"] <=15)
nlst_criteria = nlst[nlst_age_ok & nlst_exposure_ok & nlst_recency_ok]

# Split the eligible participants by outcome once and reuse the two subsets.
nlst_eligible_cases = nlst_criteria[nlst_criteria["lung_cancer"]==1]
nlst_eligible_controls = nlst_criteria[nlst_criteria["lung_cancer"]==0]

print("Patients from NLST who fit into US recommendation:", len(nlst_criteria))
print("Patients from NLST who fit into US recommendation with cancer:", len(nlst_eligible_cases))

# Confusion matrix: "predicted positive" means eligible under the rule.
TP_nlst = len(nlst_eligible_cases)
FP_nlst = len(nlst_eligible_controls)
FN_nlst = len(nlst_cases) - TP_nlst
TN_nlst = len(nlst[nlst["lung_cancer"]==0]) - FP_nlst

print("------- USPSTF RECOMMENDATION ON NLST --------")
print("TP : ", TP_nlst)
print("FN : ", FN_nlst)
print("TN : ", TN_nlst)
print("FP : ", FP_nlst)
# Precision = TP / (TP + FP); recall = TP / (TP + FN); both rounded to 3 decimals.
print("Precision : ", round(TP_nlst/(TP_nlst+FP_nlst),3))
print("Recall : ", round(TP_nlst/(TP_nlst+FN_nlst),3))

Pre-processed NLST size: 48595
Pre-processed NLST with cancer: 1511
Patients from NLST who fit into US recommendation: 48034
Patients from NLST who fit into US recommendation with cancer: 1495
------- USPSTF RECOMMENDATION ON NLST --------
TP :  1495
FN :  16
TN :  545
FP :  46539
Precision :  0.031
Recall :  0.989


### Saving a txt file

Now we write both analyses to a single text file.

In [5]:
# Write the PLCO and NLST summaries to ./USPSTF_recommendations.txt,
# overwriting any existing file of that name.
with open('./USPSTF_recommendations.txt', 'w') as f:
    # The report is an ordered list of entries. A plain string is written
    # verbatim; a (label, value, terminator) triple is written as
    # label + str(value) + terminator.
    uspstf_report = [
        '------------ COMPARISON WITH USPSTF ON PLCO------------ \n \n',
        ("Pre-processed PLCO size: ", len(plco), '\n'),
        ("Pre-processed PLCO with lung cancer: ", len(plco[plco["lung_cancer"]==1]), '\n'),
        ("Patients from PLCO who fit into US recommendation: ", len(plco_criteria), '\n'),
        ("Patients from PLCO who fit into US recommendation with lung cancer: ", len(plco_criteria[plco_criteria["lung_cancer"]==1]), '\n\n'),
        "------- USPSTF RECOMMENDATION ON PLCO -------- \n",
        ("TP : ", TP_plco, '\n'),
        ("FN : ", FN_plco, '\n'),
        ("TN : ", TN_plco, '\n'),
        ("FP : ", FP_plco, '\n'),
        ("Precision : ", round(TP_plco/(TP_plco+FP_plco),3), '\n'),
        ("Recall : ", round(TP_plco/(TP_plco+FN_plco),3), '\n\n\n'),
        '------------ COMPARISON WITH USPSTF ON NLST------------ \n \n',
        ("Pre-processed NLST size: ", len(nlst), '\n'),
        ("Pre-processed NLST with lung cancer: ", len(nlst[nlst["lung_cancer"]==1]), '\n'),
        ("Patients from NLST who fit into US recommendation: ", len(nlst_criteria), '\n'),
        ("Patients from NLST who fit into US recommendation with lung cancer: ", len(nlst_criteria[nlst_criteria["lung_cancer"]==1]), '\n\n'),
        "------- USPSTF RECOMMENDATION ON NLST -------- \n",
        ("TP : ", TP_nlst, '\n'),
        ("FN : ", FN_nlst, '\n'),
        ("TN : ", TN_nlst, '\n'),
        ("FP : ", FP_nlst, '\n'),
        ("Precision : ", round(TP_nlst/(TP_nlst+FP_nlst),3), '\n'),
        ("Recall : ", round(TP_nlst/(TP_nlst+FN_nlst),3), '\n\n\n'),
    ]
    # Render every entry to text and emit the whole report in one write.
    f.write("".join(
        item if isinstance(item, str) else item[0] + str(item[1]) + item[2]
        for item in uspstf_report
    ))
print("File edited")

File edited
