"""
Build a lookup table that maps an ICD-10 description to its ICD-10 code.

input:  "icdcode/icd_10_direct_mapping.csv"
output: "icdcode/description2icd10.pkl"

An earlier version of this script parsed
"icdcode/ICD-10-Medical-Diagnosis-Codes.pdf" instead; that code is kept
below, commented out, for reference only.
"""

import csv
import os
import pickle

# icd10 and PyPDF2 are referenced only by the commented-out legacy helpers
# below, so the CSV-based path must not fail when they are not installed.
try:
    import icd10
except ImportError:
    icd10 = None
try:
    import PyPDF2
except ImportError:
    PyPDF2 = None


# --- legacy PDF-based extraction (disabled) --------------------------------
# def code2description(code):
#     # code = "A1803"
#     code = icd10.find(code)
#     description = code.description
#     return description

# def extract_icdcode(file, pkl_file):
#     f = open(file, 'rb')
#     description2icd = dict()
#     fileReader = PyPDF2.PdfFileReader(file)
#     for pagenum in range(fileReader.numPages):
#         pageObj = fileReader.getPage(pagenum)
#         text = pageObj.extractText()
#         text = text.split('\n')
#         text = list(filter(lambda x:len(x.strip())>0, text))
#         if pagenum == 0:
#             text = list(filter(lambda x:x[0]=='A', text))
#         print(text)
#         for i in text:
#             icd = i.split()[0]
#             description = ' '.join(i.strip().split()[1:]).lower()
#             description2icd[description] = icd
#             # print(description, icd)
#     print("code number is", str(len(description2icd)))
#     # code number is 76331
#     pickle.dump(description2icd, open(pkl_file, 'wb'))


### csv_file = 'icdcode/icd_10_direct_mapping.csv'
def extract_icdcode(csv_file: str, pkl_file: str) -> None:
    """Read a two-column CSV and pickle a ``{description: icd}`` dict.

    The first column of every row is taken as the ICD code and the second
    as its description.  Every row is treated as data (no header is
    skipped).  When the same description occurs more than once, the code
    from the last such row wins.  Rows with fewer than two columns
    (including blank lines) are skipped.

    Args:
        csv_file: Path of the comma-separated input file.
        pkl_file: Path the pickled dictionary is written to; an existing
            file is overwritten.

    Returns:
        None.  The number of distinct descriptions is printed to stdout.
    """
    description2icd = {}
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields containing embedded newlines.
    with open(csv_file, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if len(row) < 2:
                # Blank or truncated line: nothing to map.
                continue
            icd, description = row[0], row[1]
            description2icd[description] = icd
    print("code number is", str(len(description2icd)))
    # Close (and therefore flush) the output before returning, so callers
    # can safely read the file back immediately.
    with open(pkl_file, 'wb') as pklfile:
        pickle.dump(description2icd, pklfile)


if __name__ == "__main__":
    # file = "icdcode/ICD-10-Medical-Diagnosis-Codes.pdf"
    csv_file = 'icdcode/icd_10_direct_mapping.csv'
    pkl_file = "icdcode/description2icd10.pkl"
    # The pickle acts as a cache: it is only rebuilt when it is missing.
    if not os.path.exists(pkl_file):
        extract_icdcode(csv_file, pkl_file)
    with open(pkl_file, 'rb') as pklfile:
        description2icd10 = pickle.load(pklfile)
    # for description, icd in description2icd10.items():
    #     #if len(description.split())==1:
    #     print(description, " --> ", icd)