Uncertainty-Quantificatio / Git / [bc9e98] /benchmark/description2icd10.py

Models:

joseph-gordon/

Uncertainty-Quantificatio

Downloads: 1

[bc9e98]: / benchmark / description2icd10.py

History

Download this file

73 lines (53 with data), 2.0 kB

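'''
Build a lookup that maps each ICD-10 description to its ICD-10 code.
input: "icdcode/icd_10_direct_mapping.csv"
       ("icdcode/ICD-10-Medical-Diagnosis-Codes.pdf" was the input of the earlier, now commented-out extractor)
output:  "icdcode/description2icd10.pkl" 

'''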



import icd10, PyPDF2, pickle, os, csv 

# def code2description(code):
# 	# code = "A1803"
# 	code = icd10.find(code)
# 	description = code.description 
# 	return description 

# def extract_icdcode(file, pkl_file):
# 	f = open(file, 'rb')
# 	description2icd = dict()
# 	fileReader = PyPDF2.PdfFileReader(file)
# 	for pagenum in range(fileReader.numPages):
# 		pageObj = fileReader.getPage(pagenum)
# 		text = pageObj.extractText()
# 		text = text.split('\n')
# 		text = list(filter(lambda x:len(x.strip())>0, text))
# 		if pagenum == 0:
# 			text = list(filter(lambda x:x[0]=='A', text))
# 			print(text)
# 		for i in text:
# 			icd = i.split()[0]
# 			description = ' '.join(i.strip().split()[1:]).lower()
# 			description2icd[description] = icd 
# 			# print(description, icd)
# 	print("code number is", str(len(description2icd)))
# 	# code number is 76331
# 	pickle.dump(description2icd, open(pkl_file, 'wb'))



###  csv_file = 'icdcode/icd_10_direct_mapping.csv'
def extract_icdcode(csv_file, pkl_file):
	"""Build a description -> ICD-10 code lookup from a CSV file and pickle it.

	The first column of each row is taken as the ICD-10 code and the second
	column as its description; any further columns are ignored.  When the
	same description occurs more than once, the code from the last such row
	wins.  Rows with fewer than two fields (for example blank lines) are
	skipped.

	Args:
		csv_file: Path to the comma-separated input file.
		pkl_file: Path the pickled ``dict`` (description -> code) is written to.

	Returns:
		None.  The mapping is written to ``pkl_file``; every code that is
		stored and the final number of distinct descriptions are printed to
		stdout.
	"""
	description2icd = dict()
	# newline='' is what the csv module asks for so that quoted fields
	# containing line breaks are parsed correctly.
	with open(csv_file, 'r', newline='') as csvfile:
		for row in csv.reader(csvfile, delimiter=','):
			# csv.reader yields [] for a blank line and a malformed line may
			# hold a single field; skip those instead of raising IndexError.
			if len(row) < 2:
				continue
			icd = row[0]
			description = row[1]
			print(icd)
			description2icd[description] = icd
	print("code number is", str(len(description2icd)))
	# Close the output deterministically so the pickle is fully flushed
	# before anyone reads it back.
	with open(pkl_file, 'wb') as pkl:
		pickle.dump(description2icd, pkl)




if __name__ == "__main__":
	# Script entry point: make sure the pickled lookup exists, then load it.
	# file = "icdcode/ICD-10-Medical-Diagnosis-Codes.pdf"
	csv_file = 'icdcode/icd_10_direct_mapping.csv'
	pkl_file = "icdcode/description2icd10.pkl"
	# Only build the pickle when it is missing; an existing file is reused
	# as-is, even if the CSV has changed since it was written.
	if not os.path.exists(pkl_file):
		extract_icdcode(csv_file, pkl_file)
	# Load through a context manager so the file handle is closed promptly.
	# NOTE: pickle must only be used on trusted files; this one is produced
	# by extract_icdcode above.
	with open(pkl_file, 'rb') as pkl:
		description2icd10 = pickle.load(pkl)
	# Uncomment to inspect the loaded mapping:
	# for description, icd in description2icd10.items():
	# 	#if len(description.split())==1:
	# 	print(description, "  -->  ", icd)