Switch to unified view

a b/benchmark/description2icd10.py
1
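'''
Build a mapping from ICD-10 description to ICD-10 code.
input:  "icdcode/icd_10_direct_mapping.csv"
        (the commented-out legacy extractor below read
        "icdcode/ICD-10-Medical-Diagnosis-Codes.pdf")
output: "icdcode/description2icd10.pkl"
'''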
7
8
9
10
import icd10, PyPDF2, pickle, os, csv 
11
12
# def code2description(code):
13
#   # code = "A1803"
14
#   code = icd10.find(code)
15
#   description = code.description 
16
#   return description 
17
18
# def extract_icdcode(file, pkl_file):
19
#   f = open(file, 'rb')
20
#   description2icd = dict()
21
#   fileReader = PyPDF2.PdfFileReader(file)
22
#   for pagenum in range(fileReader.numPages):
23
#       pageObj = fileReader.getPage(pagenum)
24
#       text = pageObj.extractText()
25
#       text = text.split('\n')
26
#       text = list(filter(lambda x:len(x.strip())>0, text))
27
#       if pagenum == 0:
28
#           text = list(filter(lambda x:x[0]=='A', text))
29
#           print(text)
30
#       for i in text:
31
#           icd = i.split()[0]
32
#           description = ' '.join(i.strip().split()[1:]).lower()
33
#           description2icd[description] = icd 
34
#           # print(description, icd)
35
#   print("code number is", str(len(description2icd)))
36
#   # code number is 76331
37
#   pickle.dump(description2icd, open(pkl_file, 'wb'))
38
39
40
41
###  csv_file = 'icdcode/icd_10_direct_mapping.csv'
42
def extract_icdcode(csv_file, pkl_file):
    """Build a description -> ICD code mapping from a CSV file and pickle it.

    For every non-empty CSV row, column 0 is taken as the code and column 1
    as the description. Each code is printed as it is processed, and the
    number of distinct descriptions is printed at the end. If the same
    description occurs in several rows, the last row wins.

    Args:
        csv_file: Path of the comma-delimited input file.
        pkl_file: Path the pickled ``dict`` (description -> code) is
            written to.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
    """
    description2icd = {}
    # newline='' is what the csv module expects, so that newlines embedded
    # in quoted fields are handled by the parser rather than by open().
    with open(csv_file, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # Blank lines (e.g. a trailing empty line) parse as [].
                continue
            icd, description = row[0], row[1]
            print(icd)
            description2icd[description] = icd
    print("code number is", str(len(description2icd)))
    # Context manager so the pickle is flushed and closed deterministically.
    with open(pkl_file, 'wb') as pklfile:
        pickle.dump(description2icd, pklfile)
53
54
55
56
57
if __name__ == "__main__":
    # Script entry point: build the description -> ICD-10 pickle if it does
    # not exist yet, then load it.
    # file = "icdcode/ICD-10-Medical-Diagnosis-Codes.pdf"
    csv_file = 'icdcode/icd_10_direct_mapping.csv'
    pkl_file = "icdcode/description2icd10.pkl"
    # An existing pickle is reused as-is; it is only rebuilt when missing.
    if not os.path.exists(pkl_file):
        extract_icdcode(csv_file, pkl_file)
    # NOTE: pickle.load can execute arbitrary code; only load a pickle file
    # that this script (or another trusted source) produced.
    # Context manager so the file handle is closed after loading.
    with open(pkl_file, 'rb') as pklfile:
        description2icd10 = pickle.load(pklfile)
    # for description, icd in description2icd10.items():
    #   #if len(description.split())==1:
    #   print(description, "  -->  ", icd)
67
68
69
70
71
72