[bc9e98]: / benchmark / disease2icd.py

Download this file

59 lines (35 with data), 1.1 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
'''
input:
"data/drugbank_trial_conditions.csv"
process:
map each lower-cased disease name to its ICD-10 code
output:
"data/disease2icd.pkl"
'''
import csv, pickle
from collections import defaultdict
def disease2icd_func(csv_path="data/drugbank_trial_conditions.csv"):
    """Build a mapping from lower-cased disease name to its ICD code.

    Reads the CSV at ``csv_path``, skips the first (header) row, and for
    every remaining row whose code cell (zero-based column 8) is not blank,
    associates that code with the two disease names found in zero-based
    columns 2 and 6. Both names are lower-cased before being used as keys;
    the code is stored exactly as it appears in the file.

    Args:
        csv_path: Path of the conditions CSV. Defaults to the location the
            script has always read from, so existing zero-argument callers
            behave as before.

    Returns:
        A dict mapping each disease name (str) to its single code (str).

    Raises:
        AssertionError: If any disease name is associated with more than
            one distinct code.
        IndexError: If a non-empty row has fewer than nine columns.
    """
    codes_by_disease = defaultdict(set)

    # newline='' lets the csv module do its own line-ending handling, which
    # is what its documentation requires for quoted fields to parse correctly.
    with open(csv_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to index.
                continue
            icd10code = row[8]
            if icd10code.strip() == '':
                continue
            codes_by_disease[row[2].lower()].add(icd10code)
            codes_by_disease[row[6].lower()].add(icd10code)

    disease2icdcode = {}
    for disease, codes in codes_by_disease.items():
        # Raised explicitly (instead of via an `assert` statement) so the
        # check still runs under `python -O`; the exception type is kept as
        # AssertionError for backward compatibility.
        if len(codes) != 1:
            raise AssertionError(
                "disease %r maps to multiple ICD codes: %s"
                % (disease, sorted(codes))
            )
        (disease2icdcode[disease],) = codes
    return disease2icdcode
### disease -> icd code
if __name__ == "__main__":
    # Build the disease -> ICD code mapping and persist it for later use.
    disease2icdcode = disease2icd_func()
    # A context manager guarantees the pickle file is flushed and closed,
    # rather than leaving the handle open until garbage collection.
    with open("data/disease2icd.pkl", 'wb') as pkl_file:
        pickle.dump(disease2icdcode, pkl_file)
    # Print only the entries whose disease name is a single word.
    for disease, icd in disease2icdcode.items():
        if len(disease.split()) == 1:
            print(disease, icd)