Diff of /benchmark/disease2icd.py [000000] .. [bc9e98]

Switch to unified view

a b/benchmark/disease2icd.py
1
'''
2
input:  
3
4
    "data/drugbank_trial_conditions.csv"
5
6
process:
7
    map each disease name to its ICD-10 code
8
9
output: 
10
11
    "data/disease2icd.pkl" 
12
13
14
15
'''
16
17
18
19
20
21
22
import csv, pickle 
23
from collections import defaultdict 
24
def disease2icd_func(csv_path="data/drugbank_trial_conditions.csv"):
    """Build a mapping from lower-cased disease name to its ICD-10 code.

    The CSV's first row is treated as a header and skipped. For every
    remaining row, column 8 is read as the ICD-10 code and columns 2 and 6
    as two disease names; both names (lower-cased) are mapped to the code.
    Rows whose code is blank, and completely empty rows, are ignored.

    Args:
        csv_path: Path of the comma-separated input file. Defaults to the
            location the script has always read from.

    Returns:
        dict mapping each lower-cased disease name to a single code string
        (stored exactly as it appears in the file, without stripping).

    Raises:
        AssertionError: if any disease name is associated with more than
            one distinct code.
        IndexError: if a non-empty row has fewer than 9 columns.
    """
    codes_by_disease = defaultdict(set)

    # newline='' is what the csv module asks for so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with open(csv_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to map.
                continue
            diseasename1 = row[2].lower()
            diseasename2 = row[6].lower()
            icd10code = row[8]
            if not icd10code.strip():
                continue
            codes_by_disease[diseasename1].add(icd10code)
            codes_by_disease[diseasename2].add(icd10code)

    disease2icdcode = {}
    for disease, codes in codes_by_disease.items():
        # Raised explicitly rather than via `assert` so the check is not
        # stripped under `python -O`; the exception type is kept so existing
        # callers see the same failure as before.
        if len(codes) != 1:
            raise AssertionError(
                f"disease {disease!r} maps to multiple ICD codes: {sorted(codes)}"
            )
        (disease2icdcode[disease],) = codes
    return disease2icdcode
42
43
44
### disease -> icd code
45
if __name__ == "__main__":
    # Build the disease -> ICD code mapping and persist it for later stages.
    disease2icdcode = disease2icd_func()
    # A context manager guarantees the pickle file is flushed and closed;
    # passing a bare open(...) to pickle.dump leaves the handle dangling.
    with open("data/disease2icd.pkl", 'wb') as pkl_file:
        pickle.dump(disease2icdcode, pkl_file)
    # Print only the entries whose disease name is a single word.
    for disease, icd in disease2icdcode.items():
        if len(disease.split()) == 1:
            print(disease, icd)
51
52
53
54
55
56
57
58