# Extracted from a side-by-side diff view ("Switch to unified view").
#
# File: JSONparse/clinicalTrailsAPI.py
1
import json
2
import sys
3
import urllib.request
4
import urllib
5
import urllib.error
6
7
8
9
def getJSON(nct_id, base_url="https://clinicaltrialsapi.cancer.gov/v1/clinical-trial/"):
    """Fetch and decode the JSON document for one trial identifier.

    The request URL is ``base_url + nct_id``.

    Args:
        nct_id: Identifier string appended verbatim to ``base_url``.
        base_url: URL prefix to query. Defaults to the
            clinicaltrialsapi.cancer.gov v1 clinical-trial endpoint.

    Returns:
        The parsed JSON value, or ``None`` when the resource could not be
        retrieved (HTTP error status, unreachable host, failed read) or when
        the response body is not valid UTF-8 encoded JSON.
    """
    try:
        with urllib.request.urlopen(base_url + nct_id) as response:
            payload = response.read()
    except OSError:
        # urllib.error.HTTPError and URLError are both OSError subclasses, as
        # are low-level socket failures raised while reading the body.
        return None

    try:
        return json.loads(payload.decode())
    except ValueError:
        # Covers json.JSONDecodeError and UnicodeDecodeError, e.g. when the
        # server answers with an HTML error page instead of JSON.
        return None
18
19
20
def getEligabliltyUnstructured(jsonFile):
    """Flatten the unstructured eligibility criteria into one TSV fragment.

    Each entry of ``jsonFile['eligibility']['unstructured']`` becomes
    ``"<inclusion_indicator>:<description>"``, where the description has all
    newline and carriage-return characters removed and surrounding whitespace
    stripped. Entries are joined with tab characters.

    Args:
        jsonFile: Parsed trial document (a dict).

    Returns:
        The tab-joined string; an empty string when the document has no
        ``eligibility`` / ``unstructured`` section or the section is empty.
    """
    # A missing or null section is treated as "no criteria" instead of raising.
    eligibility = jsonFile.get('eligibility') or {}
    criteria_list = eligibility.get('unstructured') or []

    entries = []
    for criteria in criteria_list:
        # A missing or null description contributes an empty text part.
        description = criteria.get("description") or ""
        flattened = description.replace("\n", "").replace("\r", "").strip()
        entries.append(str(criteria.get("inclusion_indicator")) + ":" + flattened)
    return "\t".join(entries)
25
26
def getEligabliltyStructured(jsonFile):
    """Extract the structured eligibility fields as a list of strings.

    The result has one entry per name returned by
    ``getEligabliltyStructuredHeader()``, in the same order. Each entry is
    ``str()`` of the value found under
    ``jsonFile['eligibility']['structured']``, or the literal ``"None"`` when
    the field (or the whole section) is absent.

    Keys present in the structured section but not in the header list are
    reported once each on standard error, so they neither repeat per header
    nor end up inside the tab-separated rows written to standard output.

    Args:
        jsonFile: Parsed trial document (a dict).

    Returns:
        A list of strings, one per header column.
    """
    headers = getEligabliltyStructuredHeader()
    # A missing or null section is treated as an empty mapping.
    eligibility = jsonFile.get('eligibility') or {}
    structured_fields = eligibility.get('structured') or {}

    for key in structured_fields:
        if key not in headers:
            print("unexpected structured eligibility key: " + str(key), file=sys.stderr)

    return [
        str(structured_fields[name]) if name in structured_fields else "None"
        for name in headers
    ]


def getEligabliltyStructuredHeader():
    """Return the ordered column names of the structured eligibility output."""
    return ["gender","max_age", "max_age_in_years", "max_age_number", "max_age_unit", "min_age","min_age_in_years","min_age_number","min_age_unit"]
47
48
def printStructedEligablility(in_file):
    """Print a TSV table of structured eligibility fields for each input row.

    A header row (``nct_id`` followed by the structured column names) is
    printed first. For every line of ``in_file`` the first tab-separated
    field is taken as the trial identifier and looked up with ``getJSON``;
    the row is either the identifier plus its structured fields, or the
    identifier plus ``404ERROR`` when the lookup returned ``None``.

    Processing stops at the first blank line. A line whose identifier is the
    literal ``nci_id`` is skipped, matching how ``main`` treats it.

    Args:
        in_file: Iterable of text lines (e.g. an open text file).
    """
    print("nct_id\t" + "\t".join(getEligabliltyStructuredHeader()))
    for line in in_file:
        if line.strip() == "":
            break
        nct_id = line.split("\t")[0].strip()
        if nct_id == "nci_id":
            continue
        jsonToParse = getJSON(nct_id)
        if jsonToParse is None:
            print(nct_id + "\t404ERROR")
        else:
            # Reuse the document already fetched: a second request would
            # double the network traffic and could itself fail after the
            # None check above has passed.
            print(nct_id + "\t" + "\t".join(getEligabliltyStructured(jsonToParse)))
60
61
def main():
    """Command-line entry point.

    Usage: ``<script> <tsv_file> [--structured]``

    Reads ``sys.argv[1]`` as a UTF-8 text file (undecodable bytes are
    ignored) whose first tab-separated column holds trial identifiers.
    By default each identifier is printed with its unstructured eligibility
    criteria; with ``--structured`` the structured eligibility table is
    printed instead. Identifiers whose lookup returns ``None`` are printed
    with ``404ERROR``.

    In the default mode, processing stops at the first blank line and a line
    whose identifier is the literal ``nci_id`` is skipped.

    Raises:
        SystemExit: With a usage message when no input file is given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <tsv_file> [--structured]")

    # Unstructured output stays the default so existing invocations behave
    # exactly as before; the flag makes the structured report reachable.
    printUnstructed = "--structured" not in sys.argv[2:]

    with open(sys.argv[1], encoding="utf8", errors='ignore') as in_file:
        if not printUnstructed:
            printStructedEligablility(in_file)
            return

        for line in in_file:
            if line.strip() == "":
                break
            nct_id = line.split("\t")[0].strip()
            if nct_id == "nci_id":
                continue
            jsonToParse = getJSON(nct_id)
            if jsonToParse is None:
                print(nct_id + "\t404ERROR")
            else:
                print(nct_id + "\t" + getEligabliltyUnstructured(jsonToParse))
76
77
78
79
80
81
82
83
84
85
if __name__ == '__main__':
    main()
    sys.stdout.flush()
    # sys.exit() rather than the bare exit(): the latter is injected by the
    # `site` module for interactive use and is absent when it is not loaded.
    sys.exit()