|
a |
|
b/JSONparse/clinicalTrailsAPI.py |
|
|
1 |
import json |
|
|
2 |
import sys |
|
|
3 |
import urllib.request |
|
|
4 |
import urllib |
|
|
5 |
import urllib.error |
|
|
6 |
|
|
|
7 |
|
|
|
8 |
|
|
|
9 |
def getJSON(nct_id):
    """Fetch and parse the clinical-trial record for *nct_id*.

    Requests ``https://clinicaltrialsapi.cancer.gov/v1/clinical-trial/<nct_id>``
    and parses the response body as JSON.

    Args:
        nct_id: Trial identifier; appended verbatim to the endpoint URL.

    Returns:
        The parsed JSON document, or ``None`` when the server answers with
        an HTTP error status or the body is not valid UTF-8 encoded JSON.

    Failures other than these (for example ``urllib.error.URLError`` raised
    for a connection problem) are not caught and propagate to the caller.
    """
    endpoint = "https://clinicaltrialsapi.cancer.gov/v1/clinical-trial/" + nct_id
    try:
        with urllib.request.urlopen(endpoint) as response:
            raw_body = response.read()
    except urllib.error.HTTPError:
        return None

    try:
        return json.loads(raw_body.decode())
    except ValueError:
        # Both UnicodeDecodeError and json.JSONDecodeError derive from
        # ValueError. The previous inner handler caught HTTPError here,
        # which decoding/parsing can never raise.
        return None
|
|
18 |
|
|
|
19 |
|
|
|
20 |
def getEligabliltyUnstructured(jsonFile):
    """Flatten the unstructured eligibility criteria into one TSV fragment.

    Args:
        jsonFile: Parsed trial document (a dict). The criteria are read from
            ``jsonFile['eligibility']['unstructured']``; each criterion must
            provide ``inclusion_indicator`` and ``description``.

    Returns:
        A single string with one ``<inclusion_indicator>:<description>``
        entry per criterion, entries separated by tabs. Carriage returns and
        newlines are removed from each description and surrounding
        whitespace is stripped. Returns an empty string when the document
        has no ``eligibility`` or no ``unstructured`` section.
    """
    # Tolerate an absent (or null) section instead of raising KeyError.
    eligibility = jsonFile.get('eligibility') or {}
    criteria_list = eligibility.get('unstructured') or []

    entries = []
    for criteria in criteria_list:
        # Line breaks are dropped so every record stays on one output line.
        description = criteria["description"].replace("\n", "").replace("\r", "").strip()
        entries.append(str(criteria["inclusion_indicator"]) + ":" + description)
    return "\t".join(entries)
|
|
25 |
|
|
|
26 |
def getEligabliltyStructured(jsonFile):
    """Return the structured eligibility values of a trial as strings.

    Args:
        jsonFile: Parsed trial document (a dict). Values are read from
            ``jsonFile['eligibility']['structured']``.

    Returns:
        A list with one string per name returned by
        ``getEligabliltyStructuredHeader()``, in that order. A name that is
        missing from the structured section (or a document without that
        section) yields the literal string ``"None"``.

    Side effects:
        Every key of the structured section that is not a known header name
        is printed to standard output, once per call.
    """
    headers = getEligabliltyStructuredHeader()
    structured_section = (jsonFile.get('eligibility') or {}).get('structured') or {}

    # Report unexpected keys a single time. This scan used to sit inside the
    # per-header loop, so each unknown key was printed once per header.
    for k in structured_section:
        if k not in headers:
            print (k)

    return [
        str(structured_section[h]) if h in structured_section else "None"
        for h in headers
    ]
|
|
44 |
|
|
|
45 |
def getEligabliltyStructuredHeader():
    """Return the ordered names of the structured eligibility fields.

    A new list is built on every call, so callers may modify the result.
    """
    field_names = [
        "gender",
        # Upper age limit and its derived representations.
        "max_age",
        "max_age_in_years",
        "max_age_number",
        "max_age_unit",
        # Lower age limit and its derived representations.
        "min_age",
        "min_age_in_years",
        "min_age_number",
        "min_age_unit",
    ]
    return field_names
|
|
47 |
|
|
|
48 |
def printStructedEligablility(in_file):
    """Print a tab-separated table of structured eligibility per trial.

    The first output line is a header: ``nct_id`` followed by the names from
    ``getEligabliltyStructuredHeader()``. Each following line holds a trial
    id and its structured values, or ``404ERROR`` when ``getJSON`` returned
    ``None`` for that id.

    Args:
        in_file: Iterable of text lines; the trial id is the first
            tab-separated field of each line. Reading stops at the first
            blank line.
    """
    print ("nct_id\t"+"\t".join(getEligabliltyStructuredHeader()))
    for line in in_file:
        if line.strip()=="":
            # A blank line marks the end of the id list.
            break
        nct_id = line.split("\t")[0].strip()
        if nct_id == "nci_id":
            # Column-header row of the input, not a trial id (main() skips
            # the same marker).
            continue
        jsonToParse = getJSON(nct_id)
        if jsonToParse is None:
            print (nct_id+"\t404ERROR")
        else:
            # Reuse the document fetched above; it used to be downloaded a
            # second time here, and that second fetch was not None-checked.
            print (nct_id+"\t"+"\t".join(getEligabliltyStructured(jsonToParse)))
|
|
60 |
|
|
|
61 |
def main(printUnstructed=True):
    """Read trial ids from the file named in ``sys.argv[1]`` and print a report.

    The input is opened as UTF-8 with undecodable bytes ignored. The trial id
    is the first tab-separated field of each line; a line whose id is
    ``nci_id`` is treated as a column header and skipped, and reading stops
    at the first blank line.

    Args:
        printUnstructed: When true (the default), print one line per trial
            with the unstructured eligibility criteria, or ``404ERROR`` when
            ``getJSON`` returned ``None``. When false, delegate to
            ``printStructedEligablility`` to print the structured table.

    Exits with a usage message on standard error when no input file was
    given on the command line.
    """
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("usage: python clinicalTrailsAPI.py <input_file>")

    with open(sys.argv[1], encoding="utf8", errors='ignore') as in_file:
        if not printUnstructed:
            printStructedEligablility(in_file)
            return

        for line in in_file:
            if line.strip()=="":
                # A blank line marks the end of the id list.
                break
            nct_id = line.split("\t")[0].strip()
            if nct_id == "nci_id":
                continue
            jsonToParse = getJSON(nct_id)
            if jsonToParse is None:
                print (nct_id+"\t404ERROR")
            else:
                print (nct_id+"\t"+getEligabliltyUnstructured(jsonToParse))
|
|
76 |
|
|
|
77 |
|
|
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
# Script entry point: run the report, then flush buffered output and exit.
if __name__ == '__main__':
    main()
    sys.stdout.flush()
    # sys.exit() is always available, unlike the exit() helper that the site
    # module installs for interactive sessions.
    sys.exit()