|
a |
|
b/utilities.py |
|
|
1 |
#!/usr/bin/python |
|
|
2 |
|
|
|
3 |
""" |
|
|
4 |
Utility functions. |
|
|
5 |
|
|
|
6 |
""" |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
import time |
|
|
10 |
import logging |
|
|
11 |
import requests |
|
|
12 |
import json |
|
|
13 |
from config import settings |
|
|
14 |
from Authentication import Authentication |
|
|
15 |
|
|
|
16 |
|
|
|
17 |
# API key for the UMLS REST ticket services, read from the project settings.
umls_api = settings['apis']['umls']
# Module-level UMLS REST client and ticket-granting ticket (TGT). They are
# created once, when this module is imported, and serve as the default
# arguments of the ticket helper functions defined in this module.
# NOTE(review): gettgt() presumably contacts the UMLS service, which would
# make importing this module perform a network request -- confirm.
AuthClient = Authentication(umls_api)
tgt = AuthClient.gettgt()

# Raise the "requests" and "urllib3" loggers to WARNING so that their
# DEBUG/INFO records are not emitted.
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
|
27 |
|
|
|
28 |
|
|
|
29 |
|
|
|
30 |
# logging.basicConfig( |
|
|
31 |
# format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s", |
|
|
32 |
# handlers=[ |
|
|
33 |
# #logging.FileHandler("%s" % settings['log_path']), |
|
|
34 |
# logging.StreamHandler() |
|
|
35 |
# ]) |
|
|
36 |
# logging.info('lala') |
|
|
37 |
|
|
|
38 |
# # create logger |
|
|
39 |
# logger = logging.getLogger() |
|
|
40 |
# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') |
|
|
41 |
# ch = logging.StreamHandler() |
|
|
42 |
# ch.setFormatter(formatter) |
|
|
43 |
# logger.addHandler(ch) |
|
|
44 |
|
|
|
45 |
# fh = logging.FileHandler(settings['log_path']) |
|
|
46 |
# fh.setFormatter(formatter) |
|
|
47 |
# logger.addHandler(fh) |
|
|
48 |
|
|
|
49 |
|
|
|
50 |
def get_umls_ticket2(tgt=tgt, AuthClient=AuthClient, apikey=umls_api):
    """
    Get a single use ticket for the UMLS REST services.
    By default the module-level Authentication client and ticket
    granting ticket are used. When either of them is missing
    (e.g. passed as None to force a refresh) the missing piece is
    re-created before the single use ticket is requested.
    Input:
        - tgt:
        ticket granting ticket as returned by AuthClient.gettgt().
        Default is the module-level one. If falsy, a new one is requested
        - AuthClient:
        Authentication client. Default is the module-level one.
        If falsy, a new client is built from apikey
        - apikey: str,
        UMLS REST services api-key, used only when a new client has
        to be created. Default is the key found in the settings
    Output:
        - string of the generated ticket
    """

    if not AuthClient:
        # No usable client: build one. Fall back to the configured key so that
        # an explicit apikey=None keeps working as it did before.
        AuthClient = Authentication(apikey or umls_api)
        # A ticket granting ticket is only meaningful for the client that
        # requested it, so discard whatever was passed in.
        tgt = None
    if not tgt:
        tgt = AuthClient.gettgt()
    return AuthClient.getst(tgt)
|
|
70 |
|
|
|
71 |
|
|
|
72 |
def get_umls_ticket(apikey=None, AuthClient=AuthClient, tgt=tgt):
    """
    Return a single use ticket for the UMLS REST services.
    Without an api-key the given (by default the module-level)
    Authentication client and ticket granting ticket are used as they are.
    With an api-key, a fresh client and ticket granting ticket are
    created from that key and the ticket is generated through them.
    Input:
        - apikey: str,
        UMLS REST services api-key. Default is None, meaning
        the already established service is used
        - AuthClient:
        Authentication client used when apikey is None
        - tgt:
        ticket granting ticket used when apikey is None
    Output:
        - string of the generated ticket
    """

    if apikey is not None:
        # An explicit key was supplied: set up a dedicated client and
        # ticket granting ticket for it
        AuthClient = Authentication(apikey)
        tgt = AuthClient.gettgt()
    # Single exit point: request the ticket from whichever service is active
    return AuthClient.getst(tgt)
|
|
95 |
|
|
|
96 |
|
|
|
97 |
def time_log(phrase, time_start=None):
    """
    Log a phrase at INFO level through the current logging config,
    optionally followed by the time elapsed since a starting point.
    Input:
        - phrase: str,
        text to be logged
        - time_start: float,
        starting timestamp, as returned by time.time(). Default is None,
        in which case only the phrase is logged
    Output:
        - 1, always
    """

    # Compare against None explicitly, so that a starting timestamp of 0 is
    # still treated as a real starting point and not as "no timing wanted"
    if time_start is not None:
        # Arguments are handed to logging so formatting happens only if the
        # record is actually emitted
        logging.info('%s in : %.2f seconds.', phrase, time.time() - time_start)
    else:
        logging.info('%s', phrase)
    return 1
|
|
109 |
|
|
|
110 |
|
|
|
111 |
def get_concept_from_source(source_id, source, apikey=tgt):
    """
    Function that maps an entity from another source to UMLS concepts.
    The search request is attempted at most twice; after a failed
    attempt (HTTP error or timeout) a new ticket service is set up
    before retrying.
    Input:
        - source_id: str,
        string of the unique id from the source knowledge base
        - source: str,
        string code-name of the knowledge base, as used by the
        UMLS REST services (e.g. drugbank -> DRUGBANK, MESH->MSH)
        - apikey:
        currently unused, kept for backward compatibility. Tickets are
        always generated through get_umls_ticket2
    Output:
        - concepts: list,
        list of dictionaries representing concepts mapped to the
        source_id entity. Each dictionary has keys "label", "cuid", "sem_types"
        Check get_concept_from_cui for more details
    Raises:
        - Exception, when both attempts fail
    """

    url = "https://uts-ws.nlm.nih.gov/rest/search/current"
    max_attempts = 2
    ticket = get_umls_ticket2()
    for attempt in range(1, max_attempts + 1):
        params = {'string': source_id, 'sabs': source, 'searchType': 'exact',
                  'inputType': 'sourceUi', 'ticket': ticket}
        try:
            # Same timeout as the other UMLS requests in this module, so a
            # stalled connection cannot block the caller forever
            r = requests.get(url, params=params, timeout=120)
        except requests.exceptions.Timeout:
            time_log('~'*25 + ' TIMEOUT ERROR 120 SECONDS'+'~'*25)
            r = None
        # Compare with None explicitly: a Response object is falsy when the
        # HTTP status is an error
        if r is not None:
            r.encoding = 'utf-8'
            if r.ok:
                jsonData = json.loads(r.text)["result"]
                # Get cuis related to source_id
                cuis = [res['ui'] for res in jsonData['results']]
                # Get concepts from cuis, skipping the 'NONE' placeholder
                return [get_concept_from_cui(cui) for cui in cuis if cui != 'NONE']
            time_log(r.url)
        time_log('Error getting concept from: Source %s | ID: %s' % (source, source_id))
        if attempt < max_attempts:
            # Tickets are single use: only ask for a new one when it is
            # actually going to be spent on a retry
            time_log('~'*25 + ' GETTING NEW TICKET SERVICE' + '~'*24)
            ticket = get_umls_ticket2(None, None, umls_api)
    time_log('~'*25 + ' EXITING AFTER TRYING TWICE WITH NEW TICKET ' + '~'*25)
    raise Exception('Could not get concept from source %s with id %s after %d attempts'
                    % (source, source_id, max_attempts))
|
|
163 |
|
|
|
164 |
|
|
|
165 |
def get_concept_from_cui(cui, apikey=None):
    """
    Function that fetches a concept's attributes from the corresponding cui.
    The request is retried once, with a new ticket service, if it times out.
    Input:
        - cui: str,
        string of cui that will be looked up
        - apikey:
        currently unused, kept for backward compatibility. Tickets are
        always generated through get_umls_ticket2
    Output:
        - res: dictionary,
        dictionary with the concepts attributes as fetched. Specifically,
        "label", "cuid"(cui) and "sem_types"(comma delimited string of
        the semantic type abbreviations, see get_sem_type_abbr)
    Raises:
        - Exception, when the request times out twice
        - ValueError, when the service answers with an HTTP error status
    """

    url = "https://uts-ws.nlm.nih.gov/rest/content/current/CUI/" + cui
    ticket = get_umls_ticket2()
    r = None
    times = 0
    # Test with "is None": a Response object is falsy on HTTP error statuses,
    # and those must leave the loop and be reported below
    while r is None:
        try:
            r = requests.get(url, params={'ticket': ticket}, timeout=120)
        except requests.exceptions.Timeout:
            time_log('~'*25 + ' TIMEOUT ERROR 120 SECONDS'+'~'*25)
            times += 1
            if times >= 2:
                time_log('Error getting concept from: CUI %s' % cui)
                time_log('~'*25 + ' EXITING AFTER TRYING TWICE WITH NEW TICKET ' + '~'*25)
                raise Exception('Timed out %d times getting concept from CUI %s'
                                % (times, cui))
            # Tickets are single use: request a new one only when retrying
            time_log('~'*25 + ' GETTING NEW TICKET SERVICE' + '~'*24)
            ticket = get_umls_ticket2(None, None, umls_api)
    r.encoding = 'utf-8'
    if not r.ok:
        time_log(r.url)
        time_log('Error getting concept from cui : %s' % cui)
        raise ValueError('HTTP status %s getting concept from cui %s'
                         % (r.status_code, cui))
    jsonData = json.loads(r.text)["result"]
    res = {'label': jsonData['name'], 'cuid': cui}
    sem_types = []
    # For each semantic type of the entity
    for stys in jsonData["semanticTypes"]:
        # Keep only the TUI code from the uri e.g.
        # https://uts-ws.nlm.nih.gov/rest/semantic-network/current/TUI/T116
        code_tui = stys['uri'].split('/')[-1]
        # Fetch the abbreviation of this TUI code
        sem_types.append(get_sem_type_abbr(code_tui))
    # Comma separated string
    res['sem_types'] = ",".join(sem_types)
    return res
|
|
224 |
|
|
|
225 |
|
|
|
226 |
def get_sem_type_abbr(code_tui, apikey=None):
    """
    Function that fetches a semantic-type's abbreviation.
    The request is retried once, with a new ticket service, if it times out.
    Input:
        - code_tui: str,
        string of TUI code that will be looked up
        - apikey:
        currently unused, kept for backward compatibility. Tickets are
        always generated through get_umls_ticket2
    Output:
        string, abbreviation of the code (e.g. "gngm")
    Raises:
        - Exception, when the request times out twice
        - ValueError, when the service answers with an HTTP error status
    """

    url = "https://uts-ws.nlm.nih.gov/rest/semantic-network/current/TUI/" + code_tui
    ticket = get_umls_ticket2()
    r = None
    times = 0
    # Test with "is None": a Response object is falsy on HTTP error statuses,
    # and those must leave the loop and be reported below
    while r is None:
        try:
            r = requests.get(url, params={'ticket': ticket}, timeout=120)
        except requests.exceptions.Timeout:
            time_log('~'*25 + ' TIMEOUT ERROR 120 SECONDS'+'~'*25)
            times += 1
            if times >= 2:
                time_log('Error getting semantic type abbreviation: %s' % code_tui)
                time_log('~'*25 + ' EXITING AFTER TRYING TWICE WITH NEW TICKET ' + '~'*25)
                raise Exception('Timed out %d times getting sem-type from TUI %s'
                                % (times, code_tui))
            # Tickets are single use: request a new one only when retrying
            time_log('~'*25 + ' GETTING NEW TICKET SERVICE' + '~'*24)
            ticket = get_umls_ticket2(None, None, umls_api)
    r.encoding = 'utf-8'
    if not r.ok:
        time_log(r.url)
        time_log('Error getting sem-type from TUI : %s' % code_tui)
        raise ValueError('HTTP status %s getting sem-type from TUI %s'
                         % (r.status_code, code_tui))
    jsonData = json.loads(r.text)["result"]
    return jsonData['abbreviation']