|
a |
|
b/aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py |
|
|
1 |
#!/usr/bin/env python3 |
|
|
2 |
|
|
|
3 |
""" |
|
|
4 |
Utility functions for PubChem lookups, e.g. converting DrugBank IDs to PubChem CIDs.
|
|
5 |
""" |
|
|
6 |
|
|
|
7 |
import logging |
|
|
8 |
import requests |
|
|
9 |
import hydra |
|
|
10 |
|
|
|
11 |
# Initialize logger
# NOTE: basicConfig runs at import time. Per the stdlib logging docs it only
# configures the root logger (here: level INFO) when the root logger has no
# handlers yet, so an application that set up logging earlier is unaffected.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by the functions below.
logger = logging.getLogger(__name__)
|
|
14 |
|
|
|
15 |
def drugbank_id2pubchem_cid(drugbank_id):
    """
    Convert DrugBank ID to PubChem CID.

    The lookup URL is built from the Hydra config entry
    ``utils.pubchem_utils.drugbank_id_to_pubchem_cid_url`` followed by the
    DrugBank ID and ``/JSON``. The JSON response is then scanned for the
    first compound entry whose ``id.type`` equals 1 and that carries a
    ``cid`` value.

    Args:
        drugbank_id: The DrugBank ID of the drug (a string; it is
            concatenated directly into the request URL).

    Returns:
        The PubChem CID of the drug, or None when the response contains no
        matching compound entry with a CID.

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts, a
        response body that is not valid JSON) are not caught here and
        propagate to the caller.
    """
    logger.info("Load Hydra configuration for PubChem ID conversion.")
    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(config_name='config',
                            overrides=['utils/pubchem_utils=default'])
        cfg = cfg.utils.pubchem_utils
    # Prepare the URL
    pubchem_url_for_drug = cfg.drugbank_id_to_pubchem_cid_url + drugbank_id + '/JSON'
    # Get the data; the timeout keeps a stalled connection from hanging the caller
    response = requests.get(pubchem_url_for_drug, timeout=60)
    data = response.json()
    # Extract the PubChem CID.
    # Return as soon as a CID is found: a plain `break` would only leave the
    # inner loop, letting a later substance overwrite the result (possibly
    # with None when its matching compound has no "cid" entry).
    for substance in data.get("PC_Substances", []):
        for compound in substance.get("compound", []):
            compound_id = compound.get("id", {})
            # NOTE(review): type 1 is presumably PubChem's "standardized"
            # compound form -- confirm against the PC_Substance schema.
            if compound_id.get("type") != 1:
                continue
            cid = compound_id.get("id", {}).get("cid")
            if cid is not None:
                return cid
    return None