Switch to unified view

a b/aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py
1
#!/usr/bin/env python3
2
3
"""
4
Utility functions for PubChem lookups, e.g. converting DrugBank IDs to PubChem CIDs.
5
"""
6
7
import logging
8
import requests
9
import hydra
10
11
# Initialize logger
12
logging.basicConfig(level=logging.INFO)
13
logger = logging.getLogger(__name__)
14
15
def drugbank_id2pubchem_cid(drugbank_id):
    """
    Convert DrugBank ID to PubChem CID.

    The request URL is built from the `drugbank_id_to_pubchem_cid_url` entry of
    the `utils/pubchem_utils` Hydra configuration, followed by the DrugBank ID
    and the `/JSON` suffix. The JSON response is scanned for the first compound
    entry whose `id.type` equals 1 and that carries a `cid`.

    Args:
        drugbank_id: The DrugBank ID of the drug (concatenated into the URL,
            so it must be a string).

    Returns:
        The PubChem CID of the drug, or None if the response contains no
        matching compound entry.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or the
            response body cannot be decoded as JSON (recent `requests`
            versions; older ones raise a plain ValueError for bad JSON).
    """
    logger.info("Load Hydra configuration for PubChem ID conversion.")
    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(config_name='config',
                            overrides=['utils/pubchem_utils=default'])
        cfg = cfg.utils.pubchem_utils
    # Prepare the URL
    pubchem_url_for_drug = cfg.drugbank_id_to_pubchem_cid_url + drugbank_id + '/JSON'
    # Get the data. The HTTP status is deliberately not checked here: an
    # error response that still decodes to JSON without "PC_Substances"
    # simply yields None below.
    response = requests.get(pubchem_url_for_drug, timeout=60)
    data = response.json()
    # Extract the PubChem CID. Return on the first hit: a plain `break` would
    # only leave the inner loop and let a later substance overwrite the result.
    for substance in data.get("PC_Substances", []):
        for compound in substance.get("compound", []):
            compound_id = compound.get("id", {})
            # NOTE(review): type == 1 presumably marks the standardized
            # compound of a substance record - confirm against the PubChem
            # PC_Substance schema.
            if compound_id.get("type") == 1:
                cid = compound_id.get("id", {}).get("cid")
                if cid is not None:
                    return cid
    return None