Download this file

50 lines (39 with data), 1.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python3
"""
Enrichment class for enriching PubChem IDs with their SMILES representation.
"""
from typing import List
import pubchempy as pcp
from .enrichments import Enrichments
class EnrichmentWithPubChem(Enrichments):
    """
    Enrichment class that resolves PubChem compound IDs (CIDs) to SMILES
    strings using PubChemPy.
    """

    def enrich_documents(self, texts: List[str]) -> List[str]:
        """
        Enrich a list of input PubChem IDs with their SMILES representation.

        Args:
            texts: The list of PubChem IDs (CIDs) to be enriched.

        Returns:
            A list with one entry per input ID, in the same order: the
            isomeric SMILES of the compound, or ``None`` when PubChem
            rejects the request or reports that the record was not found.
        """
        enriched_pubchem_ids = []
        for pubchem_cid in texts:
            try:
                compound = pcp.Compound.from_cid(pubchem_cid)
            except (pcp.BadRequestError, pcp.NotFoundError):
                # A malformed ID and an unknown ID are treated alike: keep a
                # placeholder so the output stays positionally aligned with
                # the input instead of aborting the whole batch.
                enriched_pubchem_ids.append(None)
                continue
            enriched_pubchem_ids.append(compound.isomeric_smiles)
        return enriched_pubchem_ids

    def enrich_documents_with_rag(self, texts: List[str], docs) -> List[str]:
        """
        Enrich a list of input PubChem IDs with their SMILES representation.

        Args:
            texts: The list of PubChem IDs (CIDs) to be enriched.
            docs: Ignored by this implementation; the lookup uses only
                ``texts``.

        Returns:
            The same list that ``enrich_documents(texts)`` returns.
        """
        return self.enrich_documents(texts)