184 lines (183 with data), 5.6 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"\n",
"import numpy as np\n",
"import rdkit\n",
"from rdkit import Chem\n",
"\n",
"import h5py, ast, pickle\n",
"\n",
"# Occupy a GPU for the model to be loaded \n",
"%env CUDA_DEVICE_ORDER=PCI_BUS_ID\n",
"# GPU ID, if occupied change to an available GPU ID listed under !nvidia-smi\n",
"%env CUDA_VISIBLE_DEVICES=2 \n",
"\n",
"from ddc_pub import ddc_v3 as ddc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_descriptors(smiles_list, qsar_model=None, show_actives=False, active_thresh=0.5, qed_thresh=0.5):\n",
"    \"\"\"Calculate molecular descriptors for every valid SMILES in a list.\n",
"\n",
"    Each valid molecule yields one row\n",
"    ``[logp, tpsa, mw, qed, hba, hbd, active]``. ``mw`` is computed with\n",
"    ``Descriptors.ExactMolWt`` and ``active`` is the second component of\n",
"    ``qsar_model.predict_proba`` applied to the molecule's 2048-bit Morgan\n",
"    (radius 2) fingerprint.\n",
"\n",
"    Args:\n",
"        smiles_list: Iterable of SMILES strings.\n",
"        qsar_model: Classifier exposing ``predict_proba``. If None, the\n",
"            activity column is filled with NaN instead of failing on\n",
"            every molecule.\n",
"        show_actives: If True, print and display every molecule whose\n",
"            activity and QED both exceed their thresholds.\n",
"        active_thresh: Activity probability a molecule must exceed to be shown.\n",
"        qed_thresh: QED value a molecule must exceed to be shown.\n",
"\n",
"    Returns:\n",
"        A float np.ndarray of shape (n_valid, 7). SMILES that cannot be\n",
"        parsed, or whose processing raises, are reported on stdout and\n",
"        skipped, so rows do not necessarily align with ``smiles_list``.\n",
"    \"\"\"\n",
"    from rdkit import Chem, DataStructs\n",
"    from rdkit.Chem import Descriptors, rdMolDescriptors, AllChem, QED\n",
"\n",
"    n_bits = 2048  # length of the Morgan fingerprint fed to the QSAR model\n",
"    n_descriptors = 7  # logp, tpsa, mw, qed, hba, hbd, active\n",
"    descriptors = []\n",
"\n",
"    for smiles in smiles_list:\n",
"        # Convert to mol; RDKit returns None for SMILES it cannot parse\n",
"        mol = Chem.MolFromSmiles(smiles)\n",
"        if not mol:\n",
"            print(\"Invalid generation.\")\n",
"            continue\n",
"\n",
"        try:\n",
"            logp = Descriptors.MolLogP(mol)\n",
"            tpsa = Descriptors.TPSA(mol)\n",
"            molwt = Descriptors.ExactMolWt(mol)\n",
"            hba = rdMolDescriptors.CalcNumHBA(mol)\n",
"            hbd = rdMolDescriptors.CalcNumHBD(mol)\n",
"            qed = QED.qed(mol)\n",
"\n",
"            if qsar_model is None:\n",
"                # No classifier supplied: keep the row, mark activity as unknown\n",
"                active = np.nan\n",
"            else:\n",
"                # Calculate fingerprints\n",
"                fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=n_bits)\n",
"                ecfp4 = np.zeros((n_bits,))\n",
"                DataStructs.ConvertToNumpyArray(fp, ecfp4)\n",
"                # Predict activity and pick only the second component\n",
"                active = qsar_model.predict_proba([ecfp4])[0][1]\n",
"\n",
"            descriptors.append([logp, tpsa, molwt, qed, hba, hbd, active])\n",
"\n",
"            # Comparisons against NaN are False, so molecules without a\n",
"            # predicted activity are never shown as actives.\n",
"            if show_actives and active > active_thresh and qed > qed_thresh:\n",
"                print(\"active_proba: %.2f, QED: %.2f.\" % (active, qed))\n",
"                display(mol)\n",
"\n",
"        except Exception as e:\n",
"            # Best effort: report the failure (e.g. a sanitization error such as\n",
"            # \"Explicit valence for atom # 17 N, 4, is greater than permitted\")\n",
"            # and move on to the next molecule.\n",
"            print(e)\n",
"\n",
"    # reshape keeps the result 2-D even when no molecule was valid\n",
"    return np.asarray(descriptors, dtype=float).reshape(-1, n_descriptors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load QSAR model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"qsar_model_name = \"models/qsar_model.pickle\"\n",
"with open(qsar_model_name, \"rb\") as file:\n",
" qsar_model = pickle.load(file)[\"classifier_sv\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load PCB cRNN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import existing (trained) model\n",
"# Ignore any warning(s) about training configuration or non-seriazable keyword arguments\n",
"model_name = \"models/pcb_model\"\n",
"model = ddc.DDC(model_name=model_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Select conditions for generated molecules"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Custom conditions\n",
"logp = 3.5\n",
"tpsa = 70.0\n",
"mw = 350.0\n",
"qed = 0.8\n",
"hba = 4.0\n",
"hbd = 1.0\n",
"drd2_active_proba = 0.9\n",
"\n",
"target = np.array([logp, tpsa, mw, qed, hba, hbd, drd2_active_proba])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert back to SMILES\n",
"smiles_out, _ = model.predict(latent=target, temp=0) # Change temp to 1 for more funky results\n",
"\n",
"# Calculate the properties of the generated structure and compare\n",
"get_descriptors(smiles_list=[smiles_out], qsar_model=qsar_model, show_actives=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ddc_env (python_3.6.7)",
"language": "python",
"name": "ddc_env"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}