200 lines (199 with data), 7.0 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Remember the directory the kernel started in (first run only), then move to\n",
"# its parent. Anchoring on the saved absolute path keeps this cell idempotent:\n",
"# re-running it does not climb another directory level, so the relative\n",
"# './save_folder' paths used later keep pointing at the same place.\n",
"if '_NOTEBOOK_DIR' not in globals():\n",
"    _NOTEBOOK_DIR = os.getcwd()\n",
"os.chdir(os.path.join(_NOTEBOOK_DIR, os.pardir))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import DeepPurpose.oneliner as oneliner\n",
"from DeepPurpose import dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the LCK example through DeepPurpose's dataset helper and unpack it into\n",
"# the target amino-acid sequence and its human-readable name.\n",
"lck_example = dataset.load_LCK()\n",
"target, target_name = lck_example"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'MGCGCSSHPEDDWMENIDVCENCHYPIVPLDGKGTLLIRNGSEVRDPLVTYEGSNPPASPLQDNLVIALHSYEPSHDGDLGFEKGEQLRILEQSGEWWKAQSLTTGQEGFIPFNFVAKANSLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQNQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYTNASDGLCTRLSRPCQTQKPQKPWWEDEWEVPRETLKLVERLGAGQFGEVWMGYYNGHTKVAVKSLKQGSMSPDAFLAEANLMKQLQHQRLVRLYAVVTQEPIYIITEYMENGSLVDFLKTPSGIKLTINKLLDMAAQIAEGMAFIEERNYIHRDLRAANILVSDTLSCKIADFGLARLIEDNEYTAREGAKFPIKWTAPEAINYGTFTIKSDVWSFGILLTEIVTHGRIPYPGMTNPEVIQNLERGYRMVRPDNCPEELYQLMRLCWKERPEDRPTFDYLRSVLEDFFTATEGQYQPQP'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bare last expression: show the loaded amino-acid sequence as the cell output.\n",
"target"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking if pretrained directory is valid...\n",
"Beginning to load the pretrained models...\n",
"Using pretrained model and making predictions...\n",
"repurposing...\n",
"in total: 6111 drug-target pairs\n",
"encoding drug...\n",
"unique drugs: 6111\n",
"drug encoding finished...\n",
"encoding protein...\n",
"unique target sequence: 1\n",
"protein encoding finished...\n",
"Done.\n",
"predicting...\n",
"---------------\n",
"Predictions from model 1 with drug encoding MPNN and target encoding CNN are done...\n",
"-------------\n",
"repurposing...\n",
"in total: 6111 drug-target pairs\n",
"encoding drug...\n",
"unique drugs: 6111\n",
"drug encoding finished...\n",
"encoding protein...\n",
"unique target sequence: 1\n",
"protein encoding finished...\n",
"Done.\n",
"predicting...\n",
"---------------\n",
"Predictions from model 2 with drug encoding CNN and target encoding CNN are done...\n",
"-------------\n",
"repurposing...\n",
"in total: 6111 drug-target pairs\n",
"encoding drug...\n",
"unique drugs: 6111\n",
"drug encoding finished...\n",
"encoding protein...\n",
"unique target sequence: 1\n",
"protein encoding finished...\n",
"Done.\n",
"predicting...\n",
"---------------\n",
"Predictions from model 3 with drug encoding Morgan and target encoding CNN are done...\n",
"-------------\n",
"repurposing...\n",
"in total: 6111 drug-target pairs\n",
"encoding drug...\n",
"unique drugs: 6111\n",
"drug encoding finished...\n",
"encoding protein...\n",
"unique target sequence: 1\n",
"-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU. Calculate your time by the unique target sequence #, instead of the entire dataset.\n",
"protein encoding finished...\n",
"Done.\n",
"predicting...\n",
"---------------\n",
"Predictions from model 4 with drug encoding Morgan and target encoding AAC are done...\n",
"-------------\n",
"repurposing...\n",
"in total: 6111 drug-target pairs\n",
"encoding drug...\n",
"unique drugs: 6111\n",
"rdkit not found this smiles: [Y+3] convert to all 1 features\n",
"rdkit not found this smiles: [K].I convert to all 1 features\n",
"drug encoding finished...\n",
"encoding protein...\n",
"unique target sequence: 1\n",
"-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU. Calculate your time by the unique target sequence #, instead of the entire dataset.\n",
"protein encoding finished...\n",
"Done.\n",
"predicting...\n",
"---------------\n",
"Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...\n",
"-------------\n",
"repurposing...\n",
"in total: 6111 drug-target pairs\n",
"encoding drug...\n",
"unique drugs: 6111\n",
"drug encoding finished...\n",
"encoding protein...\n",
"unique target sequence: 1\n",
"protein encoding finished...\n",
"Done.\n",
"predicting...\n",
"---------------\n",
"Predictions from model 6 with drug encoding Transformer and target encoding CNN are done...\n",
"-------------\n",
"models prediction finished...\n",
"aggregating results...\n",
"---------------\n",
"Drug Repurposing Result for Tyrosine-protein kinase Lck\n",
"+------+-------------+-----------------------------+---------------+\n",
"| Rank | Drug Name | Target Name | Binding Score |\n",
"+------+-------------+-----------------------------+---------------+\n",
"| 1 | 441336.0 | Tyrosine-protein kinase Lck | 3.39 |\n",
"| 2 | 6917849.0 | Tyrosine-protein kinase Lck | 6.10 |\n",
"| 3 | 23947600.0 | Tyrosine-protein kinase Lck | 8.76 |\n",
"| 4 | 27924.0 | Tyrosine-protein kinase Lck | 9.56 |\n",
"| 5 | 445643.0 | Tyrosine-protein kinase Lck | 13.61 |\n",
"| 6 | 16490.0 | Tyrosine-protein kinase Lck | 13.77 |\n",
"| 7 | 13109.0 | Tyrosine-protein kinase Lck | 14.80 |\n",
"| 8 | 6230.0 | Tyrosine-protein kinase Lck | 18.10 |\n",
"| 9 | 11180808.0 | Tyrosine-protein kinase Lck | 18.32 |\n",
"| 10 | 124079495.0 | Tyrosine-protein kinase Lck | 19.91 |\n",
"checkout ./save_folder/results_aggregation/repurposing.txt for the whole list\n",
"\n"
]
}
],
"source": [
"# Where results are written and where the pretrained BindingDB models are read from.\n",
"SAVE_DIR = './save_folder'\n",
"PRETRAINED_DIR = './save_folder/pretrained_models/DeepPurpose_BindingDB/'\n",
"\n",
"# Run the one-line repurposing pipeline for the loaded target. No drug list is\n",
"# passed here, so oneliner.repurpose supplies the drug library itself.\n",
"oneliner.repurpose(\n",
"    target=target,\n",
"    target_name=target_name,\n",
"    save_dir=SAVE_DIR,\n",
"    pretrained_dir=PRETRAINED_DIR,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}