[4717e2]: / DEMO / oneliner_repurpose_LCK_gene.ipynb

Download this file

200 lines (199 with data), 7.0 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import DeepPurpose.oneliner as oneliner\n",
    "from DeepPurpose import dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "target, target_name = dataset.load_LCK()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'MGCGCSSHPEDDWMENIDVCENCHYPIVPLDGKGTLLIRNGSEVRDPLVTYEGSNPPASPLQDNLVIALHSYEPSHDGDLGFEKGEQLRILEQSGEWWKAQSLTTGQEGFIPFNFVAKANSLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQNQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYTNASDGLCTRLSRPCQTQKPQKPWWEDEWEVPRETLKLVERLGAGQFGEVWMGYYNGHTKVAVKSLKQGSMSPDAFLAEANLMKQLQHQRLVRLYAVVTQEPIYIITEYMENGSLVDFLKTPSGIKLTINKLLDMAAQIAEGMAFIEERNYIHRDLRAANILVSDTLSCKIADFGLARLIEDNEYTAREGAKFPIKWTAPEAINYGTFTIKSDVWSFGILLTEIVTHGRIPYPGMTNPEVIQNLERGYRMVRPDNCPEELYQLMRLCWKERPEDRPTFDYLRSVLEDFFTATEGQYQPQP'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking if pretrained directory is valid...\n",
      "Beginning to load the pretrained models...\n",
      "Using pretrained model and making predictions...\n",
      "repurposing...\n",
      "in total: 6111 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 6111\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 1\n",
      "protein encoding finished...\n",
      "Done.\n",
      "predicting...\n",
      "---------------\n",
      "Predictions from model 1 with drug encoding MPNN and target encoding CNN are done...\n",
      "-------------\n",
      "repurposing...\n",
      "in total: 6111 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 6111\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 1\n",
      "protein encoding finished...\n",
      "Done.\n",
      "predicting...\n",
      "---------------\n",
      "Predictions from model 2 with drug encoding CNN and target encoding CNN are done...\n",
      "-------------\n",
      "repurposing...\n",
      "in total: 6111 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 6111\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 1\n",
      "protein encoding finished...\n",
      "Done.\n",
      "predicting...\n",
      "---------------\n",
      "Predictions from model 3 with drug encoding Morgan and target encoding CNN are done...\n",
      "-------------\n",
      "repurposing...\n",
      "in total: 6111 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 6111\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 1\n",
      "-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU. Calculate your time by the unique target sequence #, instead of the entire dataset.\n",
      "protein encoding finished...\n",
      "Done.\n",
      "predicting...\n",
      "---------------\n",
      "Predictions from model 4 with drug encoding Morgan and target encoding AAC are done...\n",
      "-------------\n",
      "repurposing...\n",
      "in total: 6111 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 6111\n",
      "rdkit not found this smiles: [Y+3] convert to all 1 features\n",
      "rdkit not found this smiles: [K].I convert to all 1 features\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 1\n",
      "-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU. Calculate your time by the unique target sequence #, instead of the entire dataset.\n",
      "protein encoding finished...\n",
      "Done.\n",
      "predicting...\n",
      "---------------\n",
      "Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...\n",
      "-------------\n",
      "repurposing...\n",
      "in total: 6111 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 6111\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 1\n",
      "protein encoding finished...\n",
      "Done.\n",
      "predicting...\n",
      "---------------\n",
      "Predictions from model 6 with drug encoding Transformer and target encoding CNN are done...\n",
      "-------------\n",
      "models prediction finished...\n",
      "aggregating results...\n",
      "---------------\n",
      "Drug Repurposing Result for Tyrosine-protein kinase Lck\n",
      "+------+-------------+-----------------------------+---------------+\n",
      "| Rank |  Drug Name  |         Target Name         | Binding Score |\n",
      "+------+-------------+-----------------------------+---------------+\n",
      "|  1   |   441336.0  | Tyrosine-protein kinase Lck |      3.39     |\n",
      "|  2   |  6917849.0  | Tyrosine-protein kinase Lck |      6.10     |\n",
      "|  3   |  23947600.0 | Tyrosine-protein kinase Lck |      8.76     |\n",
      "|  4   |   27924.0   | Tyrosine-protein kinase Lck |      9.56     |\n",
      "|  5   |   445643.0  | Tyrosine-protein kinase Lck |     13.61     |\n",
      "|  6   |   16490.0   | Tyrosine-protein kinase Lck |     13.77     |\n",
      "|  7   |   13109.0   | Tyrosine-protein kinase Lck |     14.80     |\n",
      "|  8   |    6230.0   | Tyrosine-protein kinase Lck |     18.10     |\n",
      "|  9   |  11180808.0 | Tyrosine-protein kinase Lck |     18.32     |\n",
      "|  10  | 124079495.0 | Tyrosine-protein kinase Lck |     19.91     |\n",
      "checkout ./save_folder/results_aggregation/repurposing.txt for the whole list\n",
      "\n"
     ]
    }
   ],
   "source": [
    "oneliner.repurpose(target = target, \n",
    "                    target_name = target_name, \n",
    "                    save_dir = './save_folder',\n",
    "                    pretrained_dir = './save_folder/pretrained_models/DeepPurpose_BindingDB/')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}