--- a
+++ b/DEMO/MPNN_AAC_Kiba.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "# Move to the repository root only when not already there, so re-running this cell is idempotent.\n",
+    "if not os.path.isdir('DeepPurpose'):\n",
+    "    os.chdir('../')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from time import time\n",
+    "\n",
+    "import DeepPurpose.DTI as models\n",
+    "from DeepPurpose.dataset import load_process_KIBA\n",
+    "from DeepPurpose.utils import data_process, generate_config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Beginning Processing...\n",
+      "Beginning to extract zip file...\n",
+      "Done!\n",
+      "in total: 118254 drug-target pairs\n",
+      "encoding drug...\n",
+      "unique drugs: 2068\n",
+      "drug encoding finished...\n",
+      "encoding protein...\n",
+      "unique target sequence: 229\n",
+      "-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU. Calculate your time by the unique target sequence #, instead of the entire dataset.\n",
+      "protein encoding finished...\n",
+      "splitting dataset...\n",
+      "Done.\n",
+      "cost about 219 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "t1 = time()\n",
+    "X_drug, X_target, y = load_process_KIBA('./data/', binary=False)\n",
+    "\n",
+    "drug_encoding = 'MPNN'\n",
+    "target_encoding = 'AAC'\n",
+    "train, val, test = data_process(X_drug, X_target, y,\n",
+    "                                drug_encoding, target_encoding,\n",
+    "                                split_method='random', frac=[0.7, 0.1, 0.2])\n",
+    "\n",
+    "# Classifier/optimizer settings follow https://arxiv.org/abs/1801.10193; CNN target options are omitted because the AAC target encoding does not use them (NOTE(review): confirm against generate_config defaults).\n",
+    "config = generate_config(drug_encoding = drug_encoding,\n",
+    "                         target_encoding = target_encoding,\n",
+    "                         cls_hidden_dims = [1024,1024,512],\n",
+    "                         train_epoch = 100,\n",
+    "                         test_every_X_epoch = 10,\n",
+    "                         LR = 0.001,\n",
+    "                         batch_size = 128,\n",
+    "                         hidden_dim_drug = 128,\n",
+    "                         mpnn_hidden_size = 128,\n",
+    "                         mpnn_depth = 3\n",
+    "                        )\n",
+    "model = models.model_initialize(**config)\n",
+    "t2 = time()\n",
+    "print(\"cost about \" + str(int(t2-t1)) + \" seconds\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Let's use CPU/s!\n",
+      "--- Data Preparation ---\n",
+      "--- Go for Training ---\n",
+      "Training at Epoch 1 iteration 0 with loss 139.375. Total time 0.00083 hours\n",
+      "Training at Epoch 1 iteration 100 with loss 0.99343. Total time 0.02472 hours\n",
+      "Training at Epoch 1 iteration 200 with loss 0.83609. Total time 0.04777 hours\n",
+      "Training at Epoch 1 iteration 300 with loss 0.96589. Total time 0.07444 hours\n",
+      "Training at Epoch 1 iteration 400 with loss 0.85949. Total time 0.1 hours\n",
+      "Training at Epoch 1 iteration 500 with loss 0.54122. Total time 0.13111 hours\n",
+      "Training at Epoch 1 iteration 600 with loss 0.58168. Total time 0.16777 hours\n",
+      "Validation at Epoch 1 , MSE: 0.57232 , Pearson Correlation: 0.43984 with p-value: 0.0 , Concordance Index: 0.68495\n",
+      "Training at Epoch 2 iteration 0 with loss 0.42175. Total time 0.18944 hours\n",
+      "Training at Epoch 2 iteration 100 with loss 0.59724. Total time 0.22166 hours\n",
+      "Training at Epoch 2 iteration 200 with loss 0.67120. Total time 0.25972 hours\n",
+      "Training at Epoch 2 iteration 300 with loss 0.66331. Total time 0.285 hours\n",
+      "Training at Epoch 2 iteration 400 with loss 0.86964. Total time 0.31 hours\n",
+      "Training at Epoch 2 iteration 500 with loss 0.63331. Total time 0.33472 hours\n",
+      "Training at Epoch 2 iteration 600 with loss 0.72784. Total time 0.36166 hours\n",
+      "Validation at Epoch 2 , MSE: 0.53912 , Pearson Correlation: 0.48472 with p-value: 0.0 , Concordance Index: 0.69957\n",
+      "Training at Epoch 3 iteration 0 with loss 0.53930. Total time 0.38305 hours\n",
+      "Training at Epoch 3 iteration 100 with loss 0.52499. 
Total time 0.40972 hours\n", + "Training at Epoch 3 iteration 200 with loss 1.20812. Total time 0.43444 hours\n", + "Training at Epoch 3 iteration 300 with loss 1.05639. Total time 0.45888 hours\n", + "Training at Epoch 3 iteration 400 with loss 0.94682. Total time 0.48416 hours\n", + "Training at Epoch 3 iteration 500 with loss 0.61475. Total time 0.50861 hours\n", + "Training at Epoch 3 iteration 600 with loss 0.68008. Total time 0.53305 hours\n", + "Validation at Epoch 3 , MSE: 0.53130 , Pearson Correlation: 0.51384 with p-value: 0.0 , Concordance Index: 0.70860\n", + "Training at Epoch 4 iteration 0 with loss 0.50197. Total time 0.55583 hours\n", + "Training at Epoch 4 iteration 100 with loss 0.97383. Total time 0.58055 hours\n", + "Training at Epoch 4 iteration 200 with loss 0.74706. Total time 0.60527 hours\n", + "Training at Epoch 4 iteration 300 with loss 0.61534. Total time 0.63027 hours\n", + "Training at Epoch 4 iteration 400 with loss 0.63400. Total time 0.65527 hours\n", + "Training at Epoch 4 iteration 500 with loss 0.72008. Total time 0.68 hours\n", + "Training at Epoch 4 iteration 600 with loss 0.65495. Total time 0.70472 hours\n", + "Validation at Epoch 4 , MSE: 0.50166 , Pearson Correlation: 0.53950 with p-value: 0.0 , Concordance Index: 0.71986\n", + "Training at Epoch 5 iteration 0 with loss 0.41625. Total time 0.72333 hours\n", + "Training at Epoch 5 iteration 100 with loss 0.49403. Total time 0.74805 hours\n", + "Training at Epoch 5 iteration 200 with loss 0.68507. Total time 0.77277 hours\n", + "Training at Epoch 5 iteration 300 with loss 0.48458. Total time 0.7975 hours\n", + "Training at Epoch 5 iteration 400 with loss 0.96469. Total time 0.82583 hours\n", + "Training at Epoch 5 iteration 500 with loss 0.55342. Total time 0.85305 hours\n", + "Training at Epoch 5 iteration 600 with loss 0.80672. 
Total time 0.87944 hours\n", + "Validation at Epoch 5 , MSE: 0.48277 , Pearson Correlation: 0.56151 with p-value: 0.0 , Concordance Index: 0.72517\n", + "Training at Epoch 6 iteration 0 with loss 0.65440. Total time 0.89972 hours\n", + "Training at Epoch 6 iteration 100 with loss 0.49574. Total time 0.92666 hours\n", + "Training at Epoch 6 iteration 200 with loss 0.65854. Total time 0.95333 hours\n", + "Training at Epoch 6 iteration 300 with loss 0.61574. Total time 0.98194 hours\n", + "Training at Epoch 6 iteration 400 with loss 0.57750. Total time 1.00944 hours\n", + "Training at Epoch 6 iteration 500 with loss 0.56961. Total time 1.03805 hours\n", + "Training at Epoch 6 iteration 600 with loss 0.54698. Total time 1.06611 hours\n", + "Validation at Epoch 6 , MSE: 0.48746 , Pearson Correlation: 0.58950 with p-value: 0.0 , Concordance Index: 0.73456\n", + "Training at Epoch 7 iteration 0 with loss 0.53380. Total time 1.08833 hours\n", + "Training at Epoch 7 iteration 100 with loss 0.55768. Total time 1.11444 hours\n", + "Training at Epoch 7 iteration 200 with loss 0.57367. Total time 1.14277 hours\n", + "Training at Epoch 7 iteration 300 with loss 0.60914. Total time 1.17055 hours\n", + "Training at Epoch 7 iteration 400 with loss 0.58427. Total time 1.19666 hours\n", + "Training at Epoch 7 iteration 500 with loss 0.61994. Total time 1.22305 hours\n", + "Training at Epoch 7 iteration 600 with loss 0.70587. Total time 1.25194 hours\n", + "Validation at Epoch 7 , MSE: 0.46277 , Pearson Correlation: 0.60217 with p-value: 0.0 , Concordance Index: 0.73545\n", + "Training at Epoch 8 iteration 0 with loss 0.62481. Total time 1.27194 hours\n", + "Training at Epoch 8 iteration 100 with loss 0.60135. Total time 1.29916 hours\n", + "Training at Epoch 8 iteration 200 with loss 0.61045. Total time 1.32611 hours\n", + "Training at Epoch 8 iteration 300 with loss 0.46631. Total time 1.35305 hours\n", + "Training at Epoch 8 iteration 400 with loss 0.57605. 
Total time 1.38083 hours\n", + "Training at Epoch 8 iteration 500 with loss 0.55145. Total time 1.40666 hours\n", + "Training at Epoch 8 iteration 600 with loss 0.88128. Total time 1.4325 hours\n", + "Validation at Epoch 8 , MSE: 0.54907 , Pearson Correlation: 0.61538 with p-value: 0.0 , Concordance Index: 0.74227\n", + "Training at Epoch 9 iteration 0 with loss 0.66877. Total time 1.45305 hours\n", + "Training at Epoch 9 iteration 100 with loss 0.55947. Total time 1.47972 hours\n", + "Training at Epoch 9 iteration 200 with loss 0.80887. Total time 1.50527 hours\n", + "Training at Epoch 9 iteration 300 with loss 0.51789. Total time 1.53638 hours\n", + "Training at Epoch 9 iteration 400 with loss 0.45560. Total time 1.56305 hours\n", + "Training at Epoch 9 iteration 500 with loss 0.54652. Total time 1.58916 hours\n", + "Training at Epoch 9 iteration 600 with loss 0.51819. Total time 1.61583 hours\n", + "Validation at Epoch 9 , MSE: 0.41752 , Pearson Correlation: 0.63835 with p-value: 0.0 , Concordance Index: 0.74886\n", + "Training at Epoch 10 iteration 0 with loss 0.53559. Total time 1.63583 hours\n", + "Training at Epoch 10 iteration 100 with loss 0.42198. Total time 1.66277 hours\n", + "Training at Epoch 10 iteration 200 with loss 0.74855. Total time 1.69055 hours\n", + "Training at Epoch 10 iteration 300 with loss 0.56010. Total time 1.72138 hours\n", + "Training at Epoch 10 iteration 400 with loss 0.64494. Total time 1.74861 hours\n", + "Training at Epoch 10 iteration 500 with loss 0.40499. Total time 1.77416 hours\n", + "Training at Epoch 10 iteration 600 with loss 0.38197. Total time 1.80527 hours\n", + "Validation at Epoch 10 , MSE: 0.39831 , Pearson Correlation: 0.66272 with p-value: 0.0 , Concordance Index: 0.75689\n", + "Training at Epoch 11 iteration 0 with loss 0.44420. Total time 1.83083 hours\n", + "Training at Epoch 11 iteration 100 with loss 0.61767. Total time 1.86916 hours\n", + "Training at Epoch 11 iteration 200 with loss 0.59886. 
Total time 1.90138 hours\n", + "Training at Epoch 11 iteration 300 with loss 0.44023. Total time 1.93888 hours\n", + "Training at Epoch 11 iteration 400 with loss 0.49944. Total time 1.98166 hours\n", + "Training at Epoch 11 iteration 500 with loss 0.80822. Total time 2.02527 hours\n", + "Training at Epoch 11 iteration 600 with loss 0.60348. Total time 2.06 hours\n", + "Validation at Epoch 11 , MSE: 0.50192 , Pearson Correlation: 0.67912 with p-value: 0.0 , Concordance Index: 0.76373\n", + "--- Go for Testing ---\n", + "Up to Epoch 10 Testing MSE: 0.6241257333934345 , Pearson Correlation: 0.5794750762149183 with p-value: 0.0 , Concordance Index: 0.7073133872497231\n", + "Training at Epoch 12 iteration 0 with loss 0.47469. Total time 2.095 hours\n", + "Training at Epoch 12 iteration 100 with loss 0.53183. Total time 2.12861 hours\n", + "Training at Epoch 12 iteration 200 with loss 0.45367. Total time 2.16027 hours\n", + "Training at Epoch 12 iteration 300 with loss 0.39957. Total time 2.18583 hours\n", + "Training at Epoch 12 iteration 400 with loss 0.55815. Total time 2.21361 hours\n", + "Training at Epoch 12 iteration 500 with loss 0.45266. Total time 2.23916 hours\n", + "Training at Epoch 12 iteration 600 with loss 0.44607. Total time 2.27666 hours\n", + "Validation at Epoch 12 , MSE: 0.39189 , Pearson Correlation: 0.68264 with p-value: 0.0 , Concordance Index: 0.75734\n", + "Training at Epoch 13 iteration 0 with loss 0.46262. Total time 2.30166 hours\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training at Epoch 13 iteration 100 with loss 0.40722. Total time 2.33388 hours\n", + "Training at Epoch 13 iteration 200 with loss 0.44606. Total time 2.36583 hours\n", + "Training at Epoch 13 iteration 300 with loss 0.34755. Total time 2.39638 hours\n", + "Training at Epoch 13 iteration 400 with loss 0.36445. Total time 2.41861 hours\n", + "Training at Epoch 13 iteration 500 with loss 0.39682. 
Total time 2.43777 hours\n", + "Training at Epoch 13 iteration 600 with loss 0.41257. Total time 2.45666 hours\n", + "Validation at Epoch 13 , MSE: 0.36162 , Pearson Correlation: 0.69767 with p-value: 0.0 , Concordance Index: 0.76840\n", + "Training at Epoch 14 iteration 0 with loss 0.39781. Total time 2.47 hours\n", + "Training at Epoch 14 iteration 100 with loss 0.39343. Total time 2.49111 hours\n", + "Training at Epoch 14 iteration 200 with loss 0.40009. Total time 2.51388 hours\n", + "Training at Epoch 14 iteration 300 with loss 0.66970. Total time 2.54305 hours\n", + "Training at Epoch 14 iteration 400 with loss 0.37376. Total time 2.57083 hours\n", + "Training at Epoch 14 iteration 500 with loss 0.42178. Total time 2.59333 hours\n", + "Training at Epoch 14 iteration 600 with loss 0.68821. Total time 2.61416 hours\n", + "Validation at Epoch 14 , MSE: 0.35647 , Pearson Correlation: 0.71057 with p-value: 0.0 , Concordance Index: 0.77446\n", + "Training at Epoch 15 iteration 0 with loss 0.34164. Total time 2.63277 hours\n", + "Training at Epoch 15 iteration 100 with loss 0.40583. Total time 2.66138 hours\n", + "Training at Epoch 15 iteration 200 with loss 0.38328. Total time 2.69027 hours\n", + "Training at Epoch 15 iteration 300 with loss 0.52372. Total time 2.71694 hours\n", + "Training at Epoch 15 iteration 400 with loss 0.53249. Total time 2.74138 hours\n", + "Training at Epoch 15 iteration 500 with loss 0.38095. Total time 2.76111 hours\n", + "Training at Epoch 15 iteration 600 with loss 0.45153. Total time 2.78555 hours\n", + "Validation at Epoch 15 , MSE: 0.38514 , Pearson Correlation: 0.71744 with p-value: 0.0 , Concordance Index: 0.77747\n", + "Training at Epoch 16 iteration 0 with loss 0.44061. Total time 2.8025 hours\n", + "Training at Epoch 16 iteration 100 with loss 0.42707. Total time 2.82611 hours\n", + "Training at Epoch 16 iteration 200 with loss 0.35150. Total time 2.84833 hours\n", + "Training at Epoch 16 iteration 300 with loss 0.58418. 
Total time 2.87194 hours\n", + "Training at Epoch 16 iteration 400 with loss 0.47213. Total time 2.9 hours\n", + "Training at Epoch 16 iteration 500 with loss 0.43227. Total time 2.92611 hours\n", + "Training at Epoch 16 iteration 600 with loss 0.34077. Total time 2.94444 hours\n", + "Validation at Epoch 16 , MSE: 0.34447 , Pearson Correlation: 0.71855 with p-value: 0.0 , Concordance Index: 0.78137\n", + "Training at Epoch 17 iteration 0 with loss 0.41209. Total time 2.96111 hours\n", + "Training at Epoch 17 iteration 100 with loss 0.50063. Total time 2.98611 hours\n", + "Training at Epoch 17 iteration 200 with loss 0.41551. Total time 3.00888 hours\n" + ] + } + ], + "source": [ + "model.train(train, val, test)  # logs training loss, per-epoch validation metrics and periodic test metrics; NOTE(review): the captured log stops mid-epoch 17 with execution_count null, so this run looks interrupted - re-run before relying on it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save_model('./model_MPNN_AAC_Kiba')  # relative path: resolved against the working directory set by os.chdir in the first cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}