MOLI / Git / Diff of /EGFR/EGFRv8.ipynb

Models:
AlyssaS/
MOLI
Downloads: 1
Diff of /EGFR/EGFRv8.ipynb [000000] .. [d90d15]
Switch to side-by-side view

--- a
+++ b/EGFR/EGFRv8.ipynb
@@ -0,0 +1,377 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch \n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import math\n",
+    "import sklearn.preprocessing as sk\n",
+    "import seaborn as sns\n",
+    "from sklearn import metrics\n",
+    "from sklearn.feature_selection import VarianceThreshold\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from utils import AllTripletSelector,HardestNegativeTripletSelector, RandomNegativeTripletSelector, SemihardNegativeTripletSelector # Strategies for selecting triplets within a minibatch\n",
+    "from metrics import AverageNonzeroTripletsMetric\n",
+    "from torch.utils.data.sampler import WeightedRandomSampler\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "from sklearn.metrics import average_precision_score\n",
+    "import random\n",
+    "from random import randint\n",
+    "from sklearn.model_selection import StratifiedKFold\n",
+    "\n",
+    "# NOTE(review): absolute, user-specific path; nothing in the cells shown here reads it.\n",
+    "save_results_to = '/home/hnoghabi/EGFR/'\n",
+    "torch.manual_seed(42)\n",
+    "random.seed(42)\n",
+    "\n",
+    "\n",
+    "def read_omics_tsv(path, decimal, dedup_columns=False):\n",
+    "    \"\"\"Read a tab-separated table, index it by its first column and transpose it.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    path : str\n",
+    "        File to read.\n",
+    "    decimal : str\n",
+    "        Decimal separator handed to ``pd.read_csv``.\n",
+    "    dedup_columns : bool\n",
+    "        When True, keep only the first occurrence of every repeated column\n",
+    "        label of the transposed table.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pd.DataFrame\n",
+    "        The transposed table; downstream code selects samples by row label\n",
+    "        and features by column label.\n",
+    "    \"\"\"\n",
+    "    table = pd.read_csv(path, sep=\"\\t\", index_col=0, decimal=decimal).T\n",
+    "    if dedup_columns:\n",
+    "        table = table.loc[:, ~table.columns.duplicated()]\n",
+    "    return table\n",
+    "\n",
+    "\n",
+    "# The CNA tables used to be passed through `drop_duplicates(keep='last')` with the\n",
+    "# result thrown away, i.e. a no-op. The call is deliberately not reproduced:\n",
+    "# assigning its result would drop value-identical rows and change the feature set.\n",
+    "\n",
+    "# GDSC tables (used to fit the scaler and as the training cohort).\n",
+    "GDSCE = read_omics_tsv(\"GDSC_exprs.z.EGFRi.tsv\", decimal=\",\")\n",
+    "GDSCM = read_omics_tsv(\"GDSC_mutations.EGFRi.tsv\", decimal=\".\", dedup_columns=True)\n",
+    "GDSCC = read_omics_tsv(\"GDSC_CNA.EGFRi.tsv\", decimal=\".\", dedup_columns=True)\n",
+    "\n",
+    "# PDX tables. Earlier revisions read \"PDX_mutations.<drug>.tsv\" and\n",
+    "# \"PDX_CNA.<drug>.tsv\" with decimal=\".\"; the files below are the ones in use.\n",
+    "PDXEerlo = read_omics_tsv(\"PDX_exprs.Erlotinib.eb_with.GDSC_exprs.Erlotinib.tsv\", decimal=\",\")\n",
+    "PDXMerlo = read_omics_tsv(\"PDX_mutations.Erlotinib - Copy.tsv\", decimal=\",\")\n",
+    "PDXCerlo = read_omics_tsv(\"PDX_CNV.Erlotinib.tsv\", decimal=\",\", dedup_columns=True)\n",
+    "\n",
+    "PDXEcet = read_omics_tsv(\"PDX_exprs.Cetuximab.eb_with.GDSC_exprs.Cetuximab.tsv\", decimal=\",\")\n",
+    "PDXMcet = read_omics_tsv(\"PDX_mutations.Cetuximab - Copy.tsv\", decimal=\",\")\n",
+    "PDXCcet = read_omics_tsv(\"PDX_CNV.Cetuximab.tsv\", decimal=\",\", dedup_columns=True)\n",
+    "\n",
+    "# Keep the expression features whose variance over the GDSC rows exceeds 0.05.\n",
+    "# Only the fitted support mask is used, so fit() replaces the former\n",
+    "# fit_transform() call whose transformed output was discarded.\n",
+    "selector = VarianceThreshold(0.05)\n",
+    "selector.fit(GDSCE)\n",
+    "GDSCE = GDSCE[GDSCE.columns[selector.get_support(indices=True)]]\n",
+    "\n",
+    "# Binarise the GDSC mutation and CNA tables: missing -> 0, any non-zero value -> 1.\n",
+    "GDSCM = GDSCM.fillna(0)\n",
+    "GDSCM[GDSCM != 0.0] = 1\n",
+    "GDSCC = GDSCC.fillna(0)\n",
+    "GDSCC[GDSCC != 0.0] = 1\n",
+    "\n",
+    "# ls: feature labels present in every one of the nine tables.\n",
+    "ls = GDSCE.columns\n",
+    "for _table in (GDSCM, GDSCC, PDXEerlo, PDXMerlo, PDXCerlo, PDXEcet, PDXMcet, PDXCcet):\n",
+    "    ls = ls.intersection(_table.columns)\n",
+    "ls = pd.unique(ls)\n",
+    "\n",
+    "# ls3 / ls4: row labels shared by the three erlotinib / cetuximab PDX tables.\n",
+    "# (The GDSC row intersection `ls2` is computed later, after the per-drug\n",
+    "# expansion; the value formerly computed here was overwritten before any use.)\n",
+    "ls3 = PDXEerlo.index.intersection(PDXMerlo.index).intersection(PDXCerlo.index)\n",
+    "ls4 = PDXEcet.index.intersection(PDXMcet.index).intersection(PDXCcet.index)\n",
+    "\n",
+    "# Restrict every table to the shared features (and PDX tables to shared rows).\n",
+    "PDXEerlo = PDXEerlo.loc[ls3, ls]\n",
+    "PDXMerlo = PDXMerlo.loc[ls3, ls]\n",
+    "PDXCerlo = PDXCerlo.loc[ls3, ls]\n",
+    "PDXEcet = PDXEcet.loc[ls4, ls]\n",
+    "PDXMcet = PDXMcet.loc[ls4, ls]\n",
+    "PDXCcet = PDXCcet.loc[ls4, ls]\n",
+    "GDSCE = GDSCE.loc[:, ls]\n",
+    "GDSCM = GDSCM.loc[:, ls]\n",
+    "GDSCC = GDSCC.loc[:, ls]\n",
+    "\n",
+    "GDSCR = pd.read_csv(\"GDSC_response.EGFRi.tsv\", \n",
+    "                    sep = \"\\t\", index_col=0, decimal = \",\")\n",
+    "\n",
+    "# Row labels become strings so they can be suffixed with the drug name below.\n",
+    "GDSCR.rename(mapper = str, axis = 'index', inplace = True)\n",
+    "\n",
+    "# Encode the response labels; an unexpected label raises KeyError.\n",
+    "d = {\"R\":0,\"S\":1}\n",
+    "GDSCR[\"response\"] = GDSCR.loc[:,\"response\"].apply(lambda x: d[x])\n",
+    "\n",
+    "responses = GDSCR\n",
+    "drugs = set(responses[\"drug\"].values)\n",
+    "exprs_z = GDSCE\n",
+    "cna = GDSCC\n",
+    "mut = GDSCM\n",
+    "expression_zscores = []\n",
+    "CNA=[]\n",
+    "mutations = []\n",
+    "# One copy of each sample's features per drug it was screened with, relabelled\n",
+    "# \"<sample>_<drug>\". Drugs are visited in sorted order so the row order of the\n",
+    "# concatenated tables no longer depends on per-session set ordering.\n",
+    "for drug in sorted(drugs):\n",
+    "    samples = responses.loc[responses[\"drug\"]==drug,:].index.values\n",
+    "    suffix = \"_\"+drug\n",
+    "    # rename() without inplace returns new frames instead of mutating .loc slices.\n",
+    "    e_z = exprs_z.loc[samples,:].rename(lambda x : str(x)+suffix, axis = \"index\")\n",
+    "    c = cna.loc[samples,:].rename(lambda x : str(x)+suffix, axis = \"index\")\n",
+    "    m = mut.loc[samples,:].rename(lambda x : str(x)+suffix, axis = \"index\")\n",
+    "    expression_zscores.append(e_z)\n",
+    "    CNA.append(c)\n",
+    "    mutations.append(m)\n",
+    "# Give the response table the same \"<sample>_<drug>\" labels (this also relabels\n",
+    "# GDSCR, which `responses` aliases).\n",
+    "responses.index = responses.index.values +\"_\"+responses[\"drug\"].values\n",
+    "GDSCEv2 = pd.concat(expression_zscores, axis =0 )\n",
+    "GDSCCv2 = pd.concat(CNA, axis =0 )\n",
+    "GDSCMv2 = pd.concat(mutations, axis =0 )\n",
+    "GDSCRv2 = responses\n",
+    "\n",
+    "# Align the three feature tables and the response table on their shared rows.\n",
+    "ls2 = GDSCEv2.index.intersection(GDSCMv2.index)\n",
+    "ls2 = ls2.intersection(GDSCCv2.index)\n",
+    "GDSCEv2 = GDSCEv2.loc[ls2,:]\n",
+    "GDSCMv2 = GDSCMv2.loc[ls2,:]\n",
+    "GDSCCv2 = GDSCCv2.loc[ls2,:]\n",
+    "GDSCRv2 = GDSCRv2.loc[ls2,:]\n",
+    "\n",
+    "Y = GDSCRv2['response'].values\n",
+    "\n",
+    "def load_pdx_response(path, sample_ids):\n",
+    "    \"\"\"Load a PDX response table and encode its labels as 0 / 1.\n",
+    "\n",
+    "    Rows whose second data column equals 'R' get response 0 and rows where it\n",
+    "    equals 'S' get response 1. Only the 'response' column is written; the\n",
+    "    previous code assigned the value to every column of the matching rows.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    path : str\n",
+    "        Tab-separated response file; its first column becomes the index.\n",
+    "    sample_ids : index-like\n",
+    "        Row labels to keep, in the order given.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    (pd.DataFrame, np.ndarray)\n",
+    "        The filtered table and the values of its 'response' column.\n",
+    "    \"\"\"\n",
+    "    table = pd.read_csv(path, sep = \"\\t\", index_col=0, decimal = \",\")\n",
+    "    # NOTE(review): the masks come from the second data column by position, as\n",
+    "    # before; presumably that column is 'response' itself - confirm.\n",
+    "    raw_labels = table.iloc[:,1]\n",
+    "    is_resistant = raw_labels == 'R'\n",
+    "    is_sensitive = raw_labels == 'S'\n",
+    "    table.loc[is_resistant, 'response'] = 0\n",
+    "    table.loc[is_sensitive, 'response'] = 1\n",
+    "    table = table.loc[sample_ids,:]\n",
+    "    return table, table['response'].values\n",
+    "\n",
+    "\n",
+    "PDXRcet, Ytscet = load_pdx_response(\"PDX_response.Cetuximab.tsv\", ls4)\n",
+    "PDXRerlo, Ytserlo = load_pdx_response(\"PDX_response.Erlotinib.tsv\", ls3)\n",
+    "\n",
+    "# Encoder output widths; the next code cell copies them into h_dim1, h_dim2\n",
+    "# and h_dim3 (expression, mutation and CNA encoders respectively).\n",
+    "hdm1, hdm2, hdm3 = 32, 16, 256\n",
+    "# Dropout probabilities: rate1-rate3 for the three encoders, rate4 for the classifier.\n",
+    "rate1, rate2, rate3, rate4 = 0.5, 0.8, 0.5, 0.3\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/hnoghabi/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:475: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.\n",
+      "  warnings.warn(msg, DataConversionWarning)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9440556750399118\n",
+      "0.7222222222222222\n",
+      "0.8\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Standardise expression with statistics fitted on GDSC only and apply the\n",
+    "# same transform to both PDX cohorts. The explicit float64 casts replace the\n",
+    "# implicit object -> float64 conversion that StandardScaler used to report\n",
+    "# as a DataConversionWarning.\n",
+    "scalerGDSC = sk.StandardScaler()\n",
+    "scalerGDSC.fit(GDSCEv2.values.astype(np.float64))\n",
+    "X_trainE = scalerGDSC.transform(GDSCEv2.values.astype(np.float64))\n",
+    "X_testEerlo = scalerGDSC.transform(PDXEerlo.values.astype(np.float64))\n",
+    "X_testEcet = scalerGDSC.transform(PDXEcet.values.astype(np.float64))\n",
+    "\n",
+    "# Mutation / CNA matrices: NaN replaced by 0.\n",
+    "X_trainM = np.nan_to_num(GDSCMv2.values)\n",
+    "X_trainC = np.nan_to_num(GDSCCv2.values)\n",
+    "X_testMerlo = np.nan_to_num(PDXMerlo.values)\n",
+    "X_testCerlo = np.nan_to_num(PDXCerlo.values)\n",
+    "X_testMcet = np.nan_to_num(PDXMcet.values)\n",
+    "X_testCcet = np.nan_to_num(PDXCcet.values)\n",
+    "\n",
+    "# Float tensors for the two PDX test cohorts.\n",
+    "TX_testEerlo = torch.FloatTensor(X_testEerlo)\n",
+    "TX_testMerlo = torch.FloatTensor(X_testMerlo)\n",
+    "TX_testCerlo = torch.FloatTensor(X_testCerlo)\n",
+    "ty_testEerlo = torch.FloatTensor(Ytserlo.astype(int))\n",
+    "\n",
+    "TX_testEcet = torch.FloatTensor(X_testEcet)\n",
+    "TX_testMcet = torch.FloatTensor(X_testMcet)\n",
+    "TX_testCcet = torch.FloatTensor(X_testCcet)\n",
+    "ty_testEcet = torch.FloatTensor(Ytscet.astype(int))\n",
+    "\n",
+    "# Input widths read by the encoder class definitions below.\n",
+    "n_sampE, IE_dim = X_trainE.shape\n",
+    "n_sampM, IM_dim = X_trainM.shape\n",
+    "n_sampC, IC_dim = X_trainC.shape\n",
+    "\n",
+    "h_dim1 = hdm1\n",
+    "h_dim2 = hdm2\n",
+    "h_dim3 = hdm3        \n",
+    "# Width of the concatenated encoding fed to the classifier.\n",
+    "Z_in = h_dim1 + h_dim2 + h_dim3\n",
+    "\n",
+    "# History lists; not referenced again in the cells shown here.\n",
+    "costtr = []\n",
+    "auctr = []\n",
+    "costts = []\n",
+    "aucts = []\n",
+    "\n",
+    "class AEE(nn.Module):\n",
+    "    \"\"\"Encoder block: Linear(IE_dim, h_dim1) -> BatchNorm1d -> ReLU -> Dropout(rate1).\n",
+    "\n",
+    "    IE_dim, h_dim1 and rate1 are notebook globals read at construction time.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        layers = [\n",
+    "            nn.Linear(IE_dim, h_dim1),\n",
+    "            nn.BatchNorm1d(h_dim1),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Dropout(rate1),\n",
+    "        ]\n",
+    "        # NOTE(review): the modules loaded with torch.load later in this cell are\n",
+    "        # presumably instances of this class, so the attribute name is unchanged.\n",
+    "        self.EnE = nn.Sequential(*layers)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return the encoding of ``x``.\"\"\"\n",
+    "        return self.EnE(x)\n",
+    "\n",
+    "class AEM(nn.Module):\n",
+    "    \"\"\"Encoder block: Linear(IM_dim, h_dim2) -> BatchNorm1d -> ReLU -> Dropout(rate2).\n",
+    "\n",
+    "    IM_dim, h_dim2 and rate2 are notebook globals read at construction time.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        layers = [\n",
+    "            nn.Linear(IM_dim, h_dim2),\n",
+    "            nn.BatchNorm1d(h_dim2),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Dropout(rate2),\n",
+    "        ]\n",
+    "        # NOTE(review): the modules loaded with torch.load later in this cell are\n",
+    "        # presumably instances of this class, so the attribute name is unchanged.\n",
+    "        self.EnM = nn.Sequential(*layers)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return the encoding of ``x``.\"\"\"\n",
+    "        return self.EnM(x)\n",
+    "\n",
+    "\n",
+    "class AEC(nn.Module):\n",
+    "    \"\"\"Encoder block: Linear(IC_dim, h_dim3) -> BatchNorm1d -> ReLU -> Dropout(rate3).\n",
+    "\n",
+    "    IC_dim, h_dim3 and rate3 are notebook globals read at construction time.\n",
+    "    The first layer is sized with IC_dim (the CNA matrix width); it used to be\n",
+    "    sized with IM_dim, which only worked because the mutation and CNA tables\n",
+    "    are cut to the same feature columns.\n",
+    "    \"\"\"\n",
+    "    def __init__(self):\n",
+    "        super(AEC, self).__init__()\n",
+    "        # Attribute name kept as-is; NOTE(review): the modules loaded with\n",
+    "        # torch.load later in this cell are presumably instances of this class.\n",
+    "        self.EnC = torch.nn.Sequential(\n",
+    "            nn.Linear(IC_dim, h_dim3),\n",
+    "            nn.BatchNorm1d(h_dim3),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Dropout(rate3))\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return the encoding of ``x``.\"\"\"\n",
+    "        output = self.EnC(x)\n",
+    "        return output       \n",
+    "\n",
+    "class Classifier(nn.Module):\n",
+    "    \"\"\"Output head: Linear(Z_in, 1) -> Dropout(rate4) -> Sigmoid.\n",
+    "\n",
+    "    Z_in and rate4 are notebook globals read at construction time.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        head = [\n",
+    "            nn.Linear(Z_in, 1),\n",
+    "            nn.Dropout(rate4),\n",
+    "            nn.Sigmoid(),\n",
+    "        ]\n",
+    "        self.FC = nn.Sequential(*head)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return one sigmoid score per row of ``x``.\"\"\"\n",
+    "        scores = self.FC(x)\n",
+    "        return scores\n",
+    "\n",
+    "torch.cuda.manual_seed_all(42)\n",
+    "\n",
+    "# NOTE(security): torch.load unpickles arbitrary Python objects - only load\n",
+    "# checkpoint files that come from a trusted source.\n",
+    "# map_location='cpu' keeps the loaded modules on the CPU, which is where every\n",
+    "# input tensor used below is created.\n",
+    "AutoencoderE = torch.load('EGFRv2Exprs.pt', map_location='cpu')\n",
+    "AutoencoderM = torch.load('EGFRv2Mut.pt', map_location='cpu')\n",
+    "AutoencoderC = torch.load('EGFRv2CNA.pt', map_location='cpu')\n",
+    "\n",
+    "Clas = torch.load('EGFRv2Class.pt', map_location='cpu')\n",
+    "\n",
+    "# Evaluation mode for all four modules.\n",
+    "for _module in (AutoencoderE, AutoencoderM, AutoencoderC, Clas):\n",
+    "    _module.eval()\n",
+    "\n",
+    "\n",
+    "def predict_and_score(x_exprs, x_mut, x_cna, labels):\n",
+    "    \"\"\"Encode the three inputs, classify the fused encoding and compute ROC AUC.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x_exprs, x_mut, x_cna : torch.FloatTensor\n",
+    "        Inputs for AutoencoderE, AutoencoderM and AutoencoderC, one row per sample.\n",
+    "    labels : array-like\n",
+    "        0 / 1 labels in the same row order; cast to int before scoring.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    (float, torch.Tensor)\n",
+    "        The ROC AUC and the classifier output.\n",
+    "    \"\"\"\n",
+    "    # Inference only: no autograd graph is needed.\n",
+    "    with torch.no_grad():\n",
+    "        fused = torch.cat((AutoencoderE(x_exprs), AutoencoderM(x_mut), AutoencoderC(x_cna)), 1)\n",
+    "        # dim=0 L2-normalises each encoded feature across the rows passed in, so\n",
+    "        # the scores depend on which samples are evaluated together.\n",
+    "        # NOTE(review): presumably mirrors the training code - confirm before changing.\n",
+    "        fused = F.normalize(fused, p=2, dim=0)\n",
+    "        pred = Clas(fused)\n",
+    "    auc = roc_auc_score(np.asarray(labels).astype(int), pred.detach().numpy())\n",
+    "    return auc, pred\n",
+    "\n",
+    "\n",
+    "# GDSC (training) cohort.\n",
+    "AUCt, PredX = predict_and_score(torch.FloatTensor(X_trainE),\n",
+    "                                torch.FloatTensor(X_trainM),\n",
+    "                                torch.FloatTensor(X_trainC), Y)\n",
+    "print(AUCt)\n",
+    "\n",
+    "# PDX erlotinib cohort.\n",
+    "AUCterlo, PredTerlo = predict_and_score(TX_testEerlo, TX_testMerlo, TX_testCerlo, Ytserlo)\n",
+    "print(AUCterlo)\n",
+    "\n",
+    "# PDX cetuximab cohort.\n",
+    "AUCtcet, PredTcet = predict_and_score(TX_testEcet, TX_testMcet, TX_testCcet, Ytscet)\n",
+    "print(AUCtcet)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}