multimodal-recurrence / Git / Diff of /linear

Models:
RichardZick/
multimodal-recurrence
Downloads: 1
Diff of /linear_cox.ipynb [000000] .. [7b3b0e]
Switch to side-by-side view

--- a
+++ b/linear_cox.ipynb
@@ -0,0 +1,334 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Packages required\n",
+    "- numpy==1.14.0\n",
+    "- matplotlib==3.0.3\n",
+    "- pandas==0.24.2\n",
+    "- nibabel==2.5.0\n",
+    "- scikit-learn\n",
+    "- scikit-survival 0.12 with ``pip install scikit-survival==0.12``\n",
+    "- lifelines 0.23.9 with ``pip install lifelines``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sksurv\n",
+    "import numpy as np\n",
+    "%matplotlib inline\n",
+    "from matplotlib import pyplot as plt\n",
+    "\n",
+    "from sksurv.linear_model import CoxPHSurvivalAnalysis\n",
+    "from lifelines.utils import concordance_index\n",
+    "\n",
+    "from utils import DataLoader, get_structured_array, run_coxnet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
+      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
+      "Fold : 0 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
+      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
+      "Fold : 1 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
+      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
+      "Fold : 2 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
+      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
+      "Fold : 3 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
+      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
+      "Fold : 4 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n",
+      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
+      "  FutureWarning)\n"
+     ]
+    }
+   ],
+   "source": [
+    "n_alphas = 100\n",
+    "l1_ratio = 0.3\n",
+    "num_genes = 500\n",
+    "\n",
+    "all_indices = []\n",
+    "\n",
+    "for fold_num in range(5):\n",
+    "\n",
+    "    curr_indices = []\n",
+    "\n",
+    "    data = DataLoader(fold=fold_num, num_genes=num_genes)\n",
+    "    y_train = get_structured_array(data.y_train_bool, data.y_train_value)\n",
+    "    y_valid = get_structured_array(data.y_valid_bool, data.y_valid_value)\n",
+    "    y_test = get_structured_array(data.y_test_bool, data.y_test_value)\n",
+    "    print('Fold : ' + str(fold_num) + ' || ' +\n",
+    "          'Training set: ' + str(y_train.shape) + ' | Validation set: ' + str(y_valid.shape) +\n",
+    "          ' | Test set: ' + str(y_test.shape))\n",
+    "\n",
+    "    ### Genomics\n",
+    "\n",
+    "    gen_outputs, gen_scores = run_coxnet(l1_ratio, n_alphas,\n",
+    "                                         data.gen_train , y_train,\n",
+    "                                         data.gen_test , y_test)\n",
+    "\n",
+    "    ### Pyradiomics\n",
+    "\n",
+    "    pyrad_outputs, pyrad_scores = run_coxnet(l1_ratio, n_alphas,\n",
+    "                                             data.pyrad_train , y_train,\n",
+    "                                             data.pyrad_test , y_test)\n",
+    "    \n",
+    "    ### Densenet\n",
+    "\n",
+    "    dense_outputs, dense_scores = run_coxnet(l1_ratio, n_alphas,\n",
+    "                                             data.dense_train , y_train,\n",
+    "                                             data.dense_test , y_test)\n",
+    "\n",
+    "    ### Genomics-PyRadiomics\n",
+    "\n",
+    "    feat1_train = np.concatenate((data.gen_train , \n",
+    "                                 data.pyrad_train ), axis=1)\n",
+    "    feat1_test  = np.concatenate((data.gen_test , \n",
+    "                                 data.pyrad_test ), axis=1)\n",
+    "\n",
+    "    feat1_outputs, feat1_score = run_coxnet(l1_ratio, n_alphas,\n",
+    "                                           feat1_train, y_train,\n",
+    "                                           feat1_test, y_test)\n",
+    "    feat1_hat_score = concordance_index(data.y_test_value ,\n",
+    "                                        - gen_outputs - pyrad_outputs,\n",
+    "                                        data.y_test_bool )\n",
+    "    ### Genomics-DenseNet\n",
+    " \n",
+    "    feat2_train = np.concatenate((data.gen_train , \n",
+    "                                 data.dense_train ), axis=1)\n",
+    "    feat2_test  = np.concatenate((data.gen_test , \n",
+    "                                 data.dense_test ), axis=1)\n",
+    "\n",
+    "    feat2_outputs, feat2_score = run_coxnet(l1_ratio, n_alphas,\n",
+    "                                           feat2_train, y_train,\n",
+    "                                           feat2_test, y_test)\n",
+    "    feat2_hat_score = concordance_index(data.y_test_value ,\n",
+    "                                        - gen_outputs - dense_outputs,\n",
+    "                                        data.y_test_bool )\n",
+    "   \n",
+    "    ### Genomics-PyRadiomics-DenseNet\n",
+    "\n",
+    " \n",
+    "    feat3_train = np.concatenate((data.gen_train , \n",
+    "                                 data.pyrad_train ,\n",
+    "                                 data.dense_train ), axis=1)\n",
+    "    feat3_test  = np.concatenate((data.gen_test , \n",
+    "                                 data.pyrad_test ,\n",
+    "                                 data.dense_test ), axis=1)\n",
+    "\n",
+    "    feat3_outputs, feat3_score = run_coxnet(l1_ratio, n_alphas,\n",
+    "                                           feat3_train, y_train,\n",
+    "                                           feat3_test, y_test)\n",
+    "    feat3_hat_score = concordance_index(data.y_test_value ,\n",
+    "                                        - gen_outputs - pyrad_outputs - dense_outputs,\n",
+    "                                        data.y_test_bool )\n",
+    "\n",
+    "\n",
+    "    curr_indices = [gen_scores, pyrad_scores, dense_scores,\n",
+    "                    feat1_score, feat1_hat_score,\n",
+    "                    feat2_score, feat2_hat_score,\n",
+    "                    feat3_score, feat3_hat_score]\n",
+    "    all_indices.append(curr_indices)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "methods = ['genomics \\t ', 'pyradiomics \\t', 'densenet \\t',\n",
+    "           'gen-pyrad (ef) \\t', 'gen-pyrad (lf) \\t', 'gen-dense (ef) \\t', 'gen-dense (lf) \\t',\n",
+    "           'gen-pyrad-dense (ef)', 'gen-pyrad-dense (lf)',]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "method \t\t \t|fd 1|fd 2|fd 3|fd 4|fd 5\n",
+      "-------------------------------------------------\n",
+      "genomics \t \t|0.52|0.45|0.60|0.72|0.75\n",
+      "pyradiomics \t\t|0.46|0.39|0.30|0.37|0.80\n",
+      "densenet \t\t|0.53|0.49|0.44|0.65|0.52\n",
+      "gen-pyrad (ef) \t\t|0.41|0.36|0.46|0.57|0.81\n",
+      "gen-pyrad (lf) \t\t|0.41|0.42|0.49|0.60|0.81\n",
+      "gen-dense (ef) \t\t|0.53|0.46|0.60|0.80|0.77\n",
+      "gen-dense (lf) \t\t|0.49|0.50|0.55|0.82|0.59\n",
+      "gen-pyrad-dense (ef)\t|0.43|0.37|0.47|0.61|0.82\n",
+      "gen-pyrad-dense (lf)\t|0.46|0.46|0.51|0.81|0.60\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('method \\t\\t \\t|fd 1|fd 2|fd 3|fd 4|fd 5')\n",
+    "print('-------------------------------------------------')\n",
+    "for y in range(len(all_indices[0])):s\n",
+    "    print(methods[y]  + '\\t|' + '|'.join([\"{0:.2f}\".format(round(x[y], 2)) for x in all_indices]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python3 (radiogenomics)",
+   "language": "python",
+   "name": "radiogenomics"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}