[7b3b0e]: / linear_cox.ipynb

Download this file

335 lines (334 with data), 21.3 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Packages required\n",
    "- numpy==1.14.0\n",
    "- matplotlib==3.0.3\n",
    "- pandas==0.24.2\n",
    "- nibabel==2.5.0\n",
    "- scikit-learn\n",
    "- scikit-survival 0.12 with ``pip install scikit-survival==0.12``\n",
    "- lifelines 0.23.9 with ``pip install lifelines``"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sksurv\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "from sksurv.linear_model import CoxPHSurvivalAnalysis\n",
    "from lifelines.utils import concordance_index\n",
    "\n",
    "from utils import DataLoader, get_structured_array, run_coxnet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
      "Fold : 0 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
      "Fold : 1 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
      "Fold : 2 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
      "Fold : 3 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
      "Fold : 4 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n",
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "n_alphas = 100\n",
    "l1_ratio = 0.3\n",
    "num_genes = 500\n",
    "\n",
    "all_indices = []\n",
    "\n",
    "for fold_num in range(5):\n",
    "\n",
    "    curr_indices = []\n",
    "\n",
    "    data = DataLoader(fold=fold_num, num_genes=num_genes)\n",
    "    y_train = get_structured_array(data.y_train_bool, data.y_train_value)\n",
    "    y_valid = get_structured_array(data.y_valid_bool, data.y_valid_value)\n",
    "    y_test = get_structured_array(data.y_test_bool, data.y_test_value)\n",
    "    print('Fold : ' + str(fold_num) + ' || ' +\n",
    "          'Training set: ' + str(y_train.shape) + ' | Validation set: ' + str(y_valid.shape) +\n",
    "          ' | Test set: ' + str(y_test.shape))\n",
    "\n",
    "    ### Genomics\n",
    "\n",
    "    gen_outputs, gen_scores = run_coxnet(l1_ratio, n_alphas,\n",
    "                                         data.gen_train , y_train,\n",
    "                                         data.gen_test , y_test)\n",
    "\n",
    "    ### Pyradiomics\n",
    "\n",
    "    pyrad_outputs, pyrad_scores = run_coxnet(l1_ratio, n_alphas,\n",
    "                                             data.pyrad_train , y_train,\n",
    "                                             data.pyrad_test , y_test)\n",
    "    \n",
    "    ### Densenet\n",
    "\n",
    "    dense_outputs, dense_scores = run_coxnet(l1_ratio, n_alphas,\n",
    "                                             data.dense_train , y_train,\n",
    "                                             data.dense_test , y_test)\n",
    "\n",
    "    ### Genomics-PyRadiomics\n",
    "\n",
    "    feat1_train = np.concatenate((data.gen_train , \n",
    "                                 data.pyrad_train ), axis=1)\n",
    "    feat1_test  = np.concatenate((data.gen_test , \n",
    "                                 data.pyrad_test ), axis=1)\n",
    "\n",
    "    feat1_outputs, feat1_score = run_coxnet(l1_ratio, n_alphas,\n",
    "                                           feat1_train, y_train,\n",
    "                                           feat1_test, y_test)\n",
    "    feat1_hat_score = concordance_index(data.y_test_value ,\n",
    "                                        - gen_outputs - pyrad_outputs,\n",
    "                                        data.y_test_bool )\n",
    "    ### Genomics-DenseNet\n",
    " \n",
    "    feat2_train = np.concatenate((data.gen_train , \n",
    "                                 data.dense_train ), axis=1)\n",
    "    feat2_test  = np.concatenate((data.gen_test , \n",
    "                                 data.dense_test ), axis=1)\n",
    "\n",
    "    feat2_outputs, feat2_score = run_coxnet(l1_ratio, n_alphas,\n",
    "                                           feat2_train, y_train,\n",
    "                                           feat2_test, y_test)\n",
    "    feat2_hat_score = concordance_index(data.y_test_value ,\n",
    "                                        - gen_outputs - dense_outputs,\n",
    "                                        data.y_test_bool )\n",
    "   \n",
    "    ### Genomics-PyRadiomics-DenseNet\n",
    "\n",
    " \n",
    "    feat3_train = np.concatenate((data.gen_train , \n",
    "                                 data.pyrad_train ,\n",
    "                                 data.dense_train ), axis=1)\n",
    "    feat3_test  = np.concatenate((data.gen_test , \n",
    "                                 data.pyrad_test ,\n",
    "                                 data.dense_test ), axis=1)\n",
    "\n",
    "    feat3_outputs, feat3_score = run_coxnet(l1_ratio, n_alphas,\n",
    "                                           feat3_train, y_train,\n",
    "                                           feat3_test, y_test)\n",
    "    feat3_hat_score = concordance_index(data.y_test_value ,\n",
    "                                        - gen_outputs - pyrad_outputs - dense_outputs,\n",
    "                                        data.y_test_bool )\n",
    "\n",
    "\n",
    "    curr_indices = [gen_scores, pyrad_scores, dense_scores,\n",
    "                    feat1_score, feat1_hat_score,\n",
    "                    feat2_score, feat2_hat_score,\n",
    "                    feat3_score, feat3_hat_score]\n",
    "    all_indices.append(curr_indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "methods = ['genomics \\t ', 'pyradiomics \\t', 'densenet \\t',\n",
    "           'gen-pyrad (ef) \\t', 'gen-pyrad (lf) \\t', 'gen-dense (ef) \\t', 'gen-dense (lf) \\t',\n",
    "           'gen-pyrad-dense (ef)', 'gen-pyrad-dense (lf)',]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "method \t\t \t|fd 1|fd 2|fd 3|fd 4|fd 5\n",
      "-------------------------------------------------\n",
      "genomics \t \t|0.52|0.45|0.60|0.72|0.75\n",
      "pyradiomics \t\t|0.46|0.39|0.30|0.37|0.80\n",
      "densenet \t\t|0.53|0.49|0.44|0.65|0.52\n",
      "gen-pyrad (ef) \t\t|0.41|0.36|0.46|0.57|0.81\n",
      "gen-pyrad (lf) \t\t|0.41|0.42|0.49|0.60|0.81\n",
      "gen-dense (ef) \t\t|0.53|0.46|0.60|0.80|0.77\n",
      "gen-dense (lf) \t\t|0.49|0.50|0.55|0.82|0.59\n",
      "gen-pyrad-dense (ef)\t|0.43|0.37|0.47|0.61|0.82\n",
      "gen-pyrad-dense (lf)\t|0.46|0.46|0.51|0.81|0.60\n"
     ]
    }
   ],
   "source": [
    "print('method \\t\\t \\t|fd 1|fd 2|fd 3|fd 4|fd 5')\n",
    "print('-------------------------------------------------')\n",
    "for y in range(len(all_indices[0])):s\n",
    "    print(methods[y]  + '\\t|' + '|'.join([\"{0:.2f}\".format(round(x[y], 2)) for x in all_indices]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python3 (radiogenomics)",
   "language": "python",
   "name": "radiogenomics"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}