--- a +++ b/linear_cox.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Packages required\n", + "- numpy==1.14.0\n", + "- matplotlib==3.0.3\n", + "- pandas==0.24.2\n", + "- nibabel==2.5.0\n", + "- scikit-learn\n", + "- scikit-survival 0.12 with ``pip install scikit-survival==0.12``\n", + "- lifelines 0.23.9 with ``pip install lifelines``" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sksurv\n", + "import numpy as np\n", + "%matplotlib inline\n", + "from matplotlib import pyplot as plt\n", + "\n", + "from sksurv.linear_model import CoxPHSurvivalAnalysis\n", + "from lifelines.utils import concordance_index\n", + "\n", + "from utils import DataLoader, get_structured_array, run_coxnet" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n", + "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n", + "Fold : 0 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n", + "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n", + "Fold : 1 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n", + "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n", + "Fold : 2 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n", + "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n", + "Fold : 3 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n", + "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n", + "Fold : 4 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n", + "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n", + " FutureWarning)\n" + ] + } + ], + "source": [ + "n_alphas = 100\n", + "l1_ratio = 0.3\n", + "num_genes = 500\n", + "\n", + "all_indices = []\n", + "\n", + "for fold_num in range(5):\n", + "\n", + " curr_indices = []\n", + "\n", + " data = DataLoader(fold=fold_num, num_genes=num_genes)\n", + " y_train = get_structured_array(data.y_train_bool, data.y_train_value)\n", + " y_valid = get_structured_array(data.y_valid_bool, data.y_valid_value)\n", + " y_test = get_structured_array(data.y_test_bool, data.y_test_value)\n", + " print('Fold : ' + str(fold_num) + ' || ' +\n", + " 'Training set: ' + str(y_train.shape) + ' | Validation set: ' + str(y_valid.shape) +\n", + " ' | Test set: ' + str(y_test.shape))\n", + "\n", + " ### Genomics\n", + "\n", + " gen_outputs, gen_scores = run_coxnet(l1_ratio, n_alphas,\n", + " data.gen_train , y_train,\n", + " data.gen_test , y_test)\n", + "\n", + " ### Pyradiomics\n", + "\n", + " pyrad_outputs, pyrad_scores = run_coxnet(l1_ratio, n_alphas,\n", + " data.pyrad_train , y_train,\n", + " data.pyrad_test , y_test)\n", + " \n", + " ### Densenet\n", + "\n", + " dense_outputs, dense_scores = run_coxnet(l1_ratio, n_alphas,\n", + " data.dense_train , y_train,\n", + " data.dense_test , y_test)\n", + "\n", + " ### Genomics-PyRadiomics\n", + "\n", + " feat1_train = np.concatenate((data.gen_train , \n", + " data.pyrad_train ), axis=1)\n", + " feat1_test = np.concatenate((data.gen_test , \n", + " data.pyrad_test ), axis=1)\n", + "\n", + " feat1_outputs, feat1_score = run_coxnet(l1_ratio, n_alphas,\n", + " feat1_train, y_train,\n", + " feat1_test, y_test)\n", + " feat1_hat_score = concordance_index(data.y_test_value ,\n", + " - gen_outputs - pyrad_outputs,\n", + " data.y_test_bool )\n", + " ### Genomics-DenseNet\n", + " \n", + " feat2_train = np.concatenate((data.gen_train , \n", + " data.dense_train ), axis=1)\n", + " feat2_test = np.concatenate((data.gen_test , \n", + " data.dense_test ), axis=1)\n", + "\n", + " feat2_outputs, feat2_score = run_coxnet(l1_ratio, n_alphas,\n", + " feat2_train, y_train,\n", + " feat2_test, y_test)\n", + " feat2_hat_score = concordance_index(data.y_test_value ,\n", + " - gen_outputs - dense_outputs,\n", + " data.y_test_bool )\n", + " \n", + " ### Genomics-PyRadiomics-DenseNet\n", + "\n", + " \n", + " feat3_train = np.concatenate((data.gen_train , \n", + " data.pyrad_train ,\n", + " data.dense_train ), axis=1)\n", + " feat3_test = np.concatenate((data.gen_test , \n", + " data.pyrad_test ,\n", + " data.dense_test ), axis=1)\n", + "\n", + " feat3_outputs, feat3_score = run_coxnet(l1_ratio, n_alphas,\n", + " feat3_train, y_train,\n", + " feat3_test, y_test)\n", + " feat3_hat_score = concordance_index(data.y_test_value ,\n", + " - gen_outputs - pyrad_outputs - dense_outputs,\n", + " data.y_test_bool )\n", + "\n", + "\n", + " curr_indices = [gen_scores, pyrad_scores, dense_scores,\n", + " feat1_score, feat1_hat_score,\n", + " feat2_score, feat2_hat_score,\n", + " feat3_score, feat3_hat_score]\n", + " all_indices.append(curr_indices)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "methods = ['genomics \\t ', 'pyradiomics \\t', 'densenet \\t',\n", + " 'gen-pyrad (ef) \\t', 'gen-pyrad (lf) \\t', 'gen-dense (ef) \\t', 'gen-dense (lf) \\t',\n", + " 'gen-pyrad-dense (ef)', 'gen-pyrad-dense (lf)',]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "method \t\t \t|fd 1|fd 2|fd 3|fd 4|fd 5\n", + "-------------------------------------------------\n", + "genomics \t \t|0.52|0.45|0.60|0.72|0.75\n", + "pyradiomics \t\t|0.46|0.39|0.30|0.37|0.80\n", + "densenet \t\t|0.53|0.49|0.44|0.65|0.52\n", + "gen-pyrad (ef) \t\t|0.41|0.36|0.46|0.57|0.81\n", + "gen-pyrad (lf) \t\t|0.41|0.42|0.49|0.60|0.81\n", + "gen-dense (ef) \t\t|0.53|0.46|0.60|0.80|0.77\n", + "gen-dense (lf) \t\t|0.49|0.50|0.55|0.82|0.59\n", + "gen-pyrad-dense (ef)\t|0.43|0.37|0.47|0.61|0.82\n", + "gen-pyrad-dense (lf)\t|0.46|0.46|0.51|0.81|0.60\n" + ] + } + ], + "source": [ + "print('method \\t\\t \\t|fd 1|fd 2|fd 3|fd 4|fd 5')\n", + "print('-------------------------------------------------')\n", + "for y in range(len(all_indices[0])):s\n", + " print(methods[y] + '\\t|' + '|'.join([\"{0:.2f}\".format(round(x[y], 2)) for x in all_indices]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python3 (radiogenomics)", + "language": "python", + "name": "radiogenomics" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}