Diff of /linear_cox.ipynb [000000] .. [7b3b0e]

Switch to unified view

a b/linear_cox.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "metadata": {},
6
   "source": [
7
    "#### Packages required\n",
8
    "- numpy==1.14.0\n",
9
    "- matplotlib==3.0.3\n",
10
    "- pandas==0.24.2\n",
11
    "- nibabel==2.5.0\n",
12
    "- scikit-learn\n",
13
    "- scikit-survival 0.12 with ``pip install scikit-survival==0.12``\n",
14
    "- lifelines 0.23.9 with ``pip install lifelines==0.23.9``"
15
   ]
16
  },
17
  {
18
   "cell_type": "code",
19
   "execution_count": 1,
20
   "metadata": {},
21
   "outputs": [],
22
   "source": [
23
    "import sksurv\n",
24
    "import numpy as np\n",
25
    "%matplotlib inline\n",
26
    "from matplotlib import pyplot as plt\n",
27
    "\n",
28
    "from sksurv.linear_model import CoxPHSurvivalAnalysis\n",
29
    "from lifelines.utils import concordance_index\n",
30
    "\n",
31
    "from utils import DataLoader, get_structured_array, run_coxnet"
32
   ]
33
  },
34
  {
35
   "cell_type": "code",
36
   "execution_count": 2,
37
   "metadata": {},
38
   "outputs": [
39
    {
40
     "name": "stdout",
41
     "output_type": "stream",
42
     "text": [
43
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
44
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
45
      "Fold : 0 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n"
46
     ]
47
    },
48
    {
49
     "name": "stderr",
50
     "output_type": "stream",
51
     "text": [
52
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
53
      "  FutureWarning)\n",
54
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
55
      "  FutureWarning)\n",
56
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
57
      "  FutureWarning)\n",
58
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
59
      "  FutureWarning)\n",
60
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
61
      "  FutureWarning)\n",
62
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
63
      "  FutureWarning)\n"
64
     ]
65
    },
66
    {
67
     "name": "stdout",
68
     "output_type": "stream",
69
     "text": [
70
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
71
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
72
      "Fold : 1 || Training set: (74,) | Validation set: (11,) | Test set: (22,)\n"
73
     ]
74
    },
75
    {
76
     "name": "stderr",
77
     "output_type": "stream",
78
     "text": [
79
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
80
      "  FutureWarning)\n",
81
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
82
      "  FutureWarning)\n",
83
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
84
      "  FutureWarning)\n",
85
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
86
      "  FutureWarning)\n",
87
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
88
      "  FutureWarning)\n",
89
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
90
      "  FutureWarning)\n"
91
     ]
92
    },
93
    {
94
     "name": "stdout",
95
     "output_type": "stream",
96
     "text": [
97
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
98
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
99
      "Fold : 2 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
100
     ]
101
    },
102
    {
103
     "name": "stderr",
104
     "output_type": "stream",
105
     "text": [
106
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
107
      "  FutureWarning)\n",
108
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
109
      "  FutureWarning)\n",
110
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
111
      "  FutureWarning)\n",
112
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
113
      "  FutureWarning)\n",
114
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
115
      "  FutureWarning)\n",
116
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
117
      "  FutureWarning)\n"
118
     ]
119
    },
120
    {
121
     "name": "stdout",
122
     "output_type": "stream",
123
     "text": [
124
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
125
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
126
      "Fold : 3 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
127
     ]
128
    },
129
    {
130
     "name": "stderr",
131
     "output_type": "stream",
132
     "text": [
133
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
134
      "  FutureWarning)\n",
135
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
136
      "  FutureWarning)\n",
137
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
138
      "  FutureWarning)\n",
139
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
140
      "  FutureWarning)\n",
141
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
142
      "  FutureWarning)\n",
143
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
144
      "  FutureWarning)\n"
145
     ]
146
    },
147
    {
148
     "name": "stdout",
149
     "output_type": "stream",
150
     "text": [
151
      "Loading data for mode valid from location data/stanford/labels/recurrence_labels/\n",
152
      "Loading data for mode test from location data/stanford/labels/recurrence_labels/\n",
153
      "Fold : 4 || Training set: (75,) | Validation set: (11,) | Test set: (21,)\n"
154
     ]
155
    },
156
    {
157
     "name": "stderr",
158
     "output_type": "stream",
159
     "text": [
160
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
161
      "  FutureWarning)\n",
162
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
163
      "  FutureWarning)\n",
164
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
165
      "  FutureWarning)\n",
166
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
167
      "  FutureWarning)\n",
168
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
169
      "  FutureWarning)\n",
170
      "/home/vs5/.virtualenvs/radiogenomics/lib/python3.5/site-packages/sksurv/linear_model/coxnet.py:211: FutureWarning: The default value of alpha_min_ratio will depend on the sample size relative to the number of features in 0.13. If n_samples > n_features, the current default value 0.0001 will be used. If n_samples < n_features, 0.01 will be used instead.\n",
171
      "  FutureWarning)\n"
172
     ]
173
    }
174
   ],
175
   "source": [
176
    "n_alphas = 100     # forwarded to run_coxnet\n",
177
    "l1_ratio = 0.3     # forwarded to run_coxnet\n",
178
    "num_genes = 500    # forwarded to DataLoader\n",
179
    "n_folds = 5        # number of folds to evaluate\n",
180
    "all_indices = []   # one list of per-method scores per fold\n",
181
    "\n",
182
    "for fold_num in range(n_folds):\n",
183
    "\n",
184
    "    # Load this fold's data and pack each (bool, value) label pair into a structured array.\n",
185
    "    # NOTE(review): y_valid is only used for the shape print below; every run_coxnet call receives the test split - confirm nothing inside run_coxnet is tuned on it.\n",
186
    "    data = DataLoader(fold=fold_num, num_genes=num_genes)\n",
187
    "    y_train = get_structured_array(data.y_train_bool, data.y_train_value)\n",
188
    "    y_valid = get_structured_array(data.y_valid_bool, data.y_valid_value)\n",
189
    "    y_test = get_structured_array(data.y_test_bool, data.y_test_value)\n",
190
    "    print('Fold : ' + str(fold_num) + ' || ' +\n",
191
    "          'Training set: ' + str(y_train.shape) + ' | Validation set: ' + str(y_valid.shape) +\n",
192
    "          ' | Test set: ' + str(y_test.shape))\n",
193
    "\n",
194
    "    # Genomics features only\n",
195
    "\n",
196
    "    gen_outputs, gen_scores = run_coxnet(l1_ratio, n_alphas,\n",
197
    "                                         data.gen_train, y_train,\n",
198
    "                                         data.gen_test, y_test)\n",
199
    "\n",
200
    "    # PyRadiomics features only\n",
201
    "\n",
202
    "    pyrad_outputs, pyrad_scores = run_coxnet(l1_ratio, n_alphas,\n",
203
    "                                             data.pyrad_train, y_train,\n",
204
    "                                             data.pyrad_test, y_test)\n",
205
    "\n",
206
    "    # DenseNet features only\n",
207
    "\n",
208
    "    dense_outputs, dense_scores = run_coxnet(l1_ratio, n_alphas,\n",
209
    "                                             data.dense_train, y_train,\n",
210
    "                                             data.dense_test, y_test)\n",
211
    "\n",
212
    "    # Genomics + PyRadiomics. feat1_score: one model on the concatenated feature matrices (the '(ef)' table row).\n",
213
    "    # feat1_hat_score: concordance of the negated sum of the two single-modality outputs (the '(lf)' row); presumably negated because the outputs are risk scores - TODO confirm in run_coxnet.\n",
214
    "    feat1_train = np.concatenate((data.gen_train,\n",
215
    "                                  data.pyrad_train), axis=1)\n",
216
    "    feat1_test = np.concatenate((data.gen_test,\n",
217
    "                                 data.pyrad_test), axis=1)\n",
218
    "\n",
219
    "    feat1_outputs, feat1_score = run_coxnet(l1_ratio, n_alphas,\n",
220
    "                                           feat1_train, y_train,\n",
221
    "                                           feat1_test, y_test)\n",
222
    "    feat1_hat_score = concordance_index(data.y_test_value,\n",
223
    "                                        -(gen_outputs + pyrad_outputs),\n",
224
    "                                        data.y_test_bool)\n",
225
    "    # Genomics + DenseNet: the same two combinations as for Genomics + PyRadiomics\n",
226
    "\n",
227
    "    feat2_train = np.concatenate((data.gen_train,\n",
228
    "                                  data.dense_train), axis=1)\n",
229
    "    feat2_test = np.concatenate((data.gen_test,\n",
230
    "                                 data.dense_test), axis=1)\n",
231
    "\n",
232
    "    feat2_outputs, feat2_score = run_coxnet(l1_ratio, n_alphas,\n",
233
    "                                           feat2_train, y_train,\n",
234
    "                                           feat2_test, y_test)\n",
235
    "    feat2_hat_score = concordance_index(data.y_test_value,\n",
236
    "                                        -(gen_outputs + dense_outputs),\n",
237
    "                                        data.y_test_bool)\n",
238
    "\n",
239
    "    # Genomics + PyRadiomics + DenseNet: the same two combinations over all three feature sets\n",
240
    "\n",
241
    "\n",
242
    "    feat3_train = np.concatenate((data.gen_train,\n",
243
    "                                  data.pyrad_train,\n",
244
    "                                  data.dense_train), axis=1)\n",
245
    "    feat3_test = np.concatenate((data.gen_test,\n",
246
    "                                 data.pyrad_test,\n",
247
    "                                 data.dense_test), axis=1)\n",
248
    "\n",
249
    "    feat3_outputs, feat3_score = run_coxnet(l1_ratio, n_alphas,\n",
250
    "                                           feat3_train, y_train,\n",
251
    "                                           feat3_test, y_test)\n",
252
    "    feat3_hat_score = concordance_index(data.y_test_value,\n",
253
    "                                        -(gen_outputs + pyrad_outputs + dense_outputs),\n",
254
    "                                        data.y_test_bool)\n",
255
    "\n",
256
    "    # Order must match the row labels in `methods` (defined in the next cell).\n",
257
    "    curr_indices = [gen_scores, pyrad_scores, dense_scores,\n",
258
    "                    feat1_score, feat1_hat_score,\n",
259
    "                    feat2_score, feat2_hat_score,\n",
260
    "                    feat3_score, feat3_hat_score]\n",
261
    "    all_indices.append(curr_indices)"
262
   ]
263
  },
264
  {
265
   "cell_type": "code",
266
   "execution_count": 3,
267
   "metadata": {},
268
   "outputs": [],
269
   "source": [
270
    "methods = ['genomics \\t ', 'pyradiomics \\t', 'densenet \\t',  # single-modality rows\n",
271
    "           'gen-pyrad (ef) \\t', 'gen-pyrad (lf) \\t', 'gen-dense (ef) \\t', 'gen-dense (lf) \\t',  # two-modality rows\n",
272
    "           'gen-pyrad-dense (ef)', 'gen-pyrad-dense (lf)']  # three-modality rows; order must match curr_indices"
273
   ]
274
  },
275
  {
276
   "cell_type": "code",
277
   "execution_count": 4,
278
   "metadata": {},
279
   "outputs": [
280
    {
281
     "name": "stdout",
282
     "output_type": "stream",
283
     "text": [
284
      "method \t\t \t|fd 1|fd 2|fd 3|fd 4|fd 5\n",
285
      "-------------------------------------------------\n",
286
      "genomics \t \t|0.52|0.45|0.60|0.72|0.75\n",
287
      "pyradiomics \t\t|0.46|0.39|0.30|0.37|0.80\n",
288
      "densenet \t\t|0.53|0.49|0.44|0.65|0.52\n",
289
      "gen-pyrad (ef) \t\t|0.41|0.36|0.46|0.57|0.81\n",
290
      "gen-pyrad (lf) \t\t|0.41|0.42|0.49|0.60|0.81\n",
291
      "gen-dense (ef) \t\t|0.53|0.46|0.60|0.80|0.77\n",
292
      "gen-dense (lf) \t\t|0.49|0.50|0.55|0.82|0.59\n",
293
      "gen-pyrad-dense (ef)\t|0.43|0.37|0.47|0.61|0.82\n",
294
      "gen-pyrad-dense (lf)\t|0.46|0.46|0.51|0.81|0.60\n"
295
     ]
296
    }
297
   ],
298
   "source": [
299
    "print('method \\t\\t \\t|fd 1|fd 2|fd 3|fd 4|fd 5')\n",
300
    "print('-------------------------------------------------')\n",
301
    "for method_idx in range(len(all_indices[0])):  # one table row per method, one column per fold\n",
302
    "    print(methods[method_idx] + '\\t|' + '|'.join([\"{0:.2f}\".format(round(fold_scores[method_idx], 2)) for fold_scores in all_indices]))"
303
   ]
304
  },
305
  {
306
   "cell_type": "code",
307
   "execution_count": null,
308
   "metadata": {},
309
   "outputs": [],
310
   "source": []
311
  }
312
 ],
313
 "metadata": {
314
  "kernelspec": {
315
   "display_name": "Python3 (radiogenomics)",
316
   "language": "python",
317
   "name": "radiogenomics"
318
  },
319
  "language_info": {
320
   "codemirror_mode": {
321
    "name": "ipython",
322
    "version": 3
323
   },
324
   "file_extension": ".py",
325
   "mimetype": "text/x-python",
326
   "name": "python",
327
   "nbconvert_exporter": "python",
328
   "pygments_lexer": "ipython3",
329
   "version": "3.5.2"
330
  }
331
 },
332
 "nbformat": 4,
333
 "nbformat_minor": 2
334
}