|
a |
|
b/baseline/Coxph_baseline.py |
|
|
"""
Cox-Ph regression. Baseline.
Leon Zheng
"""
|
|
5 |
|
|
|
6 |
from Coxph_regression import CoxPhRegression |
|
|
7 |
from sklearn.model_selection import GridSearchCV |
|
|
8 |
import preprocessing |
|
|
9 |
import numpy as np |
|
|
10 |
|
|
|
"""
Feature selection
"""
# Radiomics columns kept for the baseline model.
radiomics_features = [
    'original_shape_Sphericity',
    'original_shape_SurfaceVolumeRatio',
    'original_shape_Maximum3DDiameter',
    'original_glcm_JointEntropy',
    'original_glcm_Id',
    'original_glcm_Idm',
]
# Clinical columns kept alongside the radiomics ones.
clinical_features = ['SourceDataset', 'Nstage']
# Full column selection: radiomics columns first, then clinical columns.
features = radiomics_features + clinical_features
|
|
19 |
|
|
|
"""
Reading data
"""
# Read clean data, then keep only the selected feature columns in both the
# training and the test inputs.
input_train, output_train, input_test = preprocessing.load_owkin_data()
input_train = input_train[features]
input_test = input_test[features]

# Normalization (train and test inputs are passed together to the helper).
input_train, input_test = preprocessing.normalizing_input(input_train, input_test)
print(input_train)
print(output_train)
|
|
32 |
|
|
|
"""
Grid search
"""
# Hyper-parameter grid: 10 log-spaced values of alpha between 1e-8 and 1e-3,
# and 10 evenly spaced values of threshold between 0.85 and 0.95.
tuned_params = {
    "alpha": np.logspace(-8, -3, 10),
    "threshold": np.linspace(0.85, 0.95, 10),
}
# 5-fold cross-validated search over the grid, using 4 parallel jobs.
grid_search = GridSearchCV(CoxPhRegression(), tuned_params, cv=5, n_jobs=4)
grid_search.fit(input_train, output_train)
print(grid_search.best_score_)
# Kept under this name because the submission step below reuses it.
best_params = grid_search.best_params_
print(best_params)
|
|
43 |
|
|
|
"""
Create submission
"""
# Create submission: fit a fresh model with the best grid-search parameters on
# the training data, predict on the test inputs, and write the result to disk.
print(input_test)
coxph = CoxPhRegression(**best_params)
coxph.fit(input_train, output_train)
y_pred = coxph.predict(input_test)
# NOTE(review): assumes predict() returns a pandas object exposing to_csv — confirm.
y_pred.to_csv('submission.csv')