# File: baseline/Coxph_baseline.py
"""
Cox-Ph regression. Baseline.
Leon Zheng

Script flow: select a fixed subset of features, load and normalize the data,
grid-search the ``CoxPhRegression`` hyper-parameters with 5-fold
cross-validation, refit a model with the best parameters on the training set,
and write the test-set predictions to ``submission.csv``.
"""

import numpy as np
from sklearn.model_selection import GridSearchCV

import preprocessing
from Coxph_regression import CoxPhRegression

# Feature selection: column names kept from the loaded inputs.
radiomics_features = [
    'original_shape_Sphericity',
    'original_shape_SurfaceVolumeRatio',
    'original_shape_Maximum3DDiameter',
    'original_glcm_JointEntropy',
    'original_glcm_Id',
    'original_glcm_Idm',
]
clinical_features = ['SourceDataset', 'Nstage']
features = radiomics_features + clinical_features


def main():
    """Run the baseline end to end and write ``submission.csv``.

    Prints the normalized training inputs, the training targets, the best
    cross-validation score, the best hyper-parameters and the test inputs
    along the way.
    """
    # Reading data: load the clean data and keep only the selected columns.
    # NOTE(review): indexing with a list of column names and the later
    # ``to_csv`` call suggest pandas objects -- confirm in ``preprocessing``.
    input_train, output_train, input_test = preprocessing.load_owkin_data()
    input_train = input_train[features]
    input_test = input_test[features]

    # Normalization (train and test are normalized together by the helper).
    input_train, input_test = preprocessing.normalizing_input(input_train, input_test)
    print(input_train)
    print(output_train)

    # Grid search over 10 x 10 candidate (alpha, threshold) pairs.
    tuned_params = {
        "alpha": np.logspace(-8, -3, 10),
        "threshold": np.linspace(0.85, 0.95, 10),
    }
    # n_jobs=4 runs the search in parallel workers; this is why the script
    # body lives behind the ``__main__`` guard below.
    grid_search = GridSearchCV(CoxPhRegression(), tuned_params, cv=5, n_jobs=4)
    grid_search.fit(input_train, output_train)
    print(grid_search.best_score_)
    best_params = grid_search.best_params_
    print(best_params)

    # Create submission: refit a fresh model with the best parameters on the
    # full training set, then predict on the test set.
    print(input_test)
    coxph = CoxPhRegression(**best_params)
    coxph.fit(input_train, output_train)
    y_pred = coxph.predict(input_test)
    y_pred.to_csv('submission.csv')


if __name__ == "__main__":
    main()