In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, make_scorer
from preprocessing import preprocessing, get_train_and_test, standardize_features

import warnings
warnings.filterwarnings('ignore')

x_values, y_values = preprocessing()
x_train, x_test, y_train, y_test = get_train_and_test(x_values, y_values)
x_train, x_test = standardize_features(x_train, x_test)

param_grid = {
    'kernel': ['rbf', 'sigmoid'],
    'C': np.logspace(-4, 2, num=30),
    'gamma':np.logspace(-4, 2, num=30),
    'random_state':[0]
}
grid = GridSearchCV(svm.SVC(), param_grid, scoring=make_scorer(roc_auc_score), cv=10)
grid.fit(x_train, y_train)

df = pd.DataFrame(grid.cv_results_)
cols_to_keep = ['param_' + k for k in param_grid.keys()] +\
               ["mean_test_score", "mean_train_score", "std_test_score", "std_train_score","params"] 
df_output = df[cols_to_keep].fillna('-')
df_output.sort_values(by=["mean_test_score"], ascending=False)

Unnamed: 0,param_kernel,param_C,param_gamma,param_random_state,mean_test_score,mean_train_score,std_test_score,std_train_score,params
1467,sigmoid,9.236709,0.048939,0,0.704950,0.676351,0.051847,0.021118,"{'C': 9.236708571873866, 'gamma': 0.0489390091..."
1527,sigmoid,14.873521,0.048939,0,0.697460,0.663911,0.043561,0.018025,"{'C': 14.873521072935118, 'gamma': 0.048939009..."
1407,sigmoid,5.736153,0.048939,0,0.691374,0.676547,0.064586,0.017041,"{'C': 5.736152510448681, 'gamma': 0.0489390091..."
1770,rbf,100.000000,0.126896,0,0.690550,0.934506,0.066089,0.010535,"{'C': 100.0, 'gamma': 0.12689610031679222, 'ke..."
1231,sigmoid,1.373824,0.126896,0,0.685510,0.620721,0.051742,0.019407,"{'C': 1.3738237958832638, 'gamma': 0.126896100..."
1703,sigmoid,62.101694,0.018874,0,0.683580,0.691920,0.055916,0.013977,"{'C': 62.10169418915616, 'gamma': 0.0188739182..."
1649,sigmoid,38.566204,0.078805,0,0.683087,0.625302,0.031914,0.020393,"{'C': 38.56620421163472, 'gamma': 0.0788046281..."
1763,sigmoid,100.000000,0.018874,0,0.683057,0.688661,0.061643,0.012275,"{'C': 100.0, 'gamma': 0.018873918221350976, 'k..."
1594,rbf,23.950266,0.329034,0,0.682593,0.964882,0.089643,0.008720,"{'C': 23.95026619987486, 'gamma': 0.3290344562..."
1772,rbf,100.000000,0.204336,0,0.682259,0.973359,0.077046,0.005554,"{'C': 100.0, 'gamma': 0.20433597178569418, 'ke..."


In [2]:
df_output = df[cols_to_keep].fillna('-')
best_params = df_output.sort_values(by=["mean_test_score"], ascending=False)['params'].iloc[0]
print(best_params)

clf = svm.SVC()
clf.set_params(**best_params)
clf.fit(x_train, y_train)

roc_auc_score(y_train, clf.predict(x_train)), roc_auc_score(y_test, clf.predict(x_test))

{'C': 9.236708571873866, 'gamma': 0.04893900918477494, 'kernel': 'sigmoid', 'random_state': 0}


(0.6427479855515422, 0.6607748184019371)