DIHI_adult_decomp / Git / [1d802c] /Code/Model/v1.0/run

Models:

RaymondKing/

DIHI_adult_decomp

Downloads: 1

[1d802c]: / Code / Model / v1.0 / run_xgboost.py

History

Download this file

43 lines (37 with data), 1.6 kB

import os
import sqlite3

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

from model_utils import get_data

# Default location of the per-sample score table, relative to the working directory.
XGB_SCORES_PATH = '../../Data/Modeling/Output/scores/xgb_scores.csv'


def _binarize_scores(y_score, threshold=0.5):
    """Return hard 0/1 labels for *y_score*, keeping its dtype.

    Scores below *threshold* become 0 and scores at or above it become 1.
    Both masks are computed from the untouched scores, so the result does not
    depend on the order of the two assignments.
    """
    scores = np.asarray(y_score)
    labels = scores.copy()
    labels[scores < threshold] = 0
    labels[scores >= threshold] = 1
    return labels


def main(output_path=XGB_SCORES_PATH, num_boost_round=20, threshold=0.5):
    """Grid-search XGBoost hyper-parameters, retrain, and write test scores to CSV.

    Steps:
      1. Load the train/test split from ``model_utils.get_data``.
      2. Run an exhaustive ``GridSearchCV`` (2-fold, average-precision scoring)
         over an ``XGBClassifier`` to pick the best parameter combination.
      3. Train a native booster with those parameters on the training data.
      4. Score the test set, threshold the scores, and save a CSV with the
         columns ``y_true``, ``y_hat`` and ``y_score``.

    Args:
        output_path: Destination CSV path. Missing parent directories are
            created.
        num_boost_round: Number of boosting rounds for the final booster.
        threshold: Cut-off applied to ``y_score`` to obtain ``y_hat``.
    """
    # Create the output directory before the search so that a bad path fails
    # immediately rather than after the (long) grid search has finished.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The "_rus" suffix presumably marks a randomly under-sampled training
    # set -- TODO confirm against model_utils.get_data.
    X_train_rus, y_train_rus, X_test, y_test = get_data()

    # Full search grid. A single-candidate grid (eta=0.3, max_depth=8,
    # min_child_weight=5, gamma=0.2, colsample_bytree=1) was previously kept
    # here in a disabled string literal.
    param_grid = {
        'eta': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35],
        'max_depth': [3, 4, 5, 6, 7, 8, 10, 12],
        'min_child_weight': [1, 3, 5, 7],
        'gamma': [0, 0.1, 0.2, 0.3, 0.4],
        'colsample_bytree': [0.5, 0.7, 1],
        'objective': ['binary:logistic'],
    }

    search = GridSearchCV(
        xgb.XGBClassifier(),
        param_grid,
        n_jobs=4,
        scoring='average_precision',
        cv=2,
        verbose=5,
    )
    search.fit(X_train_rus, y_train_rus)
    best_params = search.best_params_

    # NOTE(review): the search evaluates XGBClassifier with its default number
    # of trees, while the final booster below is trained for `num_boost_round`
    # rounds. If those differ, the selected parameters were tuned for a
    # different model size -- confirm this is intended.
    d_train = xgb.DMatrix(X_train_rus, label=y_train_rus)
    d_test = xgb.DMatrix(X_test, label=y_test)
    booster = xgb.train(best_params, d_train, num_boost_round)

    y_score = booster.predict(d_test)
    y_hat = _binarize_scores(y_score, threshold)

    xgb_scores = pd.DataFrame({'y_true': y_test, 'y_hat': y_hat, 'y_score': y_score})
    xgb_scores.to_csv(output_path)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()