ICU-Admission-Prediction / Git / [59083a] /utils/model.py

Models:
RaymondKing/
ICU-Admission-Prediction
Downloads: 1
[59083a]: / utils / model.py
History
Download this file
116 lines (90 with data), 4.6 kB

import os
import pickle

import mlflow
import numpy as np
from mlflow.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
# NOTE: this rebinds f1_score; the sklearn function is the one in effect.
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier


class MLModelSelector:
    """Build, tune and persist the classifiers supported by this project.

    Supported ``model_name`` values are ``'SVM'``, ``'logistic_regression'``,
    ``'random_forest'`` and ``'XGBoost'``.

    Args:
        class_weight: Passed as ``class_weight`` to the scikit-learn
            estimators (SVC, LogisticRegression, RandomForestClassifier).
            The XGBoost builder does not receive it; ``train_model`` and
            ``final_model`` set ``scale_pos_weight`` for XGBoost instead.
    """

    # Directory that final_model writes its pickled artifacts into.
    _ARTIFACT_DIR = 'model'

    def __init__(self, class_weight='balanced'):
        self.class_weight = class_weight

    def SVM(self, C=1.0, kernel='rbf', **kwargs):
        """Return an unfitted ``SVC`` using this selector's class weights."""
        return SVC(C=C, kernel=kernel, class_weight=self.class_weight, **kwargs)

    def logistic_regression(self, C=1.0, solver='lbfgs', max_iter=1000, **kwargs):
        """Return an unfitted ``LogisticRegression`` using this selector's class weights."""
        return LogisticRegression(
            C=C,
            solver=solver,
            max_iter=max_iter,
            class_weight=self.class_weight,
            **kwargs,
        )

    def random_forest(self, n_estimators=100, max_depth=None, **kwargs):
        """Return an unfitted ``RandomForestClassifier`` using this selector's class weights."""
        return RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            class_weight=self.class_weight,
            **kwargs,
        )

    def XGBoost(self, n_estimators=100, learning_rate=0.3, **kwargs):
        """Return an unfitted ``XGBClassifier`` (``class_weight`` is not forwarded)."""
        return XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, **kwargs)

    def _resolve_builder(self, model_name):
        """Return the builder method for ``model_name`` or raise ``ValueError``."""
        builders = {
            'SVM': self.SVM,
            'logistic_regression': self.logistic_regression,
            'random_forest': self.random_forest,
            'XGBoost': self.XGBoost,
        }
        # Non-string names can never match; the isinstance guard also avoids a
        # TypeError from hashing e.g. a list.
        builder = builders.get(model_name) if isinstance(model_name, str) else None
        if builder is None:
            raise ValueError(f"Model '{model_name}' not supported")
        return builder

    @staticmethod
    def _binary_scale_pos_weight(y):
        """Return ``n_negative / n_positive`` for integer labels in {0, 1}.

        Raises:
            ValueError: If the labels are not exactly the two classes 0 and 1
                with at least one positive sample.
        """
        counts = np.bincount(np.asarray(y))
        if counts.size != 2:
            raise ValueError(
                "scale_pos_weight can only be derived from binary labels 0/1 "
                f"that include at least one positive sample; got class counts {counts.tolist()}"
            )
        neg, pos = counts
        return float(neg / pos)

    def _build_model(self, builder, model_name, y, kwargs):
        """Instantiate the estimator, defaulting XGBoost's ``scale_pos_weight``."""
        # Only derive the class ratio when the caller did not supply one, so an
        # explicit value also works for label sets that are not strictly 0/1.
        if model_name == 'XGBoost' and 'scale_pos_weight' not in kwargs:
            kwargs['scale_pos_weight'] = self._binary_scale_pos_weight(y)
        return builder(**kwargs)

    def train_model(self, X_train, y_train, X_test, y_test, model_name, param_grid=None, **kwargs):
        """Grid-search a model, score it on the test split and log to MLflow.

        Features are standardized with a scaler fitted on ``X_train``; the
        search uses 5-fold CV with ``f1_weighted`` scoring. The best
        estimator is evaluated on ``X_test`` with a weighted F1 score, and
        the best parameters, the preprocessing parameters and that score are
        logged in an MLflow run named ``Best_<model_name>``.

        NOTE: the scaler is fitted on the whole training set before
        cross-validation, so CV folds share scaling statistics.

        Args:
            X_train, y_train: Training features and labels (labels are cast
                to int).
            X_test, y_test: Held-out features and labels (labels are cast to
                int, matching ``y_train``).
            model_name: One of the supported model names.
            param_grid: Grid passed to ``GridSearchCV``; ``None`` means an
                empty grid, i.e. only the builder's parameters are evaluated.
            **kwargs: Forwarded to the model builder. The optional key
                ``preprocess_parameters`` (a dict) is removed first and only
                logged to MLflow.

        Returns:
            Tuple ``(best_params, f1)``.

        Raises:
            ValueError: If ``model_name`` is not supported, or if XGBoost's
                ``scale_pos_weight`` must be derived from non-binary labels.
        """
        preprocess_parameters = kwargs.pop('preprocess_parameters', None) or {}
        # Validate the name before doing any scaling work.
        builder = self._resolve_builder(model_name)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        y_train = np.asarray(y_train).astype(int)
        # Cast test labels the same way so they are comparable to predictions.
        y_test = np.asarray(y_test).astype(int)

        model = self._build_model(builder, model_name, y_train, kwargs)

        grid = param_grid if param_grid is not None else {}
        grid_search = GridSearchCV(model, grid, cv=5, scoring='f1_weighted')
        grid_search.fit(X_train, y_train)

        best_params = grid_search.best_params_
        y_pred = grid_search.best_estimator_.predict(X_test)
        f1 = f1_score(y_test, y_pred, average='weighted')

        with mlflow.start_run(run_name=f"Best_{model_name}"):
            if best_params:
                mlflow.log_params(best_params)
            if preprocess_parameters:
                mlflow.log_params(preprocess_parameters)
            mlflow.log_metric('best_f1_weighted_score', f1)

        return best_params, f1

    def final_model(self, X, y, model_name, **kwargs):
        """Fit the chosen model on all data and pickle it with its scaler.

        Writes ``model/model.pkl`` and ``model/scaler.pkl`` relative to the
        current working directory, creating the ``model`` directory if it
        does not exist.

        Args:
            X: Feature matrix; standardized with a freshly fitted scaler.
            y: Labels (cast to int).
            model_name: One of the supported model names.
            **kwargs: Forwarded to the model builder.

        Raises:
            ValueError: If ``model_name`` is not supported, or if XGBoost's
                ``scale_pos_weight`` must be derived from non-binary labels.
        """
        # Validate the name before doing any scaling work.
        builder = self._resolve_builder(model_name)

        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        y = np.asarray(y).astype(int)

        model = self._build_model(builder, model_name, y, kwargs)
        model.fit(X, y)

        # The output directory is not guaranteed to exist on a fresh checkout.
        os.makedirs(self._ARTIFACT_DIR, exist_ok=True)
        with open(os.path.join(self._ARTIFACT_DIR, 'model.pkl'), 'wb') as model_file:
            pickle.dump(model, model_file)
        with open(os.path.join(self._ARTIFACT_DIR, 'scaler.pkl'), 'wb') as scaler_file:
            pickle.dump(scaler, scaler_file)