# Source: covid-ehr-benchmarks (philipB), revision d6904d
# File: app/apis/ml_outcome_pipeline.py (296 lines, 279 with data, 11.0 kB)

import math
import pathlib
import pickle
import random

import numpy as np
import pandas as pd
import torch
import xgboost as xgb
from catboost import CatBoostClassifier, CatBoostRegressor
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    KFold,
    StratifiedKFold,
    StratifiedShuffleSplit,
    train_test_split,
)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from app.core.evaluation import covid_metrics, eval_metrics
from app.core.utils import init_random
from app.datasets import get_dataset, load_data
from app.datasets.dl import Dataset
from app.datasets.ml import flatten_dataset, numpy_dataset
from app.models import (
    build_model_from_cfg,
    get_multi_task_loss,
    predict_all_visits_bce_loss,
    predict_all_visits_mse_loss,
)
from app.utils import perflog


def train(x, y, method, cfg, seed=42):
    """Fit a binary outcome classifier of the requested kind.

    Args:
        x: Feature matrix passed unchanged to the estimator's ``fit``.
        y: Two-dimensional label array; only column 0 is used as the target.
        method: One of ``"xgboost"``, ``"gbdt"``, ``"random_forest"``,
            ``"decision_tree"`` or ``"catboost"``.
        cfg: Configuration object supplying the hyper-parameters read by the
            selected method (e.g. ``learning_rate``, ``max_depth``,
            ``n_estimators``, ``iterations``, ``depth``).
        seed: Random seed forwarded to the estimator.

    Returns:
        The fitted estimator.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    y = y[:, 0]

    if method == "xgboost":
        model = xgb.XGBClassifier(
            objective="binary:logistic",
            eval_metric="aucpr",
            verbosity=0,
            learning_rate=cfg.learning_rate,
            max_depth=cfg.max_depth,
            min_child_weight=cfg.min_child_weight,
            n_estimators=1000,
            use_label_encoder=False,
            random_state=seed,
        )
    elif method == "gbdt":
        model = GradientBoostingClassifier(
            random_state=seed,
            learning_rate=cfg.learning_rate,
            n_estimators=cfg.n_estimators,
            subsample=cfg.subsample,
        )
    elif method == "random_forest":
        model = RandomForestClassifier(
            random_state=seed,
            max_depth=cfg.max_depth,
            min_samples_split=cfg.min_samples_split,
            n_estimators=cfg.n_estimators,
        )
    elif method == "decision_tree":
        model = DecisionTreeClassifier(random_state=seed, max_depth=cfg.max_depth)
    elif method == "catboost":
        model = CatBoostClassifier(
            random_seed=seed,
            iterations=cfg.iterations,  # performance is better when iterations = 100
            learning_rate=cfg.learning_rate,
            depth=cfg.depth,
            verbose=None,
            silent=True,
            allow_writing_files=False,
            loss_function="CrossEntropy",
        )
    else:
        # Fail loudly instead of hitting an unbound ``model`` at the return.
        raise ValueError(f"Unsupported method: {method!r}")

    model.fit(x, y)
    return model


def validate(x, y, len_list, model, cfg):
    """Score ``model`` on a held-out (validation or test) split.

    Args:
        x: Features passed to ``model.predict``.
        y: Two-dimensional label array; column 0 is used as the true outcome
            and the full array is forwarded to the early-prediction metric.
        len_list: Forwarded unchanged to the early-prediction metric.
        model: Fitted estimator exposing ``predict``.
        cfg: Configuration object; ``cfg.thresholds`` is read.

    Returns:
        The mapping returned by ``eval_metrics.print_metrics_binary`` with an
        additional ``"early_prediction_score"`` entry.
    """
    # NOTE(review): ``predict`` typically returns hard class labels for these
    # classifiers; if the metrics below expect probabilities, AUROC/AUPRC
    # would be affected -- confirm against ``eval_metrics``.
    predictions = model.predict(x)

    scores = eval_metrics.print_metrics_binary(y[:, 0], predictions, verbose=0)
    scores["early_prediction_score"] = covid_metrics.early_prediction_outcome_metric(
        y, predictions, len_list, cfg.thresholds, verbose=0
    )
    return scores


def _checkpoint_path(cfg, fold, seed):
    """Return the checkpoint file path for the model of one (fold, seed) pair."""
    return f"checkpoints/{cfg.name}_{fold}_seed{seed}.pth"


def _print_run_scores(mode, fold, scores):
    """Print the metrics of a single trained model on one fold of ``mode``."""
    print(
        f"Performance on {mode} set {fold}: "
        f"ACC = {scores['acc']}, "
        f"AUROC = {scores['auroc']}, "
        f"AUPRC = {scores['auprc']}, "
        f"EarlyPredictionScore = {scores['early_prediction_score']}"
    )


def _print_summary(mode, results, thresholds):
    """Print mean (std) of every collected metric across folds and seeds."""
    print(f"====================== {mode.upper()} RESULT ======================")
    for label, key in (("ACC", "acc"), ("AUROC", "auroc"), ("AUPRC", "auprc")):
        values = results[key]
        print("{}: {:.3f} ({:.3f})".format(label, values.mean(), values.std()))

    early = results["early_prediction_score"]
    early_mean, early_std = early.mean(axis=0), early.std(axis=0)
    print("EarlyPredictionScore:", (early_mean, early_std))
    if mode == "test":
        # The test report additionally lists the score per threshold.
        for threshold, mean, std in zip(thresholds, early_mean, early_std):
            print(threshold, mean, std)

    print("=========================================================")


def start_pipeline(cfg):
    """Run the cross-validated outcome-prediction pipeline described by ``cfg``.

    The data is split into ``cfg.num_folds`` stratified test folds, of which
    the first ``cfg.train_fold`` are processed. For every fold and every seed
    in ``cfg.model_init_seed`` a model is trained and pickled (when
    ``cfg.train`` is truthy) or loaded from its checkpoint, then scored on the
    validation split (``cfg.mode == "val"``) or the test fold
    (``cfg.mode == "test"``). Aggregated results are printed and handed to
    ``perflog.process_and_upload_performance``. Any other mode only trains.

    Args:
        cfg: Configuration object. Attributes read here: ``dataset``, ``mode``,
            ``model``, ``num_folds``, ``train_fold``, ``dataset_split_seed``,
            ``model_init_seed``, ``train``, ``name``, ``thresholds``, ``db``.

    Raises:
        ValueError: If ``cfg.train_fold`` exceeds ``cfg.num_folds``.
    """
    dataset_type, mode, method, num_folds, train_fold = (
        cfg.dataset,
        cfg.mode,
        cfg.model,
        cfg.num_folds,
        cfg.train_fold,
    )
    if train_fold > num_folds:
        # The fold generator only yields ``num_folds`` splits.
        raise ValueError(
            f"train_fold ({train_fold}) must not exceed num_folds ({num_folds})"
        )
    evaluating = mode in ("val", "test")

    # Load data
    x, y, x_lab_length = load_data(dataset_type)
    x, y_outcome, _, x_lab_length = numpy_dataset(x, y, x_lab_length)

    # Per-model scores, appended in (fold, seed) order.
    results = {"acc": [], "auroc": [], "auprc": [], "early_prediction_score": []}

    kfold_test = StratifiedKFold(
        n_splits=num_folds, shuffle=True, random_state=cfg.dataset_split_seed
    )
    skf = kfold_test.split(np.arange(len(x)), y_outcome)
    # Taking 1/(num_folds - 1) of the train+val portion makes the validation
    # split roughly the size of one test fold.
    sss = StratifiedShuffleSplit(
        n_splits=1,
        test_size=1 / (num_folds - 1),
        random_state=cfg.dataset_split_seed,
    )

    for fold_idx in range(train_fold):
        fold = fold_idx + 1
        train_and_val_idx, test_idx = next(skf)
        print("====== Test Fold {} ======".format(fold))

        sub_x = x[train_and_val_idx]
        sub_x_lab_length = x_lab_length[train_and_val_idx]
        sub_y = y[train_and_val_idx]
        sub_y_outcome = sub_y[:, 0, 0]

        # Indices returned here are relative to the train+val subset.
        train_idx, val_idx = next(
            sss.split(np.arange(len(train_and_val_idx)), sub_y_outcome)
        )

        x_train, y_train, _ = flatten_dataset(
            sub_x, sub_y, train_idx, sub_x_lab_length, case="outcome"
        )
        x_val, y_val, len_list_val = flatten_dataset(
            sub_x, sub_y, val_idx, sub_x_lab_length, case="outcome"
        )
        x_test, y_test, len_list_test = flatten_dataset(
            x, y, test_idx, x_lab_length, case="outcome"
        )

        for seed in cfg.model_init_seed:
            init_random(seed)
            checkpoint = _checkpoint_path(cfg, fold, seed)

            if cfg.train:
                model = train(x_train, y_train, method, cfg, seed)
                # Make sure the target directory exists before pickling.
                pathlib.Path(checkpoint).parent.mkdir(parents=True, exist_ok=True)
                pd.to_pickle(model, checkpoint)
            elif evaluating:
                # Evaluation without training needs the saved model in both
                # modes. Unpickling executes arbitrary code: only load
                # checkpoints produced by this pipeline.
                model = pd.read_pickle(checkpoint)

            if mode == "val":
                scores = validate(x_val, y_val, len_list_val, model, cfg)
            elif mode == "test":
                scores = validate(x_test, y_test, len_list_test, model, cfg)
            else:
                continue

            for key, collected in results.items():
                collected.append(scores[key])
            _print_run_scores(mode, fold, scores)

    if not evaluating:
        return

    # Calculate average performance over all evaluated folds and seeds
    summary = {key: np.array(values) for key, values in results.items()}
    _print_summary(mode, summary, cfg.thresholds)
    perflog.process_and_upload_performance(
        cfg,
        acc=summary["acc"],
        auroc=summary["auroc"],
        auprc=summary["auprc"],
        early_prediction_score=summary["early_prediction_score"],
        verbose=1,
        upload=cfg.db,
    )