
--- /dev/null
+++ b/app/apis/ml_outcome_pipeline.py
@@ -0,0 +1,295 @@
+import numpy as np
+import pandas as pd
+import xgboost as xgb
+from catboost import CatBoostClassifier
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
+from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
+from sklearn.tree import DecisionTreeClassifier
+
+from app.core.evaluation import covid_metrics, eval_metrics
+from app.core.utils import init_random
+from app.datasets import load_data
+from app.datasets.ml import flatten_dataset, numpy_dataset
+from app.utils import perflog
+
+
+def train(x, y, method, cfg, seed=42):
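+    """Fit a binary outcome classifier of the requested type and return it."""
+    # The label matrix carries the outcome in its first column; keep only that.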
+    y = y[:, 0]
+
+    if method == "xgboost":
+        model = xgb.XGBClassifier(
+            objective="binary:logistic",
+            eval_metric="aucpr",
+            verbosity=0,
+            learning_rate=cfg.learning_rate,
+            max_depth=cfg.max_depth,
+            min_child_weight=cfg.min_child_weight,
+            n_estimators=1000,
+            use_label_encoder=False,
+            random_state=seed,
+        )
+        model.fit(x, y)
+    elif method == "gbdt":
+        model = GradientBoostingClassifier(
+            random_state=seed,
+            learning_rate=cfg.learning_rate,
+            n_estimators=cfg.n_estimators,
+            subsample=cfg.subsample,
+        )
+        model.fit(x, y)
+    elif method == "random_forest":
+        model = RandomForestClassifier(
+            random_state=seed,
+            max_depth=cfg.max_depth,
+            min_samples_split=cfg.min_samples_split,
+            n_estimators=cfg.n_estimators,
+        )
+        model.fit(x, y)
+    elif method == "decision_tree":
+        model = DecisionTreeClassifier(random_state=seed, max_depth=cfg.max_depth)
+        model.fit(x, y)
+    elif method == "catboost":
+        model = CatBoostClassifier(
+            random_seed=seed,
+            iterations=cfg.iterations,  # performance is better when iterations = 100
+            learning_rate=cfg.learning_rate,
+            depth=cfg.depth,
+            silent=True,
+            allow_writing_files=False,
+            loss_function="CrossEntropy",
+        )
+        model.fit(x, y)
+    else:
+        raise ValueError(f"Unknown method: {method}")
+    return model
+
+
+def validate(x, y, len_list, model, cfg):
+    """val/test"""
+    # Score with positive-class probabilities so the ranking metrics
+    # (AUROC/AUPRC) are computed on scores rather than hard labels.
+    y_outcome_pred = model.predict_proba(x)[:, 1]
+    y_outcome_true = y[:, 0]
+    evaluation_scores = eval_metrics.print_metrics_binary(
+        y_outcome_true, y_outcome_pred, verbose=0
+    )
+    early_prediction_score = covid_metrics.early_prediction_outcome_metric(
+        y, y_outcome_pred, len_list, cfg.thresholds, verbose=0
+    )
+    evaluation_scores["early_prediction_score"] = early_prediction_score
+    return evaluation_scores
+
+
+def start_pipeline(cfg):
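+    """Run nested stratified CV for the ML outcome models: an outer k-fold
+    provides test splits, an inner shuffle-split provides validation splits."""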
+    dataset_type, mode, method, num_folds, train_fold = (
+        cfg.dataset,
+        cfg.mode,
+        cfg.model,
+        cfg.num_folds,
+        cfg.train_fold,
+    )
+    # Load data
+    x, y, x_lab_length = load_data(dataset_type)
+    x, y_outcome, y_los, x_lab_length = numpy_dataset(x, y, x_lab_length)
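+    # y_outcome drives the stratified splits below; y_los is unused in this
+    # outcome-only pipeline.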
+
+    all_history = {}
+    test_performance = {
+        "test_accuracy": [],
+        "test_auroc": [],
+        "test_auprc": [],
+        "test_early_prediction_score": [],
+    }
+
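+    # Outer CV: stratified k-fold on the outcome label; each selected fold
+    # serves once as the held-out test set.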
+    kfold_test = StratifiedKFold(
+        n_splits=num_folds, shuffle=True, random_state=cfg.dataset_split_seed
+    )
+    skf = kfold_test.split(np.arange(len(x)), y_outcome)
+    for fold_test in range(train_fold):
+        train_and_val_idx, test_idx = next(skf)
+        print("====== Test Fold {} ======".format(fold_test + 1))
+        sss = StratifiedShuffleSplit(
+            n_splits=1,
+            test_size=1 / (num_folds - 1),
+            random_state=cfg.dataset_split_seed,
+        )
+
+        sub_x = x[train_and_val_idx]
+        sub_x_lab_length = x_lab_length[train_and_val_idx]
+        sub_y = y[train_and_val_idx]
+        # y has shape [patient, visit, 2] with the outcome in channel 0 and
+        # the LOS in channel 1; the outcome is visit-invariant, so visit 0 suffices.
+        sub_y_outcome = sub_y[:, 0, 0]
+
+        train_idx, val_idx = next(
+            sss.split(np.arange(len(train_and_val_idx)), sub_y_outcome)
+        )
+
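+        # Flatten the padded visit sequences into 2-D feature matrices for the
+        # sklearn-style models; len_list records per-patient sequence lengths
+        # for the early-prediction metric.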
+        x_train, y_train, len_list_train = flatten_dataset(
+            sub_x, sub_y, train_idx, sub_x_lab_length, case="outcome"
+        )
+        x_val, y_val, len_list_val = flatten_dataset(
+            sub_x, sub_y, val_idx, sub_x_lab_length, case="outcome"
+        )
+        x_test, y_test, len_list_test = flatten_dataset(
+            x, y, test_idx, x_lab_length, case="outcome"
+        )
+        history = {
+            "val_accuracy": [],
+            "val_auroc": [],
+            "val_auprc": [],
+            "val_early_prediction_score": [],
+        }
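+        # Train and evaluate once per model-init seed; metrics are aggregated
+        # across seeds and folds below.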
+        for seed in cfg.model_init_seed:
+            init_random(seed)
+            if cfg.train:
+                model = train(x_train, y_train, method, cfg, seed)
+                # Persist the fitted model; test mode reloads it via pd.read_pickle.
+                pd.to_pickle(
+                    model, f"checkpoints/{cfg.name}_{fold_test + 1}_seed{seed}.pth"
+                )
+            if mode == "val":
+                val_evaluation_scores = validate(x_val, y_val, len_list_val, model, cfg)
+                history["val_accuracy"].append(val_evaluation_scores["acc"])
+                history["val_auroc"].append(val_evaluation_scores["auroc"])
+                history["val_auprc"].append(val_evaluation_scores["auprc"])
+                history["val_early_prediction_score"].append(
+                    val_evaluation_scores["early_prediction_score"]
+                )
+                print(
+                    f"Performance on val set {fold_test + 1}: "
+                    f"ACC = {val_evaluation_scores['acc']}, "
+                    f"AUROC = {val_evaluation_scores['auroc']}, "
+                    f"AUPRC = {val_evaluation_scores['auprc']}, "
+                    f"EarlyPredictionScore = {val_evaluation_scores['early_prediction_score']}"
+                )
+            elif mode == "test":
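+                # Reload the checkpoint saved for this fold/seed during training.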
+                model = pd.read_pickle(
+                    f"checkpoints/{cfg.name}_{fold_test + 1}_seed{seed}.pth"
+                )
+                test_evaluation_scores = validate(
+                    x_test, y_test, len_list_test, model, cfg
+                )
+                test_performance["test_accuracy"].append(test_evaluation_scores["acc"])
+                test_performance["test_auroc"].append(test_evaluation_scores["auroc"])
+                test_performance["test_auprc"].append(test_evaluation_scores["auprc"])
+                test_performance["test_early_prediction_score"].append(
+                    test_evaluation_scores["early_prediction_score"]
+                )
+                print(
+                    f"Performance on test set {fold_test + 1}: "
+                    f"ACC = {test_evaluation_scores['acc']}, "
+                    f"AUROC = {test_evaluation_scores['auroc']}, "
+                    f"AUPRC = {test_evaluation_scores['auprc']}, "
+                    f"EarlyPredictionScore = {test_evaluation_scores['early_prediction_score']}"
+                )
+        all_history["test_fold_{}".format(fold_test + 1)] = history
+    if mode == "val":
+        # Aggregate validation metrics (mean/std) across all folds and seeds
+        val_accuracy_list = []
+        val_auroc_list = []
+        val_auprc_list = []
+        val_early_prediction_list = []
+        for f in range(train_fold):
+            val_accuracy_list.extend(all_history[f"test_fold_{f + 1}"]["val_accuracy"])
+            val_auroc_list.extend(all_history[f"test_fold_{f + 1}"]["val_auroc"])
+            val_auprc_list.extend(all_history[f"test_fold_{f + 1}"]["val_auprc"])
+            val_early_prediction_list.extend(
+                all_history[f"test_fold_{f + 1}"]["val_early_prediction_score"]
+            )
+        val_accuracy_list = np.array(val_accuracy_list)
+        val_auroc_list = np.array(val_auroc_list)
+        val_auprc_list = np.array(val_auprc_list)
+        val_early_prediction_list = np.array(val_early_prediction_list)
+        print("====================== VAL RESULT ======================")
+        print(
+            "ACC: {:.3f} ({:.3f})".format(
+                val_accuracy_list.mean(), val_accuracy_list.std()
+            )
+        )
+        print(
+            "AUROC: {:.3f} ({:.3f})".format(val_auroc_list.mean(), val_auroc_list.std())
+        )
+        print(
+            "AUPRC: {:.3f} ({:.3f})".format(val_auprc_list.mean(), val_auprc_list.std())
+        )
+        print(
+            "EarlyPredictionScore:",
+            (
+                val_early_prediction_list.mean(axis=0),
+                val_early_prediction_list.std(axis=0),
+            ),
+        )
+        print("=========================================================")
+        perflog.process_and_upload_performance(
+            cfg,
+            acc=val_accuracy_list,
+            auroc=val_auroc_list,
+            auprc=val_auprc_list,
+            early_prediction_score=val_early_prediction_list,
+            verbose=1,
+            upload=cfg.db,
+        )
+    elif mode == "test":
+        # Aggregate test metrics (mean/std) across all folds and seeds
+        test_accuracy_list = np.array(test_performance["test_accuracy"])
+        test_auroc_list = np.array(test_performance["test_auroc"])
+        test_auprc_list = np.array(test_performance["test_auprc"])
+        test_early_prediction_list = np.array(
+            test_performance["test_early_prediction_score"]
+        )
+        print("====================== TEST RESULT ======================")
+        print(
+            "ACC: {:.3f} ({:.3f})".format(
+                test_accuracy_list.mean(), test_accuracy_list.std()
+            )
+        )
+        print(
+            "AUROC: {:.3f} ({:.3f})".format(
+                test_auroc_list.mean(), test_auroc_list.std()
+            )
+        )
+        print(
+            "AUPRC: {:.3f} ({:.3f})".format(
+                test_auprc_list.mean(), test_auprc_list.std()
+            )
+        )
+        print(
+            "EarlyPredictionScore:",
+            (
+                test_early_prediction_list.mean(axis=0),
+                test_early_prediction_list.std(axis=0),
+            ),
+        )
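+        # Per-threshold breakdown of the early-prediction score.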
+        for i in range(len(cfg.thresholds)):
+            print(
+                cfg.thresholds[i],
+                test_early_prediction_list.mean(axis=0)[i],
+                test_early_prediction_list.std(axis=0)[i],
+            )
+
+        print("=========================================================")
+        perflog.process_and_upload_performance(
+            cfg,
+            acc=test_accuracy_list,
+            auroc=test_auroc_list,
+            auprc=test_auprc_list,
+            early_prediction_score=test_early_prediction_list,
+            verbose=1,
+            upload=cfg.db,
+        )