4Dsurvival / Git / Diff of /demo/scripts/bootstrap

Models:
tobiasharvey/
4Dsurvival
Downloads: 1
Diff of /demo/scripts/bootstrap_cox.py [000000] .. [2cc208]
Switch to side-by-side view

--- a
+++ b/demo/scripts/bootstrap_cox.py
@@ -0,0 +1,136 @@
+"""
+@author: gbello
+"""
+import shutil
+import pickle
+from lifelines.utils import concordance_index
+import numpy as np
+from pathlib import Path
+from argparse import ArgumentParser
+
+from survival4D.cox_reg import train_cox_reg, hypersearch_cox
+from survival4D.paths import DATA_DIR
+from survival4D.config import CoxExperimentConfig, HypersearchConfig
+
+DEFAULT_CONF_PATH = Path(__file__).parent.joinpath("default_cox.conf")
+
+
+def parse_args():
+    parser = ArgumentParser()
+    parser.add_argument(
+        "-c", "--conf-path", dest="conf_path", type=str, default=None, help="Conf path."
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    if args.conf_path is None:
+        conf_path = DEFAULT_CONF_PATH
+    else:
+        conf_path = Path(args.conf_path)
+    exp_config = CoxExperimentConfig.from_conf(conf_path)
+    exp_config.output_dir.mkdir(parents=True, exist_ok=True)
+
+    hypersearch_config = HypersearchConfig.from_conf(conf_path)
+    shutil.copy(str(conf_path), str(exp_config.output_dir.joinpath("cox.conf")))
+    args = parse_args()
+    if args.data_dir is None:
+        data_dir = DATA_DIR
+    else:
+        data_dir = Path(args.data_dir)
+    # import input data: i_full=list of patient IDs, y_full=censoring status and survival times for patients,
+    # x_full=input data for patients (i.e. volumetric measures of RV function)
+    with open(str(data_dir.joinpath(args.file_name)), 'rb') as f:
+        c3 = pickle.load(f)
+    x_full = c3[0]
+    y_full = c3[1]
+    del c3
+
+    # Initialize lists to store predictions
+    preds_bootfull = []
+    inds_inbag = []
+    Cb_opts = []
+
+    # STEP 1
+    # (1a) find optimal hyperparameters
+    opars, osummary = hypersearch_cox(
+        x_data=x_full, y_data=y_full, method='particle swarm', nfolds=exp_config.n_folds, nevals=exp_config.n_evals,
+        penalty_range=hypersearch_config.penalty_exp
+    )
+
+    # (1b) using optimal hyperparameters, train a model on full sample
+    omod = train_cox_reg(xtr=x_full, ytr=y_full, penalty=10 ** opars['penalty'])
+
+    # (1c) Compute Harrell's Concordance index
+    predfull = omod.predict_partial_hazard(x_full)
+    C_app = concordance_index(y_full[:, 1], -predfull, y_full[:, 0])
+
+    print('\n\n==================================================')
+    print('Apparent concordance index = {0:.4f}'.format(C_app))
+    print('==================================================\n\n')
+
+    # BOOTSTRAP SAMPLING
+
+    # define useful variables
+    nsmp = len(x_full)
+    rowids = [_ for _ in range(nsmp)]
+    B = exp_config.n_bootstraps
+
+    for b in range(B):
+        print('\n-------------------------------------')
+        print('Current bootstrap sample:', b, 'of', B-1)
+        print('-------------------------------------')
+
+        # STEP 2: Generate a bootstrap sample by doing n random selections with replacement (where n is the sample size)
+        b_inds = np.random.choice(rowids, size=nsmp, replace=True)
+        xboot = x_full[b_inds]
+        yboot = y_full[b_inds]
+
+        # (2a) find optimal hyperparameters
+        bpars, bsummary = hypersearch_cox(
+            x_data=xboot, y_data=yboot, method='particle swarm', nfolds=exp_config.n_folds, nevals=exp_config.n_evals,
+        penalty_range=hypersearch_config.penalty_exp
+        )
+
+        # (2b) using optimal hyperparameters, train a model on bootstrap sample
+        bmod = train_cox_reg(xtr=xboot, ytr=yboot, penalty=10 ** bpars['penalty'])
+
+        # (2c[i])  Using bootstrap-trained model, compute predictions on bootstrap sample.
+        # Evaluate accuracy of predictions (Harrell's Concordance index)
+        predboot = bmod.predict_partial_hazard(xboot)
+        Cb_boot = concordance_index(yboot[:, 1], -predboot, yboot[:, 0])
+
+        # (2c[ii]) Using bootstrap-trained model, compute predictions on FULL sample.
+        # Evaluate accuracy of predictions (Harrell's Concordance index)
+        predbootfull = bmod.predict_partial_hazard(x_full)
+        Cb_full = concordance_index(y_full[:, 1], -predbootfull, y_full[:, 0])
+
+        # STEP 3: Compute optimism for bth bootstrap sample, as difference between results from 2c[i] and 2c[ii]
+        Cb_opt = Cb_boot - Cb_full
+
+        # store data on current bootstrap sample (predictions, C-indices)
+        preds_bootfull.append(predbootfull)
+        inds_inbag.append(b_inds)
+        Cb_opts.append(Cb_opt)
+
+        del bpars, bmod
+
+    # STEP 5
+    # Compute bootstrap-estimated optimism (mean of optimism estimates across the B bootstrap samples)
+    C_opt = np.mean(Cb_opts)
+
+    # Adjust apparent C using bootstrap-estimated optimism
+    C_adj = C_app - C_opt
+
+    # compute confidence intervals for optimism-adjusted C
+    C_opt_95confint = np.percentile([C_app - o for o in Cb_opts], q=[2.5, 97.5])
+
+    print('\n\n=========================SUMMARY=========================')
+    print('Apparent concordance index = {0:.4f}'.format(C_app))
+    print('Optimism bootstrap estimate = {0:.4f}'.format(C_opt))
+    print('Optimism-adjusted concordance index = {0:.4f}, and 95% CI = {1}'.format(C_adj, C_opt_95confint))
+
+
+if __name__ == '__main__':
+    main()