Diff of /AICare-baselines/test.py [000000] .. [0f1df3]

Switch to unified view

a b/AICare-baselines/test.py
1
import os
2
3
import lightning as L
4
import pandas as pd
5
6
from configs.exp import hparams
7
from datasets.loader.datamodule import EhrDataModule
8
from datasets.loader.load_los_info import get_los_info
9
from pipelines import DlPipeline, MlPipeline
10
11
def get_latest_file(path):
    """Return the path of the most recently modified regular file in *path*.

    Only direct children are considered (non-recursive); subdirectories are
    ignored.

    Args:
        path: Directory to scan.

    Returns:
        Full path (``path`` joined with the filename) of the file with the
        newest modification time.

    Raises:
        ValueError: If the directory contains no regular files.
        FileNotFoundError: If ``path`` does not exist.
    """
    files = [
        os.path.join(path, f)
        for f in os.listdir(path)
        if os.path.isfile(os.path.join(path, f))
    ]
    if not files:
        # max() on an empty sequence would raise a cryptic ValueError; fail
        # with a message that names the offending directory instead.
        raise ValueError(f"No files found in directory: {path}")
    # Use getmtime (content modification time) to match the stated intent.
    # getctime is platform-dependent: metadata-change time on Unix,
    # creation time on Windows.
    return max(files, key=os.path.getmtime)
19
20
def run_ml_experiment(config):
    """Run the test phase of a classical-ML pipeline for one fold.

    Args:
        config: Experiment configuration dict; must provide "dataset",
            "fold", and "batch_size". Mutated in place: a "los_info" entry
            (length-of-stay statistics for the fold) is added.

    Returns:
        The pipeline's ``test_performance`` metrics for this run.
    """
    data_dir = f'datasets/{config["dataset"]}/processed/fold_{config["fold"]}'
    config.update({"los_info": get_los_info(data_dir)})

    # Data module for the processed fold.
    datamodule = EhrDataModule(data_dir, batch_size=config["batch_size"])

    # Single-pass CPU evaluation; no logging, no sanity-check batches.
    pipeline = MlPipeline(config)
    trainer = L.Trainer(accelerator="cpu", max_epochs=1, logger=False, num_sanity_val_steps=0)
    trainer.test(pipeline, datamodule)
    return pipeline.test_performance
31
32
def run_dl_experiment(config):
    """Run the test phase of a deep-learning pipeline from a saved checkpoint.

    Args:
        config: Experiment configuration dict; must provide "dataset",
            "task", "model", "fold", "seed", and "batch_size"; may provide
            a truthy "time_aware" flag. Mutated in place: a "los_info"
            entry is added.

    Returns:
        The pipeline's ``test_performance`` metrics for this run.
    """
    data_dir = f'datasets/{config["dataset"]}/processed/fold_{config["fold"]}'
    config.update({"los_info": get_los_info(data_dir)})

    # data
    dm = EhrDataModule(data_dir, batch_size=config["batch_size"])

    # Checkpoint selection. Time-aware runs are logged under a "-ta" suffix
    # and keep a fixed best.ckpt; other runs use the newest file in their
    # checkpoint directory. (Previously get_latest_file was called
    # unconditionally and the path was printed *before* the time-aware
    # override, so the printed path could differ from the one actually used.)
    run_name = f'{config["model"]}-fold{config["fold"]}-seed{config["seed"]}'
    run_root = f'logs/train/{config["dataset"]}/{config["task"]}'
    if config.get("time_aware"):
        checkpoint_path = f'{run_root}/{run_name}-ta/checkpoints/best.ckpt'
    else:
        checkpoint_path = get_latest_file(f'{run_root}/{run_name}/checkpoints')
    print("checkpoint_path: ", checkpoint_path)

    # train/val/test
    pipeline = DlPipeline(config)
    trainer = L.Trainer(accelerator="cpu", max_epochs=1, logger=False, num_sanity_val_steps=0)
    trainer.test(pipeline, dm, ckpt_path=checkpoint_path)
    return pipeline.test_performance
54
55
if __name__ == "__main__":
    best_hparams = hparams  # [TO-SPECIFY]
    # One row per (config, fold, seed) evaluation. Metric columns receive
    # None for tasks other than "outcome" so that every column stays the
    # same length. (Previously the metric lists were appended only for the
    # "outcome" task while the id columns were always appended, which made
    # pd.DataFrame raise ValueError on unequal-length columns as soon as a
    # non-outcome task was tested.)
    metric_keys = ['accuracy', 'auroc', 'auprc', 'f1', 'minpse']
    performance_table = {'dataset': [], 'task': [], 'model': [], 'fold': [], 'seed': [],
                         'accuracy': [], 'auroc': [], 'auprc': [], 'f1': [], 'minpse': []}
    # Models handled by the classical-ML pipeline; everything else is DL.
    ml_models = {"RF", "DT", "GBDT", "XGBoost", "CatBoost", "LR", "LightGBM"}
    for i, config in enumerate(best_hparams):
        print(f"Testing... {i}/{len(best_hparams)}")
        run_func = run_ml_experiment if config["model"] in ml_models else run_dl_experiment
        seeds = [0]  # [0,1,2,3,4]
        folds = ['nshot']
        for fold in folds:
            config["fold"] = fold
            for seed in seeds:
                config["seed"] = seed
                perf = run_func(config)
                print(f"{config}, Test Performance: {perf}")

                # Time-aware variants get a "_ta" suffix in the results table.
                model_name = config['model'] + "_ta" if config.get("time_aware") else config['model']

                performance_table['dataset'].append(config['dataset'])
                performance_table['task'].append(config['task'])
                performance_table['model'].append(model_name)
                performance_table['fold'].append(config['fold'])
                performance_table['seed'].append(config['seed'])
                for key in metric_keys:
                    # perf.get guards against a metric missing from an
                    # individual run's results dict.
                    value = perf.get(key) if config['task'] == 'outcome' else None
                    performance_table[key].append(value)
    pd.DataFrame(performance_table).to_csv('ijcai24_ml_baselines_20240108.csv', index=False)  # [TO-SPECIFY]