Diff of /find_best_model.py [000000] .. [1928b6]

Switch to unified view

a b/find_best_model.py
1
import os
2
import numpy as np
3
import pandas as pd
4
import argparse
5
6
def parse_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings; when None, argparse reads
            them from ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace`` with ``dir`` and ``metric``.
    """
    parser = argparse.ArgumentParser(description='PET lymphoma classification')
    parser.add_argument('--dir', type=str, default='results', help='name of folder to find convergence files in (default "results")')
    parser.add_argument('--metric', type=str, default='auc', help='which metric to compare models with (default "auc")')
    return parser.parse_args(argv)


def parse_split_and_run(filename):
    """Extract the integer split and run ids encoded in a file name.

    The split id is read from the second ``_``-separated token (the text
    after ``split``); the run id is the text between ``run`` and the next
    ``.``.

    Raises:
        IndexError, ValueError: If the name does not follow that layout.
    """
    split_id = int(filename.split('_')[1].split('split')[1])
    run_id = int(filename.split('run')[1].split('.')[0])
    return split_id, run_id


def collect_last_epochs(directory, metric, min_epochs=7):
    """Gather the last-epoch validation row of every convergence file.

    Every ``convergence*.csv`` file in ``directory`` is read, filtered to
    the rows whose ``split`` column is ``'validation'`` and whose ``metric``
    column equals ``metric``, and ordered by ``epoch``. The final row is
    kept, with its ``split`` column replaced by the split id taken from the
    file name and a ``run`` column added.

    Args:
        directory: Folder to scan.
        metric: Value of the ``metric`` column to keep.
        min_epochs: Files with fewer matching rows than this are reported
            as ``SHORT`` (they are still used).

    Returns:
        A DataFrame with one row per usable file, or an empty DataFrame
        when no file contributed a row.
    """
    frames = []
    # Sorted so that ties between equally good runs are broken the same way
    # on every machine (os.listdir order is arbitrary).
    for name in sorted(os.listdir(directory)):
        if not (name.startswith('convergence') and name.endswith('.csv')):
            continue
        df = pd.read_csv(os.path.join(directory, name))
        keep = (df['split'] == 'validation') & (df['metric'] == metric)
        df = df[keep].sort_values(by='epoch')
        if len(df) < min_epochs:
            print('SHORT', directory, name, '(', len(df), ')')
        print(directory, name, len(df))
        if df.empty:
            # Nothing to contribute; skipping also keeps empty frames out
            # of the concatenation below.
            continue
        split_id, run_id = parse_split_and_run(name)
        # assign() returns a new frame, so no value is written into a slice
        # of the filtered data (avoids pandas' chained-assignment warning).
        frames.append(df.tail(1).assign(split=split_id, run=run_id))
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True)


def select_best_runs(results):
    """Return, for each split, the row with the highest ``value``.

    Args:
        results: DataFrame with at least ``split`` and ``value`` columns.

    Returns:
        The winning rows ordered by ascending ``split``.
    """
    best_idx = results.groupby('split')['value'].idxmax()
    return results.loc[best_idx].sort_values(by='split', ascending=True)


def main(argv=None):
    """Find the best run per split in a results folder and save it.

    Writes ``best_run.csv`` into the scanned folder and prints the winning
    run ids. Exits with an error message when the folder holds no usable
    convergence data.
    """
    args = parse_args(argv)
    print(args)

    results = collect_last_epochs(args.dir, args.metric)
    if results.empty:
        raise SystemExit(
            "No validation '{}' rows found in convergence*.csv files under '{}'".format(
                args.metric, args.dir))

    # Best run in this folder, top run in order of split
    dfbest = select_best_runs(results)
    dfbest.to_csv(os.path.join(args.dir, 'best_run.csv'), index=False)
    print('Best run of ', args.dir, '(', str(len(dfbest)), ') :', dfbest.sort_values(by='split', ascending=False)['run'].values)


if __name__ == '__main__':
    main()