|
a |
|
b/find_best_model.py |
|
|
1 |
import os |
|
|
2 |
import numpy as np |
|
|
3 |
import pandas as pd |
|
|
4 |
import argparse |
|
|
5 |
|
|
|
6 |
# Filtered histories with fewer rows than this are reported with a 'SHORT'
# notice; they are still included in the comparison.
_SHORT_HISTORY_ROWS = 7


def _parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the arguments from ``sys.argv``.

    Returns:
        The parsed namespace with the attributes ``dir`` and ``metric``.
    """
    parser = argparse.ArgumentParser(description='PET lymphoma classification')
    parser.add_argument('--dir', type=str, default='results', help='name of folder to find convergence files in (default "results")')
    parser.add_argument('--metric', type=str, default='auc', help='which metric to compare models with (default "auc")')
    return parser.parse_args(argv)


def _split_and_run_from_name(filename):
    """Extract the integer split and run numbers encoded in a file name.

    The split number is whatever follows 'split' inside the second
    underscore-separated token; the run number is the text between the first
    'run' and the following '.'.

    Raises:
        ValueError: If the file name does not contain both numbers.
    """
    try:
        split_id = int(filename.split('_')[1].split('split')[1])
        run_id = int(filename.split('run')[1].split('.')[0])
    except (IndexError, ValueError) as err:
        raise ValueError(
            'cannot extract split/run numbers from file name {!r}'.format(filename)
        ) from err
    return split_id, run_id


def _final_scores(directory, metric):
    """Collect the last-epoch validation row of every convergence file.

    Args:
        directory: Folder that is scanned for 'convergence*.csv' files.
        metric: Value of the 'metric' column to keep.

    Returns:
        A list of one-row DataFrames, one per file that has at least one
        matching row. In each of them the 'split' column holds the split
        number taken from the file name and 'run' holds the run number.
    """
    finals = []
    # Sorted so that ties between runs are resolved the same way on every
    # filesystem (os.listdir returns entries in arbitrary order).
    for filename in sorted(os.listdir(directory)):
        if not (filename.startswith('convergence') and filename.endswith('.csv')):
            continue

        history = pd.read_csv(os.path.join(directory, filename))
        keep = (history['split'] == 'validation') & (history['metric'] == metric)
        history = history[keep].sort_values(by='epoch')

        if len(history) < _SHORT_HISTORY_ROWS:
            print('SHORT', directory, filename, '(', len(history), ')')
        print(directory, filename, len(history))

        if history.empty:
            # Nothing to compare for this file.
            continue

        # Copy before assigning: the one-row tail is a slice of the filtered
        # frame and writing into it triggers SettingWithCopyWarning.
        last = history.tail(1).copy()
        split_id, run_id = _split_and_run_from_name(filename)
        # The textual 'split' column is replaced by the numeric split id.
        last['split'] = split_id
        last['run'] = run_id
        finals.append(last)
    return finals


def main(argv=None):
    """Find, per split, the run with the highest last-epoch validation value.

    Reads every 'convergence*.csv' file in ``--dir``, keeps the rows whose
    'split' is 'validation' and whose 'metric' equals ``--metric``, takes the
    row with the largest 'epoch' of each file, and for every split number
    selects the run with the largest 'value'. The selection is written to
    'best_run.csv' inside ``--dir`` (ordered by ascending split) and the
    chosen run numbers are printed.

    Args:
        argv: Optional list of argument strings; None means ``sys.argv``.

    Returns:
        The DataFrame that was written to 'best_run.csv'.

    Raises:
        SystemExit: If no convergence file provides a matching row.
        ValueError: If a convergence file name lacks the split/run numbers.
    """
    args = _parse_args(argv)
    print(args)

    finals = _final_scores(args.dir, args.metric)
    if not finals:
        # pd.concat([]) would fail with an opaque 'No objects to concatenate'.
        raise SystemExit(
            'No validation rows for metric "{}" found in convergence*.csv '
            'files under "{}"'.format(args.metric, args.dir)
        )

    # Best run in this folder
    scores = pd.concat(finals).reset_index(drop=True)
    best_index = scores.groupby('split')['value'].idxmax()

    # Top run in order of split
    dfbest = scores.loc[best_index].sort_values(by='split', ascending=True)
    dfbest.to_csv(os.path.join(args.dir, 'best_run.csv'), index=False)
    print('Best run of ',args.dir,'(',str(len(dfbest)),') :',dfbest.sort_values(by='split',ascending=False).run.values)
    return dfbest


if __name__ == '__main__':
    main()