[d129b2]: /medicalbert/evaluator/standard_evaluator.py

import logging
import os
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score, average_precision_score
from torch.nn import CrossEntropyLoss
from tqdm import tqdm


## Built in test evaluation
class StandardEvaluator:

    def __init__(self, results_path, config, datareader, best_model_selector):
        self.datareader = datareader
        self.result_dir = results_path  # Path to the results directory
        self.config = config
        self.model_selector = best_model_selector

    # This method will run a classifier against a train and validation set
    def go(self, classifier, classifier_name):
        train_path = os.path.join(self.result_dir, classifier_name, "train")
        valid_path = os.path.join(self.result_dir, classifier_name, "validation")

        self.run(classifier, self.datareader.get_train(), train_path)
        results = self.run(classifier, self.datareader.get_validation(), valid_path)

        self.model_selector.update(results, classifier, classifier_name)

    def test(self):
        logging.info("Running Test evaluation")
        classifier = self.model_selector.get_classifier()
        name = self.model_selector.get_checkpoint()
        valid_score = self.model_selector.get_score()

        test_result_dir = "test" + "_" + name + "_" + str(valid_score)
        self.run(classifier, self.datareader.get_test(), test_result_dir)

    # Functions for formatting the output into a human-readable format.
    @staticmethod
    def make_output_dataframe(logits, labels):
        first_logit = pd.Series(logits[:, 0])
        second_logit = pd.Series(logits[:, 1])
        frame = {'0': first_logit, '1': second_logit, 'label': labels}
        return pd.DataFrame(frame)

    # Collect all the outputs into suitable data structures.
    @staticmethod
    def summarise(all_logits, all_labels):
        loss_fct = CrossEntropyLoss()
        loss = loss_fct(torch.from_numpy(all_logits), torch.from_numpy(all_labels)).item()

        roc = roc_auc_score(all_labels, all_logits[:, 1])
        precision = average_precision_score(all_labels, all_logits[:, 1])
        accuracy = accuracy_score(all_labels, np.argmax(all_logits, axis=1))

        # Create a Pandas dataframe from the summary dictionary.
        summary = {"ROC": roc, "AVP": precision, "ACCURACY": accuracy, "LOSS": loss}
        return pd.DataFrame([summary])

    @staticmethod
    def condense_output(all_logits, all_labels):
        summary = StandardEvaluator.summarise(all_logits, all_labels)
        output = StandardEvaluator.make_output_dataframe(all_logits, all_labels)
        return summary, output

    def save(self, summary, df, path):
        # If we are using a local filesystem we'll need to create the dirs; otherwise we don't.
        if path[:2] != "gs":
            if not os.path.exists(path):
                os.makedirs(path)

        # Save the summary file.
        summary.to_csv(os.path.join(path, 'summary.csv'))

        # Save the model output.
        df.to_csv(os.path.join(path, "output.csv"))

    def run(self, classifier, data, output_dir):
        logging.info("Running Evaluations")

        # Put the classifier in evaluation mode and move it to the configured device.
        device = torch.device(self.config['device'])
        classifier.set_eval_mode()
        classifier.model.to(device)

        all_logits = None
        all_labels = None

        for step, batch in enumerate(tqdm(data, desc="evaluating")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch

            with torch.no_grad():
                out = classifier.model(input_ids, labels=label_ids)
                logits = out[1]

            logits = logits.detach().cpu().numpy()
            labels = label_ids.detach().cpu().numpy()

            if all_logits is not None:
                all_labels = np.concatenate([all_labels, labels])
                all_logits = np.concatenate([all_logits, logits])
            else:
                all_labels = labels
                all_logits = logits

        summary, output = StandardEvaluator.condense_output(all_logits, all_labels)
        self.save(summary, output, output_dir)
        return summary