Diff of /utils/metrics.py [000000] .. [3f1788]

#!/usr/bin/env python

import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', None)  # None (rather than the deprecated -1) shows full column contents

from functools import partial
from typing import List
from sklearn.metrics import confusion_matrix, roc_auc_score
from scipy import stats

def _mean_confidence_interval(data, conf=0.95, decimal=3):
  """Return (mean, lower, upper) of a two-sided t-distribution confidence interval over `data`."""
  assert 0 < conf < 1, f"Confidence level must be within (0, 1). It is {conf}"
  a = 1.0 * np.array(data)
  n = len(a)
  m, se = np.mean(a), stats.sem(a)
  h = se * stats.t.ppf((1 + conf) / 2., n - 1)
  return np.round(m, decimal), np.round(m - h, decimal), np.round(m + h, decimal)

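# Illustrative check of the helper above (hypothetical values, not taken from any run in this repo):
#   _mean_confidence_interval([0.81, 0.84, 0.79, 0.86, 0.82], conf=0.95)
# returns approximately (0.824, 0.790, 0.858): the mean of the five values plus/minus a
# symmetric 95% t-interval, i.e. stats.t.ppf(0.975, 4) * stats.sem(data).
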
class BinaryAvgMetrics(object):
  """Aggregates binary-classification metrics over multiple runs (one targets/predictions/probabilities list per run)."""

  def __init__(self, targets: List[List[int]], predictions: List[List[int]], probs: List[List[float]], decimal=3) -> None:
    assert len(targets) == len(predictions) == len(probs), f"Target list (length = {len(targets)}), predictions list (length = {len(predictions)}) and probabilities list (length = {len(probs)}) must all be of the same length!"
    self.targs = targets
    self.n_runs = len(self.targs)
    self.preds = predictions
    self.probs = probs
    self.decimal = decimal

    # One 2x2 confusion matrix per run; sklearn's layout is [[tn, fp], [fn, tp]].
    self.cms = np.zeros((len(self.targs), 2, 2), dtype=np.int64)

    for i, (targ, pred) in enumerate(zip(self.targs, self.preds)):
      self.cms[i] = confusion_matrix(targ, pred)

  @property
  def tns(self):
    return self.cms[:, 0, 0]

  @property
  def fps(self):
    return self.cms[:, 0, 1]

  @property
  def fns(self):
    return self.cms[:, 1, 0]

  @property
  def tps(self):
    return self.cms[:, 1, 1]

  @property
  def cm_avg(self):
    # Element-wise mean of the per-run confusion matrices, rounded up to whole counts.
    return np.ceil(np.array([[self.tns.mean(), self.fps.mean()], [self.fns.mean(), self.tps.mean()]])).astype(np.int64)

  @property
  def prevalence_avg(self):
    # Mean fraction of positive samples across runs.
    return np.round(((self.fns + self.tps) / (self.tns + self.fps + self.fns + self.tps)).mean(), self.decimal)

  def sensitivities(self):
    return self.tps / (self.tps + self.fns)

  def sensitivity_avg(self, conf=None):
    se = self.sensitivities()
    if conf is not None:
      return _mean_confidence_interval(se, conf)

    return np.round(se.mean(), self.decimal)

  def specificities(self):
    return self.tns / (self.tns + self.fps)

  def specificity_avg(self, conf=None):
    sp = self.specificities()
    if conf is not None:
      return _mean_confidence_interval(sp, conf)

    return np.round(sp.mean(), self.decimal)

  def ppvs(self):
    return self.tps / (self.tps + self.fps)

  def ppv_avg(self, conf=None):
    ppv = self.ppvs()
    if conf is not None:
      return _mean_confidence_interval(ppv, conf)

    return np.round(ppv.mean(), self.decimal)

  def npvs(self):
    return self.tns / (self.tns + self.fns)

  def npv_avg(self, conf=None):
    npv = self.npvs()
    if conf is not None:
      return _mean_confidence_interval(npv, conf)

    return np.round(npv.mean(), self.decimal)

  def f1s(self):
    return (2 * self.sensitivities() * self.ppvs()) / (self.sensitivities() + self.ppvs())

  def f1_avg(self, conf=None):
    f1 = self.f1s()
    if conf is not None:
      return _mean_confidence_interval(f1, conf)

    return np.round(f1.mean(), self.decimal)

  def aurocs(self):
    return np.array([roc_auc_score(targ, prob) for targ, prob in zip(self.targs, self.probs)])

  def auroc_avg(self, conf=None):
    auroc = self.aurocs()
    if conf is not None:
      return _mean_confidence_interval(auroc, conf)

    return np.round(auroc.mean(), self.decimal)

  def get_avg_metrics(self, conf=None, defn=False):
    definitions = {
      'sensitivity': "When it's ACTUALLY YES, how often does it PREDICT YES?",
      'specificity': "When it's ACTUALLY NO, how often does it PREDICT NO?",
      'ppv': "When it PREDICTS YES, how often is it correct?",
      'auroc': "Indicates how well the model is capable of distinguishing between classes",
      'npv': "When it PREDICTS NO, how often is it correct?",
      'f1': "Harmonic mean of sensitivity and ppv",
    }
    if conf is None:
      metrics = {
        'sensitivity': [self.sensitivity_avg() * 100],
        'specificity': [self.specificity_avg() * 100],
        'ppv': [self.ppv_avg() * 100],
        'auroc': [self.auroc_avg() * 100],
        'npv': [self.npv_avg() * 100],
        'f1': [self.f1_avg() * 100],
      }

      if defn:
        for metric, value in metrics.items():
          value.append(definitions[metric])
        d = pd.DataFrame(metrics.values(), index=metrics.keys(), columns=['Value', 'Definition'])
      else:
        d = pd.DataFrame(metrics.values(), index=metrics.keys(), columns=['Value'])

      return d

    else:
      metrics = {
        'sensitivity': [value * 100 for value in self.sensitivity_avg(conf)],
        'specificity': [value * 100 for value in self.specificity_avg(conf)],
        'ppv': [value * 100 for value in self.ppv_avg(conf)],
        'auroc': [value * 100 for value in self.auroc_avg(conf)],
        'npv': [value * 100 for value in self.npv_avg(conf)],
        'f1': [value * 100 for value in self.f1_avg(conf)],
      }

      if defn:
        for metric, value in metrics.items():
          value.append(definitions[metric])
        d = pd.DataFrame(metrics.values(), index=metrics.keys(), columns=['Mean', 'Lower', 'Upper', 'Definition'])
      else:
        d = pd.DataFrame(metrics.values(), index=metrics.keys(), columns=['Mean', 'Lower', 'Upper'])

      return d

  def __repr__(self):
    return f"Number of Runs: {self.n_runs}\n"

  def __len__(self):
    return len(self.targs)

def get_best_model(bam: BinaryAvgMetrics, fnames: List[str]):
  """Return, for each metric, the best per-run value and the model file that achieved it."""
  best_se, best_se_model = 0, None
  best_sp, best_sp_model = 0, None
  best_ppv, best_ppv_model = 0, None
  best_auroc, best_auroc_model = 0, None
  best_npv, best_npv_model = 0, None
  best_f1, best_f1_model = 0, None

  for i in range(bam.n_runs):
    se = bam.tps[i] / (bam.tps[i] + bam.fns[i])
    sp = bam.tns[i] / (bam.tns[i] + bam.fps[i])
    ppv = bam.tps[i] / (bam.tps[i] + bam.fps[i])
    npv = bam.tns[i] / (bam.tns[i] + bam.fns[i])
    f1 = (2 * se * ppv) / (se + ppv)

    if best_se < se:
      best_se = se
      best_se_model = fnames[i]
    if best_sp < sp:
      best_sp = sp
      best_sp_model = fnames[i]
    if best_ppv < ppv:
      best_ppv = ppv
      best_ppv_model = fnames[i]
    if best_npv < npv:
      best_npv = npv
      best_npv_model = fnames[i]
    if best_f1 < f1:
      best_f1 = f1
      best_f1_model = fnames[i]

  for i, (targ, prob) in enumerate(zip(bam.targs, bam.probs)):
    auroc = roc_auc_score(targ, prob)
    if best_auroc < auroc:
      best_auroc = auroc
      best_auroc_model = fnames[i]

  d = {
    'sensitivity': [best_se, best_se_model],
    'specificity': [best_sp, best_sp_model],
    'ppv': [best_ppv, best_ppv_model],
    'auroc': [best_auroc, best_auroc_model],
    'npv': [best_npv, best_npv_model],
    'f1': [best_f1, best_f1_model],
  }

  return pd.DataFrame(d.values(), index=d.keys(), columns=['Value', 'Model File'])
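
# A minimal usage sketch (hypothetical data and file names, not part of the original module):
# aggregate three runs of a binary classifier, report averaged metrics with 95% confidence
# intervals, and pick the best run per metric.
if __name__ == '__main__':
  targs = [[0, 1, 1, 0, 1], [1, 0, 1, 1, 0], [0, 0, 1, 1, 1]]
  preds = [[0, 1, 0, 0, 1], [1, 0, 1, 0, 0], [0, 1, 1, 1, 1]]
  probs = [[0.2, 0.8, 0.4, 0.1, 0.9], [0.7, 0.3, 0.6, 0.4, 0.2], [0.1, 0.6, 0.7, 0.8, 0.9]]

  bam = BinaryAvgMetrics(targs, preds, probs)
  print(bam.cm_avg)                      # averaged 2x2 confusion matrix
  print(bam.get_avg_metrics())           # one 'Value' column per metric, in percent
  print(bam.get_avg_metrics(conf=0.95))  # 'Mean', 'Lower', 'Upper' columns per metric
  print(get_best_model(bam, ['run0.pkl', 'run1.pkl', 'run2.pkl']))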