analysis/ml/metrics.py
# -*- coding: utf-8 -*-

"""This file is part of the TPOT library.

TPOT was primarily developed at the University of Pennsylvania by:
    - Randal S. Olson (rso@randalolson.com)
    - Weixuan Fu (weixuanf@upenn.edu)
    - Daniel Angell (dpa34@drexel.edu)
    - and many more generous open source contributors

TPOT is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.

TPOT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with TPOT. If not, see <http://www.gnu.org/licenses/>.

"""

import numpy as np
from sklearn.metrics import make_scorer, SCORERS

def balanced_accuracy(y_true, y_pred):
    """Default scoring function: balanced accuracy.

    Balanced accuracy computes each class' accuracy on a per-class basis using a
    one-vs-rest encoding, then computes an unweighted average of the class accuracies.

    Parameters
    ----------
    y_true: numpy.ndarray {n_samples}
        True class labels
    y_pred: numpy.ndarray {n_samples}
        Predicted class labels by the estimator

    Returns
    -------
    fitness: float
        Returns a float value indicating the individual's balanced accuracy.
        0.5 is as good as chance, and 1.0 is perfect predictive accuracy.
    """
    all_classes = list(set(np.append(y_true, y_pred)))
    all_class_accuracies = []
    for this_class in all_classes:
        this_class_sensitivity = 0.
        this_class_specificity = 0.
        # Sensitivity: fraction of samples of this_class predicted correctly.
        if sum(y_true == this_class) != 0:
            this_class_sensitivity = \
                float(sum((y_pred == this_class) & (y_true == this_class))) /\
                float(sum((y_true == this_class)))

        # Specificity: fraction of samples of other classes predicted as other classes.
        if sum(y_true != this_class) != 0:
            this_class_specificity = \
                float(sum((y_pred != this_class) & (y_true != this_class))) /\
                float(sum((y_true != this_class)))

        # print('class', this_class, 'sensitivity:', this_class_sensitivity)
        # print('class', this_class, 'specificity:', this_class_specificity)
        # Per-class accuracy is the unweighted mean of sensitivity and specificity.
        this_class_accuracy = (this_class_sensitivity + this_class_specificity) / 2.
        # print('class', this_class, 'accuracy:', this_class_accuracy)
        all_class_accuracies.append(this_class_accuracy)

    return np.mean(all_class_accuracies)


# SCORERS['balanced_accuracy'] = make_scorer(balanced_accuracy)
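

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal sanity check of balanced_accuracy on a hand-worked toy example,
# plus the make_scorer wrapping hinted at by the commented-out registration
# above. The toy labels and expected value are assumptions for illustration;
# assigning into SCORERS only works on scikit-learn versions that still
# expose that dict.
if __name__ == '__main__':
    y_true = np.array([0, 0, 0, 1, 1, 1, 1, 1])
    y_pred = np.array([0, 0, 1, 1, 1, 1, 1, 1])
    # Class 0: sensitivity 2/3, specificity 5/5 -> (2/3 + 1) / 2 = 5/6
    # Class 1: sensitivity 5/5, specificity 2/3 -> (1 + 2/3) / 2 = 5/6
    # Unweighted mean over both classes: 5/6 = 0.8333...
    print(balanced_accuracy(y_true, y_pred))

    # make_scorer turns the metric into a scorer object usable with
    # cross_val_score, GridSearchCV, and similar scikit-learn utilities.
    scorer = make_scorer(balanced_accuracy)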