[bca7a0]: / mura.py

Download this file

115 lines (97 with data), 4.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from __future__ import absolute_import, division, print_function
import re
import numpy as np
import pandas as pd
from sklearn.metrics import (accuracy_score, cohen_kappa_score, f1_score, precision_score, recall_score)
pd.set_option('display.max_rows', 20)
pd.set_option('precision', 4)
np.set_printoptions(precision=4)
class Mura(object):
"""`MURA <https://stanfordmlgroup.github.io/projects/mura/>`_ Dataset :
Towards Radiologist-Level Abnormality Detection in Musculoskeletal Radiographs.
"""
url = "https://cs.stanford.edu/group/mlgroup/mura-v1.0.zip"
filename = "mura-v1.0.zip"
md5_checksum = '4c36feddb7f5698c8bf291b912c438b1'
_patient_re = re.compile(r'patient(\d+)')
_study_re = re.compile(r'study(\d+)')
_image_re = re.compile(r'image(\d+)')
_study_type_re = re.compile(r'_(\w+)_patient')
def __init__(self, image_file_names, y_true, y_pred=None):
self.imgs = image_file_names
df_img = pd.Series(np.array(image_file_names), name='img')
self.y_true = y_true
df_true = pd.Series(np.array(y_true), name='y_true')
self.y_pred = y_pred
# number of unique classes
self.patient = []
self.study = []
self.study_type = []
self.image_num = []
self.encounter = []
for img in image_file_names:
self.patient.append(self._parse_patient(img))
self.study.append(self._parse_study(img))
self.image_num.append(self._parse_image(img))
self.study_type.append(self._parse_study_type(img))
self.encounter.append("{}_{}_{}".format(
self._parse_study_type(img),
self._parse_patient(img),
self._parse_study(img), ))
self.classes = np.unique(self.y_true)
df_patient = pd.Series(np.array(self.patient), name='patient')
df_study = pd.Series(np.array(self.study), name='study')
df_image_num = pd.Series(np.array(self.image_num), name='image_num')
df_study_type = pd.Series(np.array(self.study_type), name='study_type')
df_encounter = pd.Series(np.array(self.encounter), name='encounter')
self.data = pd.concat(
[
df_img,
df_encounter,
df_true,
df_patient,
df_patient,
df_study,
df_image_num,
df_study_type,
], axis=1)
if self.y_pred is not None:
self.y_pred_probability = self.y_pred.flatten()
self.y_pred = self.y_pred_probability.round().astype(int)
df_y_pred = pd.Series(self.y_pred, name='y_pred')
df_y_pred_probability = pd.Series(self.y_pred_probability, name='y_pred_probs')
self.data = pd.concat((self.data, df_y_pred, df_y_pred_probability), axis=1)
def __len__(self):
return len(self.imgs)
def _parse_patient(self, img_filename):
return int(self._patient_re.search(img_filename).group(1))
def _parse_study(self, img_filename):
return int(self._study_re.search(img_filename).group(1))
def _parse_image(self, img_filename):
return int(self._image_re.search(img_filename).group(1))
def _parse_study_type(self, img_filename):
return self._study_type_re.search(img_filename).group(1)
def metrics(self):
return "per image metrics:\n\taccuracy : {:.2f}\tf1 : {:.2f}\tprecision : {:.2f}\trecall : {:.2f}\tcohen_kappa : {:.2f}".format(
accuracy_score(self.y_true, self.y_pred),
f1_score(self.y_true, self.y_pred),
precision_score(self.y_true, self.y_pred),
recall_score(self.y_true, self.y_pred),
cohen_kappa_score(self.y_true, self.y_pred), )
def metrics_by_encounter(self):
y_pred = self.data.groupby(['encounter'])['y_pred_probs'].mean().round()
y_true = self.data.groupby(['encounter'])['y_true'].mean().round()
return "per encounter metrics:\n\taccuracy : {:.2f}\tf1 : {:.2f}\tprecision : {:.2f}\trecall : {:.2f}\tcohen_kappa : {:.2f}".format(
accuracy_score(y_true, y_pred),
f1_score(y_true, y_pred),
precision_score(y_true, y_pred),
recall_score(y_true, y_pred),
cohen_kappa_score(self.y_true, self.y_pred), )
# def metrics_by_study_type(self):
# y_pred = self.data.groupby(['study_type', 'encounter'])['y_pred_probs'].mean().round()
# y_true = self.data.groupby(['study_type', 'encounter'])['y_true'].mean().round()
# return "per study_type metrics:\n\taccuracy : {:.2f}\tf1 : {:.2f}\tprecision : {:.2f}\trecall : {:.2f}".format(
# accuracy_score(y_true, y_pred),
# f1_score(y_true, y_pred),
# precision_score(y_true, y_pred),
# recall_score(y_true, y_pred), )