--- /dev/null
+++ b/tests/test_metrics.py
@@ -0,0 +1,350 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+from mmseg.core.evaluation import (eval_metrics, mean_dice, mean_fscore,
+                                   mean_iou)
+from mmseg.core.evaluation.metrics import f_score
+
+
+def get_confusion_matrix(pred_label, label, num_classes, ignore_index):
+    """Compute the confusion matrix of a prediction against its label.
+
+    Args:
+        pred_label (np.ndarray): 2D prediction map.
+        label (np.ndarray): 2D ground-truth label map.
+        num_classes (int): Number of categories.
+        ignore_index (int): Label index to be ignored during evaluation.
+    """
+
+    mask = (label != ignore_index)
+    pred_label = pred_label[mask]
+    label = label[mask]
+
+    # Encode each (gt, pred) pair as a single index in [0, n * n) so that one
+    # bincount call produces the flattened confusion matrix.
+    n = num_classes
+    inds = n * label + pred_label
+
+    mat = np.bincount(inds, minlength=n**2).reshape(n, n)
+
+    return mat
+
+
+# This function is deprecated since it is not memory efficient.
+def legacy_mean_iou(results, gt_seg_maps, num_classes, ignore_index):
+    num_imgs = len(results)
+    assert len(gt_seg_maps) == num_imgs
+    total_mat = np.zeros((num_classes, num_classes), dtype=np.float64)
+    for i in range(num_imgs):
+        mat = get_confusion_matrix(
+            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+        total_mat += mat
+    all_acc = np.diag(total_mat).sum() / total_mat.sum()
+    acc = np.diag(total_mat) / total_mat.sum(axis=1)
+    iou = np.diag(total_mat) / (
+        total_mat.sum(axis=1) + total_mat.sum(axis=0) - np.diag(total_mat))
+
+    return all_acc, acc, iou
+
+
+# This function is deprecated since it is not memory efficient.
+def legacy_mean_dice(results, gt_seg_maps, num_classes, ignore_index):
+    num_imgs = len(results)
+    assert len(gt_seg_maps) == num_imgs
+    total_mat = np.zeros((num_classes, num_classes), dtype=np.float64)
+    for i in range(num_imgs):
+        mat = get_confusion_matrix(
+            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+        total_mat += mat
+    all_acc = np.diag(total_mat).sum() / total_mat.sum()
+    acc = np.diag(total_mat) / total_mat.sum(axis=1)
+    dice = 2 * np.diag(total_mat) / (
+        total_mat.sum(axis=1) + total_mat.sum(axis=0))
+
+    return all_acc, acc, dice
+
+
+# This function is deprecated since it is not memory efficient.
+def legacy_mean_fscore(results,
+                       gt_seg_maps,
+                       num_classes,
+                       ignore_index,
+                       beta=1):
+    num_imgs = len(results)
+    assert len(gt_seg_maps) == num_imgs
+    total_mat = np.zeros((num_classes, num_classes), dtype=np.float64)
+    for i in range(num_imgs):
+        mat = get_confusion_matrix(
+            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+        total_mat += mat
+    all_acc = np.diag(total_mat).sum() / total_mat.sum()
+    recall = np.diag(total_mat) / total_mat.sum(axis=1)
+    precision = np.diag(total_mat) / total_mat.sum(axis=0)
+    fv = np.vectorize(f_score)
+    fscore = fv(precision, recall, beta=beta)
+
+    return all_acc, recall, precision, fscore
+
+
+def test_metrics():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+
+    # Test the availability of arg: ignore_index.
+    label[:, 2, 5:10] = ignore_index
+
+    # Test the correctness of the implementation of mIoU calculation.
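+    # The legacy helpers above serve as a confusion-matrix reference
+    # implementation; eval_metrics should agree with them to within
+    # floating-point tolerance.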
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index, metrics='mIoU')
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
+                                              ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(iou, iou_l)
+    # Test the correctness of the implementation of mDice calculation.
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index, metrics='mDice')
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes,
+                                                ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(dice, dice_l)
+    # Test the correctness of the implementation of mFscore calculation.
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index, metrics='mFscore')
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
+        results, label, num_classes, ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(fscore, fscore_l)
+    # Test the correctness of the implementation of joint calculation.
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index,
+        metrics=['mIoU', 'mDice', 'mFscore'])
+    all_acc, acc, iou, dice, precision, recall, fscore = ret_metrics[
+        'aAcc'], ret_metrics['Acc'], ret_metrics['IoU'], ret_metrics[
+            'Dice'], ret_metrics['Precision'], ret_metrics[
+                'Recall'], ret_metrics['Fscore']
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(iou, iou_l)
+    assert np.allclose(dice, dice_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(fscore, fscore_l)
+
+    # Test the correctness of the calculation when num_classes is larger
+    # than the maximum value in the input maps.
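+    # With values drawn from [0, 5) and [0, 4), most of the 19 classes never
+    # occur, so their per-class scores are undefined (0 / 0). nan_to_num=-1
+    # should replace those NaNs with -1, which the checks on the last class
+    # verify.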
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics='mIoU',
+        nan_to_num=-1)
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    assert acc[-1] == -1
+    assert iou[-1] == -1
+
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics='mDice',
+        nan_to_num=-1)
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    assert acc[-1] == -1
+    assert dice[-1] == -1
+
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics='mFscore',
+        nan_to_num=-1)
+    all_acc, precision, recall, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Precision'], ret_metrics['Recall'], ret_metrics['Fscore']
+    assert precision[-1] == -1
+    assert recall[-1] == -1
+    assert fscore[-1] == -1
+
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics=['mDice', 'mIoU', 'mFscore'],
+        nan_to_num=-1)
+    all_acc, acc, iou, dice, precision, recall, fscore = ret_metrics[
+        'aAcc'], ret_metrics['Acc'], ret_metrics['IoU'], ret_metrics[
+            'Dice'], ret_metrics['Precision'], ret_metrics[
+                'Recall'], ret_metrics['Fscore']
+    assert acc[-1] == -1
+    assert dice[-1] == -1
+    assert iou[-1] == -1
+    assert precision[-1] == -1
+    assert recall[-1] == -1
+    assert fscore[-1] == -1
+
+    # Test the bug caused by torch.histc.
+    # torch.histc: https://pytorch.org/docs/stable/generated/torch.histc.html
+    # When the arg ``bins`` is set to the same value as the arg ``max``, some
+    # channels of mIoU may be nan.
+    results = np.array([np.repeat(31, 59)])
+    label = np.array([np.arange(59)])
+    num_classes = 59
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index=255, metrics='mIoU')
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    assert not np.any(np.isnan(iou))
+
+
+def test_mean_iou():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+    label[:, 2, 5:10] = ignore_index
+    ret_metrics = mean_iou(results, label, num_classes, ignore_index)
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
+                                              ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(iou, iou_l)
+
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = mean_iou(
+        results, label, num_classes, ignore_index=255, nan_to_num=-1)
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    assert acc[-1] == -1
+    assert iou[-1] == -1
+
+
+def test_mean_dice():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+    label[:, 2, 5:10] = ignore_index
+    ret_metrics = mean_dice(results, label, num_classes, ignore_index)
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes,
+                                                ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(dice, dice_l)
+
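+    # As in test_mean_iou, check that nan_to_num=-1 replaces the undefined
+    # per-class scores with -1 when most classes are absent from both maps.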
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = mean_dice(
+        results, label, num_classes, ignore_index=255, nan_to_num=-1)
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    assert acc[-1] == -1
+    assert dice[-1] == -1
+
+
+def test_mean_fscore():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+    label[:, 2, 5:10] = ignore_index
+    ret_metrics = mean_fscore(results, label, num_classes, ignore_index)
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
+        results, label, num_classes, ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(fscore, fscore_l)
+
+    ret_metrics = mean_fscore(
+        results, label, num_classes, ignore_index, beta=2)
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
+        results, label, num_classes, ignore_index, beta=2)
+    assert all_acc == all_acc_l
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(fscore, fscore_l)
+
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = mean_fscore(
+        results, label, num_classes, ignore_index=255, nan_to_num=-1)
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    assert recall[-1] == -1
+    assert precision[-1] == -1
+    assert fscore[-1] == -1
+
+
+def test_filename_inputs():
+    import tempfile
+
+    import cv2
+
+    def save_arr(input_arrays: list, title: str, is_image: bool, dir: str):
+        """Save arrays to ``dir`` as .png images or .npy files."""
+        filenames = []
+        SUFFIX = '.png' if is_image else '.npy'
+        for idx, arr in enumerate(input_arrays):
+            filename = '{}/{}-{}{}'.format(dir, title, idx, SUFFIX)
+            if is_image:
+                cv2.imwrite(filename, arr)
+            else:
+                np.save(filename, arr)
+            filenames.append(filename)
+        return filenames
+
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    labels = np.random.randint(0, num_classes, size=pred_size)
+    labels[:, 2, 5:10] = ignore_index
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Predictions go through .npy files and labels through .png images to
+        # exercise both filename-based input paths of eval_metrics.
+        result_files = save_arr(results, 'pred', False, temp_dir)
+        label_files = save_arr(labels, 'label', True, temp_dir)
+
+        ret_metrics = eval_metrics(
+            result_files,
+            label_files,
+            num_classes,
+            ignore_index,
+            metrics='mIoU')
+        all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics[
+            'Acc'], ret_metrics['IoU']
+        all_acc_l, acc_l, iou_l = legacy_mean_iou(results, labels, num_classes,
+                                                  ignore_index)
+        assert all_acc == all_acc_l
+        assert np.allclose(acc, acc_l)
+        assert np.allclose(iou, iou_l)