Diff of /SynthSeg/evaluate.py [000000] .. [e571d1]

Switch to side-by-side view

--- a
+++ b/SynthSeg/evaluate.py
@@ -0,0 +1,370 @@
+"""
+If you use this code, please cite one of the SynthSeg papers:
+https://github.com/BBillot/SynthSeg/blob/master/bibtex.bib
+
+Copyright 2020 Benjamin Billot
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
+compliance with the License. You may obtain a copy of the License at
+https://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software distributed under the License is
+distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied. See the License for the specific language governing permissions and limitations under the
+License.
+"""
+
+
+# python imports
+import os
+import numpy as np
+from scipy.stats import wilcoxon
+from scipy.ndimage.morphology import distance_transform_edt
+
+# third-party imports
+from ext.lab2im import utils
+from ext.lab2im import edit_volumes
+
+
def fast_dice(x, y, labels):
    """Compute Dice scores for several labels at once via a joint histogram.
    :param x: input label map
    :param y: input label map of the same size as x
    :param labels: numpy array of labels to evaluate on
    :return: numpy array with Dice scores in the same order as labels.
    """

    assert x.shape == y.shape, 'both inputs should have same size, had {} and {}'.format(x.shape, y.shape)

    if len(labels) > 1:

        # work on sorted labels, scores are mapped back to the requested order at the end
        sorted_labels = np.sort(labels)

        # bin edges: a narrow bin around each label value, with gap bins in between
        edges = np.concatenate([sorted_labels - 0.1, sorted_labels + 0.1])
        edges.sort()
        edges = np.insert(edges, [0, len(edges)], [sorted_labels[0] - 0.1, sorted_labels[-1] + 0.1])

        # joint histogram: diagonal entries at odd indices hold the per-label overlaps
        joint_hist = np.histogram2d(x.flatten(), y.flatten(), bins=edges)[0]
        odd = np.arange(start=1, stop=2 * len(sorted_labels), step=2)
        scores = 2 * np.diag(joint_hist)[odd] / (np.sum(joint_hist, 0)[odd] + np.sum(joint_hist, 1)[odd] + 1e-5)
        scores = scores[np.searchsorted(sorted_labels, labels)]

    else:
        # single label: plain binary Dice on the corresponding masks
        scores = dice(x == labels[0], y == labels[0])

    return scores
+
+
def dice(x, y):
    """Dice overlap between two binary (boolean or 0/1) numpy arrays."""
    overlap = np.sum(x * y)
    return 2 * overlap / (np.sum(x) + np.sum(y))
+
+
def surface_distances(x, y, hausdorff_percentile=None, return_coordinate_max_distance=False):
    """Computes the maximum boundary distance (Hausdorff distance), and the average boundary distance of two masks.
    :param x: numpy array (boolean or 0/1)
    :param y: numpy array (boolean or 0/1)
    :param hausdorff_percentile: (optional) percentile (from 0 to 100) for which to compute the Hausdorff distance.
    Set this to 100 to compute the real Hausdorff distance (default). Can also be a list, where HD will be computed for
    the provided values.
    :param return_coordinate_max_distance: (optional) when set to true, the function will return the coordinates of the
    voxel with the highest distance (only if hausdorff_percentile=100).
    :return: max_dist, mean_dist(, coordinate_max_distance)
    max_dist: scalar with HD computed for the given percentile (or list if hausdorff_percentile was given as a list).
    mean_dist: scalar with average surface distance
    coordinate_max_distance: only returned return_coordinate_max_distance is True."""

    assert x.shape == y.shape, 'both inputs should have same size, had {} and {}'.format(x.shape, y.shape)
    n_dims = len(x.shape)

    # default to the true Hausdorff distance, and always work with a list of percentiles
    hausdorff_percentile = 100 if hausdorff_percentile is None else hausdorff_percentile
    hausdorff_percentile = utils.reformat_to_list(hausdorff_percentile)

    # crop x and y around ROI
    _, crop_x = edit_volumes.crop_volume_around_region(x)
    _, crop_y = edit_volumes.crop_volume_around_region(y)

    # set distances to maximum volume shape if either mask is empty
    # NOTE(review): this early exit always returns 2 scalars, even when a list of percentiles was
    # requested — callers unpacking 3 HD values must guard against empty masks upstream.
    if (crop_x is None) | (crop_y is None):
        return max(x.shape), max(x.shape)

    # crop both volumes to the union of the two bounding boxes
    crop = np.concatenate([np.minimum(crop_x, crop_y)[:n_dims], np.maximum(crop_x, crop_y)[n_dims:]])
    x = edit_volumes.crop_volume_with_idx(x, crop)
    y = edit_volumes.crop_volume_with_idx(y, crop)

    # detect edges: voxels at euclidean distance exactly 1 from the background lie on the boundary
    x_dist_int = distance_transform_edt(x * 1)
    x_edge = (x_dist_int == 1) * 1
    y_dist_int = distance_transform_edt(y * 1)
    y_edge = (y_dist_int == 1) * 1

    # calculate distance from edge
    x_dist = distance_transform_edt(np.logical_not(x_edge))
    y_dist = distance_transform_edt(np.logical_not(y_edge))

    # find distances from the 2 surfaces
    x_dists_to_y = y_dist[x_edge == 1]
    y_dists_to_x = x_dist[y_edge == 1]

    max_dist = list()
    coordinate_max_distance = None
    for hd_percentile in hausdorff_percentile:

        # find max distance from the 2 surfaces
        if hd_percentile == 100:
            max_distance = np.max(np.concatenate([x_dists_to_y, y_dists_to_x]))
            max_dist.append(max_distance)

            if return_coordinate_max_distance:
                indices_x_surface = np.where(x_edge == 1)
                # bug fix: compare against the scalar max distance. The previous code compared
                # against the whole max_dist *list*, which broke (wrong broadcasting) whenever
                # several percentiles were requested together with return_coordinate_max_distance.
                idx_max_distance_x = np.where(x_dists_to_y == max_distance)[0]
                if idx_max_distance_x.size != 0:
                    coordinate_max_distance = np.stack(indices_x_surface).transpose()[idx_max_distance_x]
                else:
                    indices_y_surface = np.where(y_edge == 1)
                    idx_max_distance_y = np.where(y_dists_to_x == max_distance)[0]
                    coordinate_max_distance = np.stack(indices_y_surface).transpose()[idx_max_distance_y]

        # find percentile of max distance
        else:
            max_dist.append(np.percentile(np.concatenate([x_dists_to_y, y_dists_to_x]), hd_percentile))

    # find average distance between 2 surfaces (fall back to the volume size if a surface is empty)
    if x_dists_to_y.shape[0] > 0:
        x_mean_dist_to_y = np.mean(x_dists_to_y)
    else:
        x_mean_dist_to_y = max(x.shape)
    if y_dists_to_x.shape[0] > 0:
        y_mean_dist_to_x = np.mean(y_dists_to_x)
    else:
        y_mean_dist_to_x = max(x.shape)
    mean_dist = (x_mean_dist_to_y + y_mean_dist_to_x) / 2

    # convert max dist back to scalar if HD only computed for 1 percentile
    if len(max_dist) == 1:
        max_dist = max_dist[0]

    # return coordinate of max distance if necessary
    if coordinate_max_distance is not None:
        return max_dist, mean_dist, coordinate_max_distance
    else:
        return max_dist, mean_dist
+
+
def compute_non_parametric_paired_test(dice_ref, dice_compare, eval_indices=None, alternative='two-sided'):
    """Compute non-parametric paired tests (Wilcoxon signed-rank) between two sets of Dice scores.
    :param dice_ref: numpy array with Dice scores, rows represent structures, and columns represent subjects.
    Taken as reference for one-sided tests.
    :param dice_compare: numpy array of the same format as dice_ref.
    :param eval_indices: (optional) list or 1d array indicating the row indices of structures to run the tests for.
    Default is None, for which p-values are computed for all rows.
    :param alternative: (optional) The alternative hypothesis to be tested, can be 'two-sided', 'greater', 'less'.
    :return: 1d numpy array, with p-values for all tests on evaluated structures, as well as an additional test for
    average scores (last value of the array). The average score is computed only on the evaluation structures.
    """

    # take all rows if indices not specified (1d inputs are handled by the final branch below)
    if eval_indices is None:
        if len(dice_ref.shape) > 1:
            eval_indices = np.arange(dice_ref.shape[0])
        else:
            eval_indices = []

    # loop over all evaluation label values
    pvalues = list()
    # bug fix: this condition used to be `> 1`, so requesting a single structure silently fell into
    # the 1d branch and ran the test on the *whole* 2d arrays, ignoring the requested index.
    if len(eval_indices) > 0:
        for idx in eval_indices:

            # paired test on the scores of one structure across subjects
            x = dice_ref[idx, :]
            y = dice_compare[idx, :]
            _, p = wilcoxon(x, y, alternative=alternative)
            pvalues.append(p)

        # additional test on the average score across the evaluated structures
        x = np.mean(dice_ref[eval_indices, :], axis=0)
        y = np.mean(dice_compare[eval_indices, :], axis=0)
        _, p = wilcoxon(x, y, alternative=alternative)
        pvalues.append(p)

    else:
        # 1d inputs: single test directly on the given score vectors
        _, p = wilcoxon(dice_ref, dice_compare, alternative=alternative)
        pvalues.append(p)

    return np.array(pvalues)
+
+
def cohens_d(volumes_x, volumes_y):
    """Compute Cohen's d effect size between two groups, independently for each structure.
    :param volumes_x: numpy array of volumes, where rows are subjects and columns are structures.
    :param volumes_y: numpy array of the same format as volumes_x (possibly with a different number of rows).
    :return: 1d numpy array with Cohen's d for each structure (positive when volumes_x has the larger mean).
    """

    means_x = np.mean(volumes_x, axis=0)
    means_y = np.mean(volumes_y, axis=0)

    # bug fix: the pooled standard deviation weights variances by (n - 1), so it must use
    # Bessel-corrected *sample* variances (ddof=1); np.var defaults to population variance (ddof=0),
    # which made the previous estimate inconsistent and biased low.
    var_x = np.var(volumes_x, axis=0, ddof=1)
    var_y = np.var(volumes_y, axis=0, ddof=1)

    n_x = np.shape(volumes_x)[0]
    n_y = np.shape(volumes_y)[0]

    # pooled standard deviation across the two groups
    std = np.sqrt(((n_x - 1) * var_x + (n_y - 1) * var_y) / (n_x + n_y - 2))
    cohensd = (means_x - means_y) / std

    return cohensd
+
+
def evaluation(gt_dir,
               seg_dir,
               label_list,
               mask_dir=None,
               compute_score_whole_structure=False,
               path_dice=None,
               path_hausdorff=None,
               path_hausdorff_99=None,
               path_hausdorff_95=None,
               path_mean_distance=None,
               crop_margin_around_gt=10,
               list_incorrect_labels=None,
               list_correct_labels=None,
               use_nearest_label=False,
               recompute=True,
               verbose=True):
    """This function computes Dice scores, as well as surface distances, between two sets of labels maps in gt_dir
    (ground truth) and seg_dir (typically predictions). Label maps in both folders are matched by sorting order.
    The resulting scores are saved at the specified locations.
    :param gt_dir: path of directory with gt label maps
    :param seg_dir: path of directory with label maps to compare to gt_dir. Matched to gt label maps by sorting order.
    :param label_list: list of label values for which to compute evaluation metrics. Can be a sequence, a 1d numpy
    array, or the path to such array.
    :param mask_dir: (optional) path of directory with masks of areas to ignore for each evaluated segmentation.
    Matched to gt label maps by sorting order. Default is None, where nothing is masked.
    :param compute_score_whole_structure: (optional) whether to also compute the selected scores for the whole segmented
    structure (i.e. scores are computed for a single structure obtained by regrouping all non-zero values). If True, the
    resulting scores are added as an extra row to the result matrices. Default is False.
    :param path_dice: path where the resulting Dice will be written as numpy array.
    Default is None, where the array is not saved.
    :param path_hausdorff: path where the resulting Hausdorff distances will be written as numpy array (only if
    compute_distances is True). Default is None, where the array is not saved.
    :param path_hausdorff_99: same as for path_hausdorff but for the 99th percentile of the boundary distance.
    :param path_hausdorff_95: same as for path_hausdorff but for the 95th percentile of the boundary distance.
    :param path_mean_distance: path where the resulting mean distances will be written as numpy array (only if
    compute_distances is True). Default is None, where the array is not saved.
    :param crop_margin_around_gt: (optional) margin by which to crop around the gt volumes, in order to compute the
    scores more efficiently. If 0, no cropping is performed.
    :param list_incorrect_labels: (optional) this option enables to replace some label values in the maps in seg_dir by
    other label values. Can be a list, a 1d numpy array, or the path to such an array.
    The incorrect labels can then be replaced either by specified values, or by the nearest value (see below).
    :param list_correct_labels: (optional) list of values to correct the labels specified in list_incorrect_labels.
    Correct values must have the same order as their corresponding value in list_incorrect_labels.
    :param use_nearest_label: (optional) whether to correct the incorrect label values with the nearest labels.
    :param recompute: (optional) whether to recompute the already existing results. Default is True.
    :param verbose: (optional) whether to print out info about the remaining number of cases.
    """

    # check whether to recompute: each metric is (re)computed only if its output path was given
    # and the corresponding file does not already exist (Dice defaults to True when no path is given)
    compute_dice = not os.path.isfile(path_dice) if (path_dice is not None) else True
    compute_hausdorff = not os.path.isfile(path_hausdorff) if (path_hausdorff is not None) else False
    compute_hausdorff_99 = not os.path.isfile(path_hausdorff_99) if (path_hausdorff_99 is not None) else False
    compute_hausdorff_95 = not os.path.isfile(path_hausdorff_95) if (path_hausdorff_95 is not None) else False
    compute_mean_dist = not os.path.isfile(path_mean_distance) if (path_mean_distance is not None) else False
    compute_hd = [compute_hausdorff, compute_hausdorff_99, compute_hausdorff_95]

    if compute_dice | any(compute_hd) | compute_mean_dist | recompute:

        # get list label maps to compare
        # NOTE(review): length mismatches below only print a warning; zip() further down then silently
        # truncates to the shorter list — confirm this best-effort behavior is intended.
        path_gt_labels = utils.list_images_in_folder(gt_dir)
        path_segs = utils.list_images_in_folder(seg_dir)
        path_gt_labels = utils.reformat_to_list(path_gt_labels, length=len(path_segs))
        if len(path_gt_labels) != len(path_segs):
            print('gt and segmentation folders must have the same amount of label maps.')
        if mask_dir is not None:
            path_masks = utils.list_images_in_folder(mask_dir)
            if len(path_masks) != len(path_segs):
                print('not the same amount of masks and segmentations.')
        else:
            path_masks = [None] * len(path_segs)

        # load labels list
        label_list, _ = utils.get_list_labels(label_list=label_list, labels_dir=gt_dir)
        n_labels = len(label_list)
        # sentinel value (above all evaluated labels) used to exclude masked voxels from the scores
        max_label = np.max(label_list) + 1

        # initialise result matrices; last axis of max_dists stores HD at percentiles [100, 99, 95],
        # and the extra row (if requested) holds the whole-structure scores
        if compute_score_whole_structure:
            max_dists = np.zeros((n_labels + 1, len(path_segs), 3))
            mean_dists = np.zeros((n_labels + 1, len(path_segs)))
            dice_coefs = np.zeros((n_labels + 1, len(path_segs)))
        else:
            max_dists = np.zeros((n_labels, len(path_segs), 3))
            mean_dists = np.zeros((n_labels, len(path_segs)))
            dice_coefs = np.zeros((n_labels, len(path_segs)))

        # loop over segmentations
        loop_info = utils.LoopInfo(len(path_segs), 10, 'evaluating', print_time=True)
        for idx, (path_gt, path_seg, path_mask) in enumerate(zip(path_gt_labels, path_segs, path_masks)):
            if verbose:
                loop_info.update(idx)

            # load gt labels and segmentation (resampled to an identity affine for voxel-wise comparison)
            gt_labels = utils.load_volume(path_gt, dtype='int', aff_ref=np.eye(4))
            seg = utils.load_volume(path_seg, dtype='int', aff_ref=np.eye(4))
            if path_mask is not None:
                # overwrite masked voxels with the sentinel label in both maps, so they agree there
                # and do not contribute to any evaluated label
                mask = utils.load_volume(path_mask, dtype='bool', aff_ref=np.eye(4))
                gt_labels[mask] = max_label
                seg[mask] = max_label

            # crop images around the gt region (with margin) to speed up the metrics
            if crop_margin_around_gt > 0:
                gt_labels, cropping = edit_volumes.crop_volume_around_region(gt_labels, margin=crop_margin_around_gt)
                seg = edit_volumes.crop_volume_with_idx(seg, cropping)

            if list_incorrect_labels is not None:
                seg = edit_volumes.correct_label_map(seg, list_incorrect_labels, list_correct_labels, use_nearest_label)

            # compute Dice scores
            dice_coefs[:n_labels, idx] = fast_dice(gt_labels, seg, label_list)

            # compute Dice scores for whole structures (all non-zero labels merged into one mask)
            if compute_score_whole_structure:
                temp_gt = (gt_labels > 0) * 1
                temp_seg = (seg > 0) * 1
                dice_coefs[-1, idx] = dice(temp_gt, temp_seg)
            else:
                temp_gt = temp_seg = None

            # compute average and Hausdorff distances
            if any(compute_hd) | compute_mean_dist:

                # compute unique label values
                unique_gt_labels = np.unique(gt_labels)
                unique_seg_labels = np.unique(seg)

                # compute max/mean surface distances for all labels
                for index, label in enumerate(label_list):
                    if (label in unique_gt_labels) & (label in unique_seg_labels):
                        mask_gt = np.where(gt_labels == label, True, False)
                        mask_seg = np.where(seg == label, True, False)
                        tmp_max_dists, mean_dists[index, idx] = surface_distances(mask_gt, mask_seg, [100, 99, 95])
                        max_dists[index, idx, :] = np.array(tmp_max_dists)
                    else:
                        # label missing from either map: fall back to the maximum possible distance
                        mean_dists[index, idx] = max(gt_labels.shape)
                        max_dists[index, idx, :] = np.array([max(gt_labels.shape)] * 3)

                # compute max/mean distances for whole structure
                if compute_score_whole_structure:
                    tmp_max_dists, mean_dists[-1, idx] = surface_distances(temp_gt, temp_seg, [100, 99, 95])
                    max_dists[-1, idx, :] = np.array(tmp_max_dists)

        # write results (each metric is saved only if its output path was provided)
        if path_dice is not None:
            utils.mkdir(os.path.dirname(path_dice))
            np.save(path_dice, dice_coefs)
        if path_hausdorff is not None:
            utils.mkdir(os.path.dirname(path_hausdorff))
            np.save(path_hausdorff, max_dists[..., 0])
        if path_hausdorff_99 is not None:
            utils.mkdir(os.path.dirname(path_hausdorff_99))
            np.save(path_hausdorff_99, max_dists[..., 1])
        if path_hausdorff_95 is not None:
            utils.mkdir(os.path.dirname(path_hausdorff_95))
            np.save(path_hausdorff_95, max_dists[..., 2])
        if path_mean_distance is not None:
            utils.mkdir(os.path.dirname(path_mean_distance))
            np.save(path_mean_distance, mean_dists)