a b/slideflow/stats/stats_utils.py
1
from typing import Dict, Tuple
2
3
import numpy as np
4
from sklearn.cluster import KMeans
5
from sklearn.metrics import pairwise_distances_argmin_min
6
7
8
def calculate_centroid(
    act: Dict[str, np.ndarray]
) -> Tuple[Dict[str, int], Dict[str, np.ndarray]]:
    """Calculate slide-level centroid indices for an activations dict.

    For every slide, finds the tile whose activation vector lies nearest to
    the centroid of all tile activations for that slide. Slides with no
    tiles are skipped and do not appear in either returned dict.

    Args:
        act (dict): Dict mapping slide names to ndarray of activations
            across tiles, of shape (n_tiles, n_features)

    Returns:
        A tuple containing

            dict: Dict mapping slides to index of tile nearest to centroid

            dict: Dict mapping slides to activations of tile nearest to centroid
    """
    optimal_indices: Dict[str, int] = {}
    centroid_activations: Dict[str, np.ndarray] = {}
    for slide, slide_act in act.items():
        # A slide without tiles has no centroid; leave it out of the results.
        if not len(slide_act):
            continue
        # Reuse the shared helper rather than duplicating the centroid search.
        # The helper may hand back a NumPy integer; coerce it so the mapping
        # holds plain ints, as the return annotation promises.
        nearest = int(get_centroid_index(slide_act))
        optimal_indices[slide] = nearest
        centroid_activations[slide] = slide_act[nearest]
    return optimal_indices, centroid_activations
40
41
42
def get_centroid_index(arr: np.ndarray) -> int:
    """Calculate index nearest to centroid from a given 2D input array.

    The centroid is the per-feature mean of all rows, which is what a
    single-cluster k-means fit converges to, so it is computed directly
    instead of fitting a clustering model.

    Args:
        arr (np.ndarray): Array of shape (n_samples, n_features).

    Returns:
        int: Index of the row with the smallest Euclidean distance to the
        centroid. Exact ties resolve to the lowest index.

    Raises:
        ValueError: If ``arr`` is not a non-empty 2D array, or if it
            contains NaN or infinite values.
    """
    # Work in float64 so low-precision or integer inputs do not lose accuracy
    # when averaging; the caller's array is never modified.
    data = np.asarray(arr, dtype=np.float64)
    if data.ndim != 2 or data.size == 0:
        raise ValueError(
            "Expected a non-empty 2D array of shape (n_samples, n_features), "
            f"got shape {data.shape}."
        )
    offsets = data - data.mean(axis=0)
    # Squared distances are enough for an argmin and avoid a sqrt per row.
    sq_dist = np.square(offsets).sum(axis=1)
    # NaN/inf anywhere in the input poisons the mean and thus the distances;
    # fail loudly instead of letting argmin silently pick an arbitrary row.
    if not np.isfinite(sq_dist).all():
        raise ValueError("Input contains NaN or infinite values.")
    return int(np.argmin(sq_dist))
47
48
49
def normalize_layout(
    layout: np.ndarray,
    min_percentile: int = 1,
    max_percentile: int = 99,
    relative_margin: float = 0.1
) -> Tuple[
    np.ndarray,
    Tuple[np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray]
]:
    """Removes outliers and scales layout to between [0,1].

    Values are clipped to a per-axis window spanning the requested
    percentiles, widened on both sides by the same margin, and then
    rescaled so each axis of the clipped data covers [0, 1]. The input
    array is not modified.

    Args:
        layout (np.ndarray): 2D array containing data to be scaled.
        min_percentile (int, optional): Percentile for scaling. Defaults to 1.
        max_percentile (int, optional): Percentile for scaling. Defaults to 99.
        relative_margin (float, optional): Add an additional margin (fraction
            of total plot width). Defaults to 0.1.

    Returns:
        np.ndarray: layout array, re-scaled and clipped.

        tuple(np.ndarray, np.ndarray): Per-axis range (min, max) in original
        space covered by this layout.

        tuple(np.ndarray, np.ndarray): Per-axis clipping values (min, max)
        used for this layout
    """
    # Compute percentiles
    mins = np.percentile(layout, min_percentile, axis=0)
    maxs = np.percentile(layout, max_percentile, axis=0)
    # Add margins. The margin is computed once from the un-widened percentile
    # span so that both sides are extended by the same amount.
    margin = relative_margin * (maxs - mins)
    mins = mins - margin
    maxs = maxs + margin
    # `clip` broadcasts the per-axis bounds across rows and returns a new array
    clipped = np.clip(layout, mins, maxs)
    # embed within [0,1] along both axes
    _min = clipped.min(axis=0)
    _max = clipped.max(axis=0)
    span = _max - _min
    # An axis whose clipped values are all identical has zero width; divide
    # by 1 there so it maps to 0 instead of producing NaN from 0/0.
    span = np.where(span == 0, 1, span)
    clipped -= _min
    clipped /= span
    return clipped, (_min, _max), (mins, maxs)
86
87
def normalize(
    array: np.ndarray,
    norm_range: Tuple[np.ndarray, np.ndarray],
    norm_clip: Tuple[np.ndarray, np.ndarray],
) -> np.ndarray:
    """Normalize and clip an array.

    Values are first clipped to ``norm_clip`` and then shifted and scaled so
    that ``norm_range`` maps onto [0, 1]. The input array is not modified.

    Args:
        array (np.ndarray): Data to normalize.
        norm_range (tuple): ``(min, max)`` in original space that should map
            to 0 and 1, e.g. the range returned by ``normalize_layout``.
        norm_clip (tuple): ``(min, max)`` clipping bounds applied before
            scaling, e.g. the clipping values returned by
            ``normalize_layout``.

    Returns:
        np.ndarray: Clipped and re-scaled copy of ``array``.
    """
    _min, _max = norm_range
    mins, maxs = norm_clip
    # `np.clip` returns a new array, so the in-place ops below are safe.
    clipped = np.clip(array, mins, maxs)
    span = _max - _min
    # A zero-width range would divide by zero; use 1 there so values on that
    # axis map to 0 rather than NaN/inf.
    span = np.where(span == 0, 1, span)
    clipped -= _min
    clipped /= span
    return clipped
99
100
def denormalize(
    array: np.ndarray,
    norm_range: Tuple[np.ndarray, np.ndarray],
) -> np.ndarray:
    """Map normalized values back into the original coordinate space.

    Args:
        array (np.ndarray): Normalized data.
        norm_range (tuple): ``(min, max)`` of the original space; 0 maps to
            ``min`` and 1 maps to ``max``.

    Returns:
        np.ndarray: ``array`` scaled by the width of the range and offset by
        its minimum. The input array is not modified.
    """
    lower, upper = norm_range
    width = upper - lower
    # The product is a fresh object, so adding the offset in place is safe.
    restored = array * width
    restored += lower
    return restored