Switch to side-by-side view

--- a
+++ b/slideflow/stats/stats_utils.py
@@ -0,0 +1,108 @@
+from typing import Dict, Tuple
+
+import numpy as np
+from sklearn.cluster import KMeans
+from sklearn.metrics import pairwise_distances_argmin_min
+
+
+def calculate_centroid(
+    act: Dict[str, np.ndarray]
+) -> Tuple[Dict[str, int], Dict[str, np.ndarray]]:
+    """Calculate slide-level centroid indices for a provided activations dict.
+
+    For every slide, finds the tile whose activations lie nearest to the
+    centroid of all of that slide's tile activations. Slides with no tiles
+    are skipped and do not appear in either returned dict.
+
+    Args:
+        act (dict): Dict mapping slide names to ndarray of activations
+            across tiles, of shape (n_tiles, n_features)
+
+    Returns:
+        A tuple containing
+
+            dict: Dict mapping slides to index of tile nearest to centroid
+
+            dict: Dict mapping slides to activations of tile nearest to centroid
+    """
+
+    optimal_indices = {}
+    centroid_activations = {}
+    for slide, slide_act in act.items():
+        # A slide without any tiles has no centroid; leave it out entirely.
+        if not len(slide_act):
+            continue
+        # Delegate to the shared helper so that both entry points always
+        # agree on what "nearest to centroid" means.
+        closest_index = get_centroid_index(slide_act)
+        optimal_indices[slide] = closest_index
+        centroid_activations[slide] = slide_act[closest_index]
+    return optimal_indices, centroid_activations
+
+
+def get_centroid_index(arr: np.ndarray) -> int:
+    """Calculate index nearest to centroid from a given 2D input array.
+
+    The centroid is the per-column mean of ``arr`` (which is exactly what a
+    single-cluster k-means converges to), so it is computed directly rather
+    than by fitting a clustering model.
+
+    Args:
+        arr (np.ndarray): 2D array of shape (n_samples, n_features).
+
+    Returns:
+        int: Index of the row with the smallest Euclidean distance to the
+        centroid. If several rows are equally close, the lowest index wins.
+
+    Raises:
+        ValueError: If ``arr`` is not 2D, is empty, or contains NaN/inf.
+    """
+    arr = np.asarray(arr)
+    if arr.ndim != 2 or arr.size == 0:
+        raise ValueError(
+            "Expected a non-empty 2D array, got shape {}".format(arr.shape)
+        )
+    # Accumulate in float64 so low-precision inputs do not lose accuracy.
+    centroid = arr.mean(axis=0, dtype=np.float64)
+    # Any NaN/inf in a column propagates into that column's mean.
+    if not np.isfinite(centroid).all():
+        raise ValueError("Input contains NaN or infinite values.")
+    offsets = arr - centroid
+    # Squared distances are enough: sqrt is monotonic, so argmin is unchanged.
+    sq_dist = np.einsum('ij,ij->i', offsets, offsets)
+    return np.argmin(sq_dist)
+
+
+def normalize_layout(
+    layout: np.ndarray,
+    min_percentile: int = 1,
+    max_percentile: int = 99,
+    relative_margin: float = 0.1
+) -> Tuple[np.ndarray, Tuple[float, float], Tuple[float, float]]:
+    """Removes outliers and scales layout to between [0,1].
+
+    Values are clipped per column to the requested percentiles (widened by
+    a margin), then each column is shifted and scaled so that it spans
+    [0, 1]. The input array is not modified.
+
+    Args:
+        layout (np.ndarray): 2D array containing data to be scaled.
+        min_percentile (int, optional): Percentile for scaling. Defaults to 1.
+        max_percentile (int, optional): Percentile for scaling. Defaults to 99.
+        relative_margin (float, optional): Add an additional margin (fraction
+            of the percentile range) on each side. Defaults to 0.1.
+
+    Returns:
+        np.ndarray: layout array, re-scaled and clipped.
+
+        tuple(float, float): Range in original space covered by this layout,
+        as per-column (min, max) values of the clipped data.
+
+        tuple(float, float): Clipping values (min, max) used for this layout,
+        one value per column.
+    """
+
+    # Compute percentiles
+    mins = np.percentile(layout, min_percentile, axis=0)
+    maxs = np.percentile(layout, max_percentile, axis=0)
+    # Add margins. The margin is computed once, from the un-padded percentile
+    # range, so that the same padding is applied below and above.
+    margin = relative_margin * (maxs - mins)
+    mins = mins - margin
+    maxs = maxs + margin
+    # `clip` broadcasts the per-column bounds across all rows
+    clipped = np.clip(layout, mins, maxs)
+    # embed within [0,1] along both axes
+    _min = clipped.min(axis=0)
+    _max = clipped.max(axis=0)
+    span = _max - _min
+    # A constant column has zero span; divide by 1 instead so that it maps
+    # to 0 rather than NaN.
+    span = np.where(span == 0, 1, span)
+    clipped -= _min
+    clipped /= span
+    return clipped, (_min, _max), (mins, maxs)
+
+def normalize(
+    array: np.ndarray,
+    norm_range: Tuple[np.ndarray, np.ndarray],
+    norm_clip: Tuple[np.ndarray, np.ndarray],
+) -> np.ndarray:
+    """Normalize and clip an array.
+
+    Clips ``array`` to ``norm_clip``, then subtracts the range minimum and
+    divides by the range width. The input array is not modified.
+
+    Args:
+        array (np.ndarray): Data to normalize.
+        norm_range (tuple): (min, max) of the range that is mapped onto
+            [0, 1], e.g. the range returned by ``normalize_layout``.
+        norm_clip (tuple): (min, max) clipping bounds applied before scaling,
+            e.g. the clipping values returned by ``normalize_layout``.
+
+    Returns:
+        np.ndarray: Clipped and re-scaled array.
+    """
+    _min, _max = norm_range
+    mins, maxs = norm_clip
+    clipped = np.clip(array, mins, maxs)
+    # Integer data clipped with integer bounds stays integer, and in-place
+    # true division on an integer array raises; promote to float first.
+    if not np.issubdtype(clipped.dtype, np.inexact):
+        clipped = clipped.astype(np.float64)
+    span = np.subtract(_max, _min)
+    # A zero-width range would divide by zero; divide by 1 instead so the
+    # affected values map to 0 rather than NaN/inf.
+    span = np.where(span == 0, 1, span)
+    clipped -= _min
+    clipped /= span
+    return clipped
+
+def denormalize(
+    array: np.ndarray,
+    norm_range: Tuple[np.ndarray, np.ndarray],
+) -> np.ndarray:
+    """Map a normalized array back onto its original range.
+
+    Multiplies ``array`` by the width of ``norm_range`` and then adds the
+    range minimum. The input array is not modified.
+
+    Args:
+        array (np.ndarray): Normalized data.
+        norm_range (tuple): (min, max) of the original range.
+
+    Returns:
+        np.ndarray: Array re-scaled into the original range.
+    """
+    lower, upper = norm_range
+    width = upper - lower
+    # The multiplication yields a new object, so the in-place shift below
+    # never touches the caller's array.
+    restored = array * width
+    restored += lower
+    return restored