Data: Tabular Time Series; Specialty: Endocrinology; Laboratory: Blood Tests; EHR: Demographics Diagnoses Medications; Omics: Genomics Multi-omics Transcriptomics; Wearable: Activity; Clinical Purpose: Treatment Response Assessment; Task: Biomarker Discovery

Switch to side-by-side view

--- a
+++ b/src/move/analysis/metrics.py
@@ -0,0 +1,101 @@
+# Names exported by a star-import (`from ... import *`) of this module.
+# NOTE(review): `norm` and `get_2nd_order_polynomial` are defined in this file
+# but not listed here -- confirm that leaving them out is intentional.
+__all__ = ["calculate_accuracy", "calculate_cosine_similarity"]
+
+import numpy as np
+
+from move.core.typing import FloatArray
+
+
+def calculate_accuracy(
+    original_input: FloatArray, reconstruction: FloatArray
+) -> FloatArray:
+    """Score each sample by the fraction of labels that were reconstructed.
+
+    The original labels are collapsed from one-hot vectors to category indices
+    with `argmax` and compared element-wise against the reconstruction. An
+    entry whose one-hot vector sums to zero is treated as missing: it is
+    masked out of both the count of matches and the count of features.
+
+    Args:
+        original_input: Original labels (one-hot encoded as a 3D array; the
+            last axis is the one reduced by `argmax`).
+        reconstruction: Reconstructed labels (2D array), with the same shape
+            as the first two axes of `original_input`.
+
+    Returns:
+        Array with one accuracy score per row. Rows in which every entry is
+        missing get a score of 0.
+
+    Raises:
+        ValueError: If `original_input` is not 3D, `reconstruction` is not 2D,
+            or their leading two dimensions differ.
+    """
+    if original_input.ndim != 3:
+        raise ValueError("Expected original input to have three dimensions.")
+    if reconstruction.ndim != 2:
+        raise ValueError("Expected reconstruction to have two dimensions.")
+    if original_input[:, :, 0].shape != reconstruction.shape:
+        message = (
+            f"Original input {original_input.shape} and reconstruction "
+            f"{reconstruction.shape} shapes do not match."
+        )
+        raise ValueError(message)
+
+    # All-zero one-hot vectors carry no label; flag them before `argmax`
+    # turns them into an (indistinguishable) index 0.
+    missing = original_input.sum(axis=2) == 0
+    labels = np.ma.masked_array(np.argmax(original_input, axis=2), mask=missing)
+    predictions = np.ma.masked_array(reconstruction, mask=missing)
+
+    hits = np.sum(labels == predictions, axis=1)
+    observed = np.ma.count(labels, axis=1)
+
+    # Rows without any observed entry stay masked after the division and are
+    # reported as 0.
+    return np.ma.filled(hits / observed, 0)
+
+
+def calculate_cosine_similarity(
+    original_input: FloatArray, reconstruction: FloatArray
+) -> FloatArray:
+    """Measure, row by row, the cosine similarity between input and output.
+
+    Positions where `original_input` equals zero are masked in both arrays,
+    so they contribute neither to the dot product nor to the norms.
+
+    Args:
+        original_input: Original values (2D array).
+        reconstruction: Reconstructed values (2D array) of the same shape.
+
+    Returns:
+        Array with one similarity per row. Rows whose result ends up masked
+        (for instance when every position is masked) are reported as 0.
+
+    Raises:
+        ValueError: If either input is not 2D or the shapes differ.
+    """
+    if (original_input.ndim, reconstruction.ndim) != (2, 2):
+        raise ValueError("Expected both inputs to have two dimensions.")
+    if original_input.shape != reconstruction.shape:
+        message = (
+            f"Original input {original_input.shape} and reconstruction "
+            f"{reconstruction.shape} shapes do not match."
+        )
+        raise ValueError(message)
+
+    missing = original_input == 0
+    truth = np.ma.masked_array(original_input, mask=missing)
+    estimate = np.ma.masked_array(reconstruction, mask=missing)
+
+    # Same result as the diagonal of
+    # `sklearn.metrics.pairwise.cosine_similarity(truth, estimate)`,
+    # but computed with operations that respect the mask.
+    dot_products = np.sum(truth * estimate, axis=1)
+    magnitudes = norm(truth) * norm(estimate)
+
+    return np.ma.filled(dot_products / magnitudes, 0)
+
+
+def norm(x: np.ma.MaskedArray, axis: int = 1) -> np.ma.MaskedArray:
+    """Compute the Euclidean norm along one axis.
+
+    Counterpart of `np.linalg.norm` built from operations that work on masked
+    arrays: the values are squared, summed along `axis`, and the square root
+    of each sum is returned.
+
+    Args:
+        x: 2D masked array
+        axis: Axis along which the norm is computed. Defaults to 1.
+
+    Returns:
+        1D array with the specified axis removed.
+    """
+    squared = x**2
+    totals = np.sum(squared, axis=axis)
+    return np.sqrt(totals)
+
+
+def get_2nd_order_polynomial(
+    x_array: FloatArray, y_array: FloatArray, n_points: int = 100
+) -> tuple[FloatArray, FloatArray, tuple[float, float, float]]:
+    """Fit a 2nd-order polynomial to the data and sample the fitted curve.
+
+    Given a set of x and y values, the best-fitting polynomial
+    ``a2 * x**2 + a1 * x + a`` is found with `np.polyfit` and then evaluated
+    on `n_points` evenly spaced x values between the minimum and the maximum
+    of `x_array`.
+
+    Args:
+        x_array: x coordinates of the data points.
+        y_array: y coordinates of the data points.
+        n_points: Number of x values at which the fitted polynomial is
+            evaluated. Defaults to 100.
+
+    Returns:
+        x_pol: x coordinates for the polynomial function evaluation.
+        y_pol: y coordinates for the polynomial function evaluation.
+        coefficients: Fitted coefficients ``(a2, a1, a)``, highest degree
+            first.
+    """
+    a2, a1, a = np.polyfit(x_array, y_array, deg=2)
+
+    x_pol = np.linspace(np.min(x_array), np.max(x_array), n_points)
+    # Evaluate on the whole grid at once rather than looping in Python; the
+    # operation order matches the per-element formula.
+    y_pol = a2 * x_pol * x_pol + a1 * x_pol + a
+
+    return x_pol, y_pol, (a2, a1, a)