Data: Tabular Time Series Specialty: Endocrinology Laboratory: Blood Tests EHR: Demographics Diagnoses Medications Omics: Genomics Multi-omics Transcriptomics Wearable: Activity Clinical Purpose: Treatment Response Assessment Task: Biomarker Discovery
[c23b31]: / src / move / analysis / metrics.py

Download this file

102 lines (76 with data), 3.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
__all__ = ["calculate_accuracy", "calculate_cosine_similarity"]
import numpy as np
from move.core.typing import FloatArray
def calculate_accuracy(
original_input: FloatArray, reconstruction: FloatArray
) -> FloatArray:
"""Compute accuracy per sample.
Args:
original_input: Original labels (one-hot encoded as a 3D array).
reconstruction: Reconstructed labels (2D array).
Returns:
Array of accuracy scores.
"""
if original_input.ndim != 3:
raise ValueError("Expected original input to have three dimensions.")
if reconstruction.ndim != 2:
raise ValueError("Expected reconstruction to have two dimensions.")
if original_input[:, :, 0].shape != reconstruction.shape:
raise ValueError(
f"Original input {original_input.shape} and reconstruction "
f"{reconstruction.shape} shapes do not match."
)
is_nan = original_input.sum(axis=2) == 0
original_input = np.argmax(original_input, axis=2) # 3D => 2D
y_true = np.ma.masked_array(original_input, mask=is_nan)
y_pred = np.ma.masked_array(reconstruction, mask=is_nan)
num_features = np.ma.count(y_true, axis=1)
scores = np.sum(y_true == y_pred, axis=1) / num_features
return np.ma.filled(scores, 0)
def calculate_cosine_similarity(
original_input: FloatArray, reconstruction: FloatArray
) -> FloatArray:
"""Compute cosine similarity per sample.
Args:
original_input: Original values (2D array).
reconstruction: Reconstructed values (2D array).
Returns:
Array of similarities.
"""
if any((original_input.ndim != 2, reconstruction.ndim != 2)):
raise ValueError("Expected both inputs to have two dimensions.")
if original_input.shape != reconstruction.shape:
raise ValueError(
f"Original input {original_input.shape} and reconstruction "
f"{reconstruction.shape} shapes do not match."
)
is_nan = original_input == 0
x = np.ma.masked_array(original_input, mask=is_nan)
y = np.ma.masked_array(reconstruction, mask=is_nan)
# Equivalent to `np.diag(sklearn.metrics.pairwise.cosine_similarity(x, y))`
# But can handle masked arrays
scores = np.sum(x * y, axis=1) / (norm(x) * norm(y))
return np.ma.filled(scores, 0)
def norm(x: np.ma.MaskedArray, axis: int = 1) -> np.ma.MaskedArray:
"""Return Euclidean norm. This function is equivalent to `np.linalg.norm`,
but it can handle masked arrays.
Args:
x: 2D masked array
axis: Axis along which to the operation is performed. Defaults to 1.
Returns:
1D array with the specified axis removed.
"""
return np.sqrt(np.sum(x**2, axis=axis))
def get_2nd_order_polynomial(
x_array: FloatArray, y_array: FloatArray, n_points=100
) -> tuple[FloatArray, FloatArray, tuple[float, float, float]]:
"""
Given a set of x an y values, find the 2nd oder polynomial fitting best the data.
Returns:
x_pol: x coordinates for the polynomial function evaluation.
y_pol: y coordinates for the polynomial function evaluation.
"""
a2, a1, a = np.polyfit(x_array, y_array, deg=2)
x_pol = np.linspace(np.min(x_array), np.max(x_array), n_points)
y_pol = np.array([a2 * x * x + a1 * x + a for x in x_pol])
return x_pol, y_pol, (a2, a1, a)