Survival-Prediction / Git / Diff of /metrics.py

Models:
RichardZick/
Survival-Prediction
Downloads: 1
Diff of /metrics.py [000000] .. [785f18]
Switch to side-by-side view

--- a
+++ b/metrics.py
@@ -0,0 +1,76 @@
+import numpy as np
+import pandas as pd
+import warnings
+
+
+def cindex(y_true_times, predicted_times, tol=1e-8):
+    """
+    Author: Romuald Menuet & Rémy Dubois
+
+    Evaluate concordance index from Pandas DataFrame, taking ties into account.
+
+    Args:
+        y_true_times: pd.DataFrame
+            pd DataFrame with three columns: `PatientID`, `Event` and `SurvivalTime` the float-valued column of true survival times.
+        predicted_times: pd.DataFrame
+            pd DataFrame with three columns: `PatientID`, `SurvivalTime` the float-valued column of predicted survival times,
+            and one `Event`column, whose value does not matter. It must be appended so that target and predictions have the same format.
+        tol: float
+            small float value for numerical stability.
+    Returns:
+        Concordance index, as described here:
+        https://square.github.io/pysurvival/metrics/c_index.html
+    """
+
+    assert isinstance(y_true_times, pd.DataFrame), 'Y true times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
+    assert isinstance(predicted_times, pd.DataFrame), 'Predicted times should be pd dataframe with patient `PatientID` as index, and `Event` and `SurvivalTime` as columns'
+    assert len(y_true_times.shape) == 2, 'Y true times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
+    assert len(predicted_times.shape) == 2, 'Predicted times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
+    assert set(y_true_times.columns) == {'Event', 'SurvivalTime'}, 'Y true times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
+    assert set(predicted_times.columns) == {'Event', 'SurvivalTime'}, 'Predicted times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
+    np.testing.assert_equal(y_true_times.shape, predicted_times.shape, err_msg="Not same amount of predicted versus true samples")
+    assert set(y_true_times.index) == set(predicted_times.index), 'Not same patients in prediction versus ground truth'
+    assert np.all(predicted_times['SurvivalTime'] > 0), 'Predicted times should all be positive'
+
+    events = y_true_times.Event
+    y_true_times = y_true_times.SurvivalTime
+    predicted_times = predicted_times.SurvivalTime
+
+    # Just ordering the right way
+    predicted_times = predicted_times.loc[y_true_times.index]
+    events = events.loc[y_true_times.index]
+
+    events = events.values.astype(int)
+    y_true_times = y_true_times.values.astype(float)
+    predicted_times = predicted_times.values.astype(float)
+    # events = events.values.astype(bool)
+
+    np.testing.assert_array_less(1.,
+                                 predicted_times.astype(float),
+                                 err_msg="Predicted y_true_times all below 1.\
+                                 It should be in days. Make sure that you are not predicting risk instead of time.")
+
+    return _cindex_np(y_true_times, predicted_times, events)
+
+
+def _cindex_np(times, predicted_times, events, tol=1.e-8):
+    """
+    Raw CI computation from np arrray. Should not be used as is.
+    """
+    assert times.ndim == predicted_times.ndim == events.ndim == 1, "wrong input, should be vectors only"
+    assert times.shape[0] == predicted_times.shape[0] == events.shape[0], "wrong input, should be vectors of the same len"
+
+    risks = - predicted_times
+
+    risks_i = risks.reshape((-1, 1))
+    risks_j = risks.reshape((1, -1))
+    times_i = times.reshape((-1, 1))
+    times_j = times.reshape((1, -1))
+    events_i = events.reshape((-1, 1))
+
+    eligible_pairs = (times_i < times_j) * events_i
+
+    well_ordered = np.sum(eligible_pairs * (risks_i > risks_j))
+    ties = + np.sum(eligible_pairs * 0.5 * (risks_i == risks_j))
+
+    return (well_ordered + ties) / (eligible_pairs.sum() + tol)