covid_blood_analysis / Git / Diff of /utils/analysis

Downloads: 1

Diff of /utils/analysis_utils.py [000000] .. [d2dd59]

 b/utils/analysis_utils.py
+def impute_with_median (df):
+    """Iterate through columns of Pandas DataFrame.
+    Where NaNs exist replace with median"""
+    # Get list of DataFrame column names
+    cols = list(df)
+    # Loop through columns
+    for column in cols:
+        # Transfer column to independent series
+        col_data = df[column]
+        # Look to see if there is any missing numerical data
+        missing_data = sum(col_data.isna())
+        if missing_data > 0:
+            # Get median and replace missing numerical data with median
+            col_median = col_data.median()
+            col_data.fillna(col_median, inplace=True)
+            df[column] = col_data
+    return df
+def plot_roc_curve(fpr, tpr, label=None):
+    """Plot the ROC curve from False Positive Rate
+    and True Positive Rate"""
+    plt.plot(fpr, tpr, linewidth=2, label=label)
+    plt.plot([0, 1], [0, 1], 'k--')
+    plt.axis([0, 1, 0, 1])
+    plt.xlabel('False Positive Rate', fontsize=16)
+    plt.ylabel('True Positive Rate', fontsize=16)
+    plt.grid(True)