Diff of /utils/analysis_utils.py [000000] .. [d2dd59]

Switch to unified view

a b/utils/analysis_utils.py
1
def impute_with_median(df):
    """Replace missing values in each DataFrame column with the column median.

    The DataFrame is modified in place and is also returned, so both
    ``impute_with_median(df)`` and ``df = impute_with_median(df)`` work.

    Args:
        df: pandas DataFrame whose columns may contain NaNs.

    Returns:
        The same DataFrame object. In every column that contains NaNs and for
        which a median can be computed, the NaNs are replaced by that median.
        Columns for which pandas cannot compute a median (e.g. strings or
        categoricals) are left unchanged.
    """
    for column in df.columns:
        col_data = df[column]

        # Fully populated columns need no work
        if not col_data.isna().any():
            continue

        try:
            col_median = col_data.median()
        except TypeError:
            # Non-numerical column: there is no median to impute with, so
            # leave its missing values as they are instead of aborting the
            # imputation of the remaining columns
            continue

        # Assign a filled copy back rather than calling fillna(inplace=True)
        # on the extracted Series, which is the chained-assignment pattern
        # pandas warns about
        df[column] = col_data.fillna(col_median)

    return df
19
20
21
def plot_roc_curve(fpr, tpr, label=None):
    """Plot a ROC curve together with a dashed diagonal reference line.

    Args:
        fpr: False Positive Rate values, used as x coordinates.
        tpr: True Positive Rate values, used as y coordinates.
        label: Optional label passed to the curve's ``plot`` call.
    """
    # NOTE(review): `plt` is not imported in the visible part of this file;
    # presumably matplotlib.pyplot -- confirm it is in scope for callers.
    diagonal = [0, 1]
    unit_square = [0, 1, 0, 1]

    # The curve itself, then the dashed black diagonal
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot(diagonal, diagonal, 'k--')

    # Restrict both axes to the [0, 1] range
    plt.axis(unit_square)

    # Label the x axis first, then the y axis, with the same font size
    axis_labels = (
        (plt.xlabel, 'False Positive Rate'),
        (plt.ylabel, 'True Positive Rate'),
    )
    for set_label, text in axis_labels:
        set_label(text, fontsize=16)

    plt.grid(True)