[157809]: / utils / analysis_utils.py

Download this file

32 lines (26 with data), 1.1 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def impute_with_median(df):
    """Fill missing values in each DataFrame column with that column's median.

    Columns without missing values are left untouched. Columns for which a
    median cannot be computed (for example text columns) are skipped and
    keep their missing values, instead of aborting the whole imputation
    with an exception.

    Args:
        df: pandas DataFrame to impute. It is modified in place.

    Returns:
        The same DataFrame object, with missing values filled where a
        median could be computed.
    """
    # Snapshot the column labels so reassigning columns below cannot
    # interfere with the iteration.
    for column in list(df):
        col_data = df[column]
        # Vectorised check; avoids summing the boolean mask element by
        # element in Python.
        if not col_data.isna().any():
            continue
        try:
            col_median = col_data.median()
        except (TypeError, ValueError):
            # No median exists for this column (non-numeric data), so its
            # missing values are left as they are.
            continue
        # Assign the filled result back rather than filling the extracted
        # Series in place, which depends on whether it is a view or a copy.
        df[column] = col_data.fillna(col_median)
    return df
def plot_roc_curve(fpr, tpr, label=None):
    """Draw a ROC curve on the current matplotlib axes.

    Plots `tpr` against `fpr` as a line of width 2, adds a dashed black
    diagonal from (0, 0) to (1, 1), fixes both axes to the range 0-1,
    labels the axes and switches the grid on.

    Args:
        fpr: False positive rate values (x axis).
        tpr: True positive rate values (y axis).
        label: Optional label passed to the curve's `plot` call.
    """
    unit_range = [0, 1]
    # The curve itself, followed by the dashed diagonal reference line
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot(unit_range, unit_range, 'k--')
    # Limit both axes to the 0-1 range: [xmin, xmax, ymin, ymax]
    plt.axis(unit_range + unit_range)
    # Both axis captions share the same font size
    axis_captions = (
        (plt.xlabel, 'False Positive Rate'),
        (plt.ylabel, 'True Positive Rate'),
    )
    for set_caption, caption in axis_captions:
        set_caption(caption, fontsize=16)
    plt.grid(True)