def impute_with_median (df):
"""Iterate through columns of Pandas DataFrame.
Where NaNs exist replace with median"""
# Get list of DataFrame column names
cols = list(df)
# Loop through columns
for column in cols:
# Transfer column to independent series
col_data = df[column]
# Look to see if there is any missing numerical data
missing_data = sum(col_data.isna())
if missing_data > 0:
# Get median and replace missing numerical data with median
col_median = col_data.median()
col_data.fillna(col_median, inplace=True)
df[column] = col_data
return df
def plot_roc_curve(fpr, tpr, label=None):
"""Plot the ROC curve from False Positive Rate
and True Positive Rate"""
plt.plot(fpr, tpr, linewidth=2, label=label)
plt.plot([0, 1], [0, 1], 'k--')
plt.axis([0, 1, 0, 1])
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate', fontsize=16)
plt.grid(True)