[c4ddf6]: / Methods_utils / Plotting scripts / plot_allAUROC_perModel.py

Download this file

131 lines (97 with data), 4.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 27 11:00:25 2024
@author: Asus
"""
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
def plotPerModel_colorByIteration_givenStage(allAUROCs, model_name, given_stage):
    """Plot all ROC curves of one model for one feature-selection stage.

    One curve is drawn for every row of ``allAUROCs`` whose 'Model name'
    equals ``model_name`` and whose 'Stage' equals ``given_stage``. No explicit
    colour is passed, so each curve takes the next colour of matplotlib's
    default cycle. A dashed grey diagonal is added as the AUROC=0.5 baseline.
    The figure is saved as
    ``./plots_perModel_<model_name>/<model_name>_Stage_<given_stage>_allIterations.png``
    (the directory is created when missing), shown, and then closed.

    Nothing is plotted when ``given_stage`` is 1, when it has no entry in the
    stage-name table below, or when no row matches the model/stage pair.

    Args:
        allAUROCs: DataFrame with the columns 'Model name', 'Stage',
            'Iteration', 'FPR', 'TPR' and 'AUROC'. Each 'FPR'/'TPR' cell is
            handed to ``plt.plot`` as the x/y data of one curve.
        model_name: Value to match in the 'Model name' column; also used in
            the output directory and file name.
        given_stage: Stage number to match in the 'Stage' column.

    Returns:
        None.
    """
    if given_stage == 1:
        print("Skipping plotting for Stage 1.")
        return

    custom_strings = {
        0: "No Feature Selection",  # skip 1 because it's LASSO
        2: "SHAP",
        3: "rf feature selection",
        4: "xgb feature selection",
        5: "ridge feature selection",
        6: "logistic feature selection",
        # Add more custom strings for other stages if needed
    }
    if given_stage not in custom_strings:
        print(f"Custom string not defined for stage {given_stage}.")
        return

    custom_str = custom_strings[given_stage]
    print("Plotting the AUROC from all iterations for " + model_name + " model for stage: " + custom_str)

    # Keep only the rows of the requested model at the requested stage.
    filtered_data = allAUROCs[(allAUROCs['Model name'] == model_name) & (allAUROCs['Stage'] == given_stage)]
    if filtered_data.empty:
        # Without this guard an image holding only the baseline would be saved.
        print(f"No AUROC data for model {model_name} at stage {given_stage}; nothing to plot.")
        return

    fig = plt.figure(figsize=(8, 6))

    # One curve per row, visited iteration by iteration.
    for iteration, group in filtered_data.groupby('Iteration'):
        for _, row in group.iterrows():
            plt.plot(row['FPR'], row['TPR'], label=f"Data Split {iteration}, {model_name} (AUROC={row['AUROC']:.2f})")

    # Plot baseline (AUROC=0.5)
    plt.plot([0, 1], [0, 1], linestyle='--', label='Baseline (AUROC=0.5)', color='gray')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'AUROC curves - {model_name} - Feature selection strategy {given_stage}')
    plt.legend()

    directory = "./plots_perModel_" + model_name + "/"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(directory, exist_ok=True)

    # Save the plot as an image
    plt.savefig(directory + f"{model_name}_Stage_{given_stage}_allIterations.png")
    plt.show()
    # This function is called once per model/stage pair; closing the figure
    # keeps open figures from accumulating across those calls.
    plt.close(fig)
def plotPerModel_colorByStage(allAUROCs, model_name):
    """Plot every ROC curve of one model, across all stages, in one figure.

    All rows of ``allAUROCs`` whose 'Model name' equals ``model_name`` are
    drawn. Rows are grouped by 'Iteration' and every group gets one colour
    sampled from the viridis colormap, so curves of the same iteration share a
    colour. NOTE(review): despite the function name, colours follow the
    iteration, not the stage - confirm which was intended.

    A dashed grey diagonal is added as the AUROC=0.5 baseline. The figure is
    saved as
    ``./plots_perModel_<model_name>/<model_name>_ColorByStage_allIterationsBIG.png``
    (the directory is created when missing), shown, and then closed. Nothing
    is plotted when no row matches ``model_name``.

    Args:
        allAUROCs: DataFrame with the columns 'Model name', 'Iteration',
            'FPR', 'TPR' and 'AUROC'. Each 'FPR'/'TPR' cell is handed to
            ``plt.plot`` as the x/y data of one curve.
        model_name: Value to match in the 'Model name' column; also used in
            the output directory and file name.

    Returns:
        None.
    """
    # The original message lacked the space before "coloring".
    print("Plotting the AUROC from all iterations, all stages for " + model_name + " coloring by Stage...")

    # Keep only the rows of the requested model (all stages).
    filtered_data = allAUROCs[allAUROCs['Model name'] == model_name]
    if filtered_data.empty:
        # Without this guard an image holding only the baseline would be saved.
        print(f"No AUROC data for model {model_name}; nothing to plot.")
        return

    # Group data by iteration
    grouped = filtered_data.groupby('Iteration')

    fig = plt.figure(figsize=(8, 6))

    # One viridis colour per iteration group.
    colors = plt.cm.viridis(np.linspace(0, 1, len(grouped)))

    for i, (iteration, group) in enumerate(grouped):
        for _, row in group.iterrows():
            plt.plot(row['FPR'], row['TPR'], label=f"Data split {iteration}, {model_name} (AUROC={row['AUROC']:.2f})", color=colors[i])

    # Plot baseline (AUROC=0.5)
    plt.plot([0, 1], [0, 1], linestyle='--', label='Baseline (AUROC=0.5)', color='gray')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'AUROC curves - {model_name}')
    plt.legend()

    directory = "./plots_perModel_" + model_name + "/"
    # savefig does not create missing directories; make sure the target exists
    # so this function also works when it is the first one to run.
    os.makedirs(directory, exist_ok=True)
    plt.savefig(directory + f"{model_name}_ColorByStage_allIterationsBIG.png")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
# plots_perModel_rf
# Load the AUROC table ('~'-separated, latin-1 encoded) from the parent directory.
plot_csv_path = '../allAUROCs_pipeline_10_.csv'
data_plots = pd.read_csv(plot_csv_path, sep='~', encoding='latin-1')
allAUROCs = data_plots
print(data_plots)


def _to_float_list(text):
    """Turn a comma-separated string of numbers into a list of floats."""
    return [float(token) for token in text.split(',')]


# The TPR/FPR columns are read as comma-separated strings; turn each cell
# back into a list of floats so it can be plotted.
for _curve_column in ('TPR', 'FPR'):
    allAUROCs[_curve_column] = allAUROCs[_curve_column].apply(_to_float_list)

# Overview figure: one curve per table row, drawn on the implicit current figure.
for _fpr, _tpr, _iteration, _stage, _model in zip(
    allAUROCs['FPR'],
    allAUROCs['TPR'],
    allAUROCs['Iteration'],
    allAUROCs['Stage'],
    allAUROCs['Model name'],
):
    plt.plot(_fpr, _tpr, label=f"{_iteration}, Stage {_stage}, {_model}")

# Axis labels, title and legend, then save and display the overview.
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('AUROC Curves')
plt.legend()
plt.savefig("All_omg.png")
plt.show()

# plotPerModel_colorByStage(allAUROCs, 'rf') # not intelligible

# One figure per (stage, model) pair: stages 0-6, models in the order below.
for stage_number in range(0, 7):
    for _model_name in ('rf', 'svm', 'xgb', 'ridge', 'logistic'):
        plotPerModel_colorByIteration_givenStage(allAUROCs, _model_name, stage_number)