import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import pickle
from utils.formatting import SafeDict
from scipy.stats import wasserstein_distance
from scipy.stats import ttest_ind, pearsonr, mannwhitneyu, spearmanr
from models.segmentation_models import *
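# Plotting and statistical-analysis helpers for the segmentation experiments:
# training curves, cross-dataset generalizability, augmentation comparisons, and ensembles.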
def training_plot(log_csv):
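    """Plot training and validation loss curves (Jaccard loss) from a training-log CSV."""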
log_df = pd.read_csv(log_csv)
plt.title("Training Plot Sample")
plt.xlabel("Epochs")
plt.ylabel("Jaccard Loss")
plt.xlim((0, 300))
plt.ylim((0, 1))
plt.plot(log_df["epoch"], log_df["train_loss"], label="Training Loss")
plt.plot(log_df["epoch"], log_df["val_loss"], label="Validation Loss")
# plt.plot(log_df["epoch"], log_df["ood_iou"], label="Etis-LaribDB iou")
plt.legend()
plt.show()
def ood_correlations(log_csv):
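    """Scatter consistency (SIS) and IID test IoU against Etis-LaribDB OOD IoU from a training-log CSV."""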
log_df = pd.read_csv(log_csv)
plt.title("SIS-OOD correlation")
plt.xlabel("SIS")
plt.ylabel("Etis-LaribDB OOD performance")
plt.xlim((0, 1))
plt.ylim((0, 1))
plt.scatter(log_df["consistency"], log_df["ood_iou"], label="Consistency")
plt.scatter(log_df["iid_test_iou"], log_df["ood_iou"], label="IID IoU")
plt.legend()
plt.show()
def ood_v_epoch(log_csv):
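    """Plot consistency (SIS) and OOD IoU as a function of training epoch."""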
log_df = pd.read_csv(log_csv)
plt.title("Training Plot Sample")
plt.xlabel("Epochs")
plt.ylabel("SIL")
plt.xlim((0, 500))
plt.ylim((0, 1))
plt.plot(log_df["epoch"], log_df["consistency"], label="consistency")
plt.plot(log_df["epoch"], log_df["ood_iou"], label="ood iou")
plt.legend()
plt.show()
def get_boxplots_for_models():
"""
box plot for comparing model performance. Considers d% reduced along datasets, split according to experiments
and models
:return:
"""
dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
model_names = ["DeepLab", "FPN, Unet, InductiveNet, TriUnet"]
dataset = []
for fname in sorted(os.listdir("experiments/Data/pickles")):
if "0" in fname:
with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
model = fname.split("_")[0]
if model == "InductiveNet":
model = "DD-DeepLabV3+"
data = pickle.load(file)
datasets, samples = data["ious"].shape
kvasir_ious = data["ious"][0]
                median_iid_iou = np.median(kvasir_ious)
                print(median_iid_iou)
if "maximum_consistency" in fname:
continue
for i in range(datasets):
if i == 0:
continue
for j in range(samples):
if data["ious"][i, j] < 0.25 or data["ious"][0][j] < 0.75:
print(f"{fname} with id {j} has iou {data['ious'][i, j]} and {data['ious'][0][j]} ")
continue
# dataset.append([dataset_names[i], model, data["ious"][i, j]])
                    dataset.append(
                        [dataset_names[i], model, 100 * (data["ious"][i, j] - median_iid_iou) / median_iid_iou])
dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "\u0394%IoU"])
print(dataset)
    plt.ylim(-100, 0)
sns.barplot(x="Dataset", y="\u0394%IoU", hue="Model", data=dataset)
plt.show()
def get_variances_for_models():
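    """Bar plot of the coefficient of standard deviation of IoUs per model and dataset."""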
dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
model_names = ["DeepLab", "FPN, Unet, InductiveNet, TriUnet"]
dataset = []
for fname in sorted(os.listdir("experiments/Data/pickles")):
if "maximum_consistency" in fname:
continue
if "0" in fname:
with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
model = fname.split("_")[0]
if model == "InductiveNet":
model = "DD-DeepLabV3+"
data = pickle.load(file)
datasets, samples = data["ious"].shape
if "maximum_consistency" in fname:
continue
for i in range(datasets):
# if i == 0:
# continue
for j in range(samples):
if data["ious"][0][j] < 0.75:
print(fname, "-", j)
continue
if i == 3 and model == "InductiveNet":
print("inductivenet", data["ious"][i, j])
if i == 3 and model == "DeepLab":
print("DeepLab", data["ious"][i, j])
dataset.append([dataset_names[i], model, data["ious"][i, j]])
iou_dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "Coefficient of Std.Dev"])
std_dataset = iou_dataset.groupby(["Model", "Dataset"]).std() / iou_dataset.groupby(["Model", "Dataset"]).mean()
std_dataset = std_dataset.reset_index()
print(std_dataset)
plt.ylim((0, 0.15))
sns.barplot(x="Dataset", y="Coefficient of Std.Dev", hue="Model", data=std_dataset)
plt.show()
def plot_parameters_sizes():
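    """Print the parameter count of each segmentation architecture."""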
models = [DeepLab, FPN, InductiveNet, Unet, TriUnet]
model_names = ["DeepLab", "FPN", "InductiveNet", "Unet", "TriUnet"]
for model_name, model_c in zip(model_names, models):
model = model_c()
print(f"{model_name}: {sum(p.numel() for p in model.parameters(recurse=True))}")
def collate_ensemble_results_into_df(type="consistency"):
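    """
    Collect per-sample ensemble IoUs from experiments/Data/pickles into a DataFrame
    (also written to ensemble_data.csv). `type` selects the training regime:
    "consistency" (default), "augmentation", "vanilla", or "all".
    """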
dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
model_names = ["DeepLab", "FPN", "Unet", "InductiveNet", "TriUnet"]
dataset = []
for fname in sorted(os.listdir("experiments/Data/pickles")):
if "ensemble" not in fname:
continue
if "maximum_consistency" in fname or "last_epoch" in fname:
continue
if type != "all":
if type == "consistency" and ("augmentation" in fname or "vanilla" in fname):
continue
if type == "augmentation" and "augmentation" not in fname:
continue
if type == "vanilla" and "vanilla" not in fname:
continue
with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
model = fname.split("-")[0]
# experiment = fname.split("-")[-1]
if "vanilla" in fname:
experiment = "No Augmentation"
elif "augmentation" in fname:
experiment = "Vanilla Augmentation"
else:
experiment = "Consistency Training"
data = pickle.load(file)
# print(file, data.keys())
datasets, samples = data["ious"].shape
if model == "InductiveNet":
model = "DD-DeepLabV3+"
for i in range(datasets):
for j in range(samples):
if data["ious"][0, j] < 0.75: # if bugged out; rare
continue
try:
dataset.append(
[dataset_names[i], model, j, experiment, data["ious"][i, j], data["constituents"][j]])
except KeyError:
continue
iou_dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "ID", "Experiment", "IoU", "constituents"])
# print(iou_dataset)
iou_dataset.to_csv("ensemble_data.csv")
return iou_dataset
def collate_base_results_into_df():
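    """
    Collect per-sample single-model IoUs and SIS scores from experiments/Data/pickles
    into a DataFrame (also written to base_data.csv).
    """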
dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
model_names = ["DeepLab", "FPN", "Unet", "InductiveNet", "TriUnet"]
dataset = []
for fname in sorted(os.listdir("experiments/Data/pickles")):
if "ensemble" in fname:
# print(fname)
continue
if "maximum_consistency" in fname or "last_epoch" in fname:
# print(fname)
continue
with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
model = fname.split("_")[0]
data = pickle.load(file)
datasets, samples = data["ious"].shape
if model == "InductiveNet":
model = "DD-DeepLabV3+"
experiment = "No Augmentation"
if "sil" in fname and "_G" not in fname:
experiment = "Consistency Training"
elif "_V" in fname:
experiment = "Vanilla Augmentation"
elif "_G" in fname:
experiment = "Inpainter Augmentation"
for i in range(datasets):
for j in range(samples):
if data["ious"][0, j] < 0.75:
continue
dataset.append([dataset_names[i], model, j, experiment, data["ious"][i, j], data["sis"][i, j]])
iou_dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "ID", "Experiment", "IoU", "SIS"])
iou_dataset.to_csv("base_data.csv")
return iou_dataset
def plot_ensemble_performance():
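    """
    Compare ensembles against their constituent models: mean IoU per dataset/model, change in
    generalization gap, and the relationship between constituent variability and ensemble gain.
    """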
df = collate_ensemble_results_into_df("augmentation")
print(df)
latex = df.groupby(["Model", "Dataset"])["IoU"].mean()
print(latex.reset_index())
print(latex.to_latex(float_format="%.3f"))
order = df.groupby(["Dataset", "Model"])["IoU"].mean().sort_values().index
sns.barplot(data=df, x="Dataset", y="IoU", hue="Model")
plt.show()
grouped_mean = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean()
# print(grouped_mean)
grouped_iid = np.abs(grouped_mean - grouped_mean["Kvasir-SEG"]) / grouped_mean["Kvasir-SEG"]
# print(grouped_iid)
nedf = collate_base_results_into_df()
ne_grouped_mean = nedf.groupby(["Dataset", "Model"])["IoU"].mean()
# print(ne_grouped_mean)
ne_grouped_iid = np.abs(ne_grouped_mean["Kvasir-SEG"] - ne_grouped_mean) / ne_grouped_mean["Kvasir-SEG"]
# print(ne_grouped_iid)
comparison = ne_grouped_iid - grouped_iid
comparison = comparison.reset_index()
sns.barplot(data=comparison, x="Dataset", y="IoU", hue="Model")
plt.show()
# plot delta vs variance
ne_grouped_coeff_std = nedf.groupby(["Dataset", "Model"])["IoU"].std() / ne_grouped_mean
ne_grouped_coeff_std = ne_grouped_coeff_std.reset_index()
ne_grouped_coeff_std = ne_grouped_coeff_std.rename(columns={"IoU": "Coeff. StD of IoUs"})
# print(ne_grouped_coeff_std.head(10))
sns.barplot(data=ne_grouped_coeff_std, x="Dataset", y="Coeff. StD of IoUs", hue="Model")
plt.show()
test = pd.merge(ne_grouped_coeff_std, comparison)
test = test.rename(columns={"IoU": "% Improvement over mean constituent IoU"})
test["% Improvement over mean constituent IoU"] *= 100
test = test.groupby(["Model", "ID"]).mean()
test = test.reset_index()
print("mean", np.mean(test))
print("max", np.max(test))
# print(test)
sns.lineplot(data=test, x="Coeff. StD of IoUs", y="% Improvement over mean constituent IoU", err_style="bars",
color="gray", linestyle='--')
test = test.groupby("Model").mean().reset_index()
    sns.scatterplot(data=test, x="Coeff. StD of IoUs", y="% Improvement over mean constituent IoU", hue="Model",
                    s=100)
plt.show()
def plot_overall_ensemble_performance():
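    """Collate ensemble and single-model results and compute grouped means and C.StD (no plots produced yet)."""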
df = collate_ensemble_results_into_df("both")
grouped_mean = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean()
nedf = collate_base_results_into_df()
ne_grouped_mean = nedf.groupby(["Dataset", "Model"])["IoU"].mean()
# plot delta vs variance
ne_grouped_coeff_std = nedf.groupby(["Dataset", "Model"])["IoU"].std() / ne_grouped_mean
ne_grouped_coeff_std = ne_grouped_coeff_std.reset_index()
ne_grouped_coeff_std = ne_grouped_coeff_std.rename(columns={"IoU": "Coeff. StD of IoUs"})
def plot_cons_vs_aug_ensembles():
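    """Mann-Whitney U tests comparing consistency-trained and augmentation-trained ensembles per dataset (prints LaTeX rows)."""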
df = collate_ensemble_results_into_df("consistency")
df2 = collate_ensemble_results_into_df("augmentation")
grouped = df2.groupby(["Model", "Dataset"])["IoU"].mean()
grouped2 = df2.groupby(["Dataset"])["IoU"].mean()
grouped3 = df.groupby(["Dataset"])["IoU"].mean()
print(grouped2)
print(grouped3)
latex = grouped.to_latex(float_format="%.3f")
for dset in np.unique(df2["Dataset"])[::-1]:
utest = mannwhitneyu(df[df["Dataset"] == dset]["IoU"], df2[df2["Dataset"] == dset]["IoU"])
print(f"{dset} & {round(utest[0], 5)} & {round(utest[1], 5)} \\\ ")
def plot_inpainter_vs_conventional_performance():
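    """
    Compare inpainter augmentation with conventional augmentation: per-dataset/model significance
    tests and % change in mean IoU relative to no augmentation.
    """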
df = collate_base_results_into_df()
df = df[df["Experiment"] != "Consistency Training"]
models = np.unique(df["Model"])
for dset in np.unique(df["Dataset"])[::-1]:
overall_utest = mannwhitneyu(df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset)]["IoU"],
df[(df["Experiment"] == "Inpainter Augmentation") & (df["Dataset"] == dset)][
"IoU"])
print(f"{dset} & {overall_utest[0]}, p={round(overall_utest[1], 5)} \\\ ")
for model in models:
print(f"{model}", end="")
for dset in np.unique(df["Dataset"]):
ttest = ttest_ind(
df[(df["Experiment"] == "Inpainter Augmentation") & (df["Dataset"] == dset) & (df["Model"] == model)][
"IoU"],
df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset) & (df["Model"] == model)][
"IoU"],
equal_var=False)
print(f" & {round(ttest[1], 5)}", end="")
print("\\\ ")
table = df.groupby(["Dataset", "Model", "Experiment"])["IoU"].mean()
no_augmentation = df[df["Experiment"] == "No Augmentation"].groupby(["Dataset"])[
"IoU"].mean()
improvements = 100 * (table - no_augmentation) / no_augmentation
improvements = improvements.reset_index()
improvements = improvements[improvements["Experiment"] != "No Augmentation"]
improvements.rename(columns={"IoU": "% Change in mean IoU with respect to No Augmentation"}, inplace=True)
test = table.to_latex(float_format="%.3f")
# improvements = improvements[improvements["Dataset"] == "CVC-ClinicDB"]
print(np.max(improvements[improvements["Experiment"] == "Vanilla Augmentation"]))
print(np.mean(improvements[improvements["Experiment"] == "Vanilla Augmentation"]))
print(np.max(improvements[improvements["Experiment"] == "Inpainter Augmentation"]))
print(np.mean(improvements[improvements["Experiment"] == "Inpainter Augmentation"]))
sns.boxplot(data=improvements, x="Dataset", y="% Change in mean IoU with respect to No Augmentation",
hue="Experiment")
plt.savefig("augmentation_plot.eps")
plt.show()
return table
def plot_training_procedure_performance():
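    """
    Compare training procedures (no augmentation, vanilla augmentation, consistency training):
    significance tests, IoU variability (C.StD), and % change in mean IoU.
    """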
df = collate_base_results_into_df()
df = df[df["Experiment"] != "Inpainter Augmentation"]
index = df.index[df["Experiment"] == "No Augmentation"].tolist() + df.index[
df["Experiment"] == "Vanilla Augmentation"].tolist() + df.index[
df["Experiment"] == "Consistency Training"].tolist()
df = df.reindex(index)
# print(df)
filt = df.groupby(["Dataset", "ID", "IoU", "Experiment"]).mean()
filt = filt.reset_index()
hue_order = df.groupby(["Experiment"])["IoU"].mean().sort_values().index
order = df.groupby(["Dataset"])["IoU"].mean().sort_values().index
table = df.groupby(["Dataset", "Model", "Experiment"])["IoU"].mean()
w_p_values = table.reset_index()
for i, row in w_p_values.iterrows():
experiment = row["Experiment"]
model = row["Model"]
dataset = row["Dataset"]
ious = df[(df["Dataset"] == dataset) & (df["Model"] == model) & (df["Experiment"] == experiment)]["IoU"]
augmentation_ious = \
df[(df["Dataset"] == dataset) & (df["Model"] == model) & (df["Experiment"] == "Vanilla Augmentation")][
"IoU"]
w_p_values.at[i, "p-value"] = round(ttest_ind(ious, augmentation_ious, equal_var=False)[-1], 3)
for dset in np.unique(df["Dataset"]):
        overall_utest = mannwhitneyu(df[(df["Experiment"] == "Consistency Training") & (df["Dataset"] == dset)]["IoU"],
                                     df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset)]["IoU"])
        print(f"{dset}: {overall_utest[0]}, p={round(overall_utest[1], 5)}")
test = table.to_latex(float_format="%.3f")
no_augmentation_performance = filt[filt["Experiment"] == "No Augmentation"].groupby(["Dataset"])["IoU"].mean()
# C.StD analysis
cstd = filt.groupby(["Dataset", "Experiment"])["IoU"].std() / filt.groupby(["Dataset", "Experiment"])[
"IoU"].mean()
cstd = cstd.reset_index()
cstd.rename(columns={"IoU": "Coefficient of Standard Deviation of IoUs"}, inplace=True)
sns.barplot(data=cstd, x="Dataset", y="Coefficient of Standard Deviation of IoUs", hue="Experiment",
hue_order=["No Augmentation", "Vanilla Augmentation", "Consistency Training"])
plt.savefig("consistency_training_cstd.eps")
plt.show()
augmentation_performance = filt[filt["Experiment"] == "Vanilla Augmentation"].groupby(["Dataset"])["IoU"].mean()
    improvement_vs_aug = 100 * (filt.groupby(["Dataset", "Experiment", "ID"])[
        "IoU"].mean() - augmentation_performance) / augmentation_performance
    print(improvement_vs_aug.groupby(["Experiment"]).mean())
improvement_pct = 100 * (filt.groupby(["Dataset", "Experiment", "ID"])[
"IoU"].mean() - no_augmentation_performance) / no_augmentation_performance
improvement_pct = improvement_pct.reset_index()
print(improvement_pct[improvement_pct["Experiment"] == "No Augmentation"])
improvement_pct = improvement_pct[improvement_pct["Experiment"] != "No Augmentation"]
# print(np.max(improvement_pct[improvement_pct["Experiment"] == "Consistency Training"]))
print("Consistency")
print(np.mean(improvement_pct[improvement_pct["Experiment"] == "Consistency Training"]))
print("Augmentation")
print(np.mean(improvement_pct[improvement_pct["Experiment"] == "Vanilla Augmentation"]))
improvement_pct.rename(columns={"IoU": "% Change in mean IoU with respect to No Augmentation"}, inplace=True)
sns.boxplot(data=improvement_pct, x="Dataset", y="% Change in mean IoU with respect to No Augmentation",
hue="Experiment")
plt.savefig("consistency_training_percent.eps")
plt.show()
# print(w_p_values)
# scatter = sns.barplot(data=filt, x="Dataset", y="IoU", hue="Experiment", hue_order=hue_order, order=order)
# scatter.legend(loc='lower right')
# plt.show()
return table
def compare_models(training_method):
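    """
    Compare architectures trained with `training_method` (e.g. "No Augmentation"): generalizability
    gap with respect to Kvasir-SEG (IID), IoU variability, and their correlation per OOD dataset.
    """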
df = collate_base_results_into_df()
df = df[df["Experiment"] == training_method]
# p_value_matrix = np.zeros((len(np.unique(df["Model"])), len(np.unique(df["Model"]))))
# models = np.unique(df["Model"])
# print()
# np.set_printoptions(precision=5, suppress=True)
# fig, ax = plt.subplots(2, 2, sharey=True, sharex=True, figsize=(8, 8))
# for didx, dataset in enumerate(np.unique(df["Dataset"])):
# for i, model in enumerate(models):
# for j, compare_model in enumerate(models):
# p_value_matrix[i, j] = round(ttest_ind(df[(df["Model"] == model) & (df["Dataset"] == dataset)]["IoU"],
# df[(df["Model"] == compare_model) & (df["Dataset"] == dataset)][
# "IoU"],
# equal_var=False)[1], 5)
#
# sns.heatmap(p_value_matrix, ax=ax.flatten()[didx], annot=True, xticklabels=models, yticklabels=models,
# cbar=False)
# ax.flatten()[didx].set_title(dataset)
# plt.tight_layout()
# plt.savefig("model_pvals.eps")
# plt.show()
#
# df_van = df.groupby(["Dataset", "Model"])["IoU"].mean()
# df_van = df_van.reset_index()
# order = df_van.groupby(["Dataset"])["IoU"].mean().sort_values().index
#
# plt.hist(df[df["Dataset"] == "Kvasir-SEG"]["IoU"])
# plt.show()
# sns.barplot(data=df, x="Dataset", y="IoU", hue="Model", order=order)
# plt.show()
# generalizability_gap
grouped = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean().reset_index()
ood = grouped[grouped["Dataset"] != "Kvasir-SEG"].copy()
print(ood.columns)
iid = grouped[grouped["Dataset"] == "Kvasir-SEG"].copy()
for i, row in ood.iterrows():
id = ood.at[i, "ID"]
dataset = ood.at[i, "Dataset"]
model = ood.at[i, "Model"]
iou = row["IoU"]
iid_iou = float(iid[(iid["ID"] == id) & (iid["Model"] == model)]["IoU"])
ood.at[i, "gap"] = 100 * (iou - iid_iou) / iid_iou
sns.barplot(data=ood, x="Dataset", hue="Model", y="gap")
plt.ylim(-100, 0)
plt.ylabel("% Change in IoU wrt IID")
plt.savefig("delta_iou_baseline.eps")
plt.show()
cstds = df.groupby(["Dataset", "Model"])["IoU"].std() / df.groupby(["Dataset", "Model"])["IoU"].mean()
cstds = cstds.reset_index()
sns.barplot(data=cstds, x="Dataset", y="IoU", hue="Model")
both = pd.merge(ood, cstds, on=["Model", "Dataset"])
plt.savefig("cstd_baseline.eps")
plt.show()
fig, ax = plt.subplots(3, 1, figsize=(6, 6))
for didx, dataset in enumerate(np.unique(both["Dataset"])):
test = pearsonr(both[both["Dataset"] == dataset]["IoU_y"], both[both["Dataset"] == dataset]["gap"])
ax.flatten()[didx].set_title(f"{dataset} : Rp={round(test[0], 5)}, p={round(test[1], 5)}")
if didx == 2:
scatter = sns.scatterplot(ax=ax.flatten()[didx], data=both[both["Dataset"] == dataset], x="IoU_y", y="gap",
hue="Model")
scatter.legend(loc="upper center", bbox_to_anchor=(0.5, -0.2), ncol=3)
else:
sns.scatterplot(ax=ax.flatten()[didx], data=both[both["Dataset"] == dataset], x="IoU_y", y="gap",
hue="Model", legend=False)
# plt.tight_layout()
for axis in ax:
axis.set_ylabel("")
axis.set_xlabel("")
axis.set_yticklabels([])
axis.set_xticklabels([])
# axis.set_ylim(axis.get_ylim()[::-1])
ax.flatten()[2].set_xlabel("C.Std mIoU")
ax.flatten()[1].set_ylabel("% Change in mIoU wrt IID")
plt.tight_layout()
plt.savefig("underspecification_baseline.eps")
    plt.show()
def plot_consistencies():
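    """Plot SIS and IoU per model and training procedure on Kvasir-SEG, plus FPN consistency/OOD-IoU training curves."""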
df = collate_base_results_into_df()
df.groupby(["Experiment", "Dataset", "Model", "ID"]).mean().reset_index().to_csv("test.csv")
grouped = df.groupby(["Experiment", "Dataset", "Model", "ID"])["SIS"].mean().reset_index()
grouped = grouped[grouped["Experiment"] != "Inpainter Augmentation"]
grouped = grouped[grouped["Dataset"] == "Kvasir-SEG"]
# grouped.to_csv("test.csv")
sns.barplot(data=grouped, x="Model", y="SIS", hue="Experiment")
plt.show()
grouped = df.groupby(["Experiment", "Dataset", "Model", "ID"])["IoU"].mean().reset_index()
grouped = grouped[grouped["Experiment"] != "Inpainter Augmentation"]
grouped = grouped[grouped["Dataset"] == "Kvasir-SEG"]
# grouped.to_csv("test.csv")
sns.barplot(data=grouped, x="Model", y="IoU", hue="Experiment")
plt.tight_layout()
plt.show()
# aug_consistencies = []
# aug_oods = []
# cons_consistencies = []
# cons_oods
cons_df = pd.DataFrame()
aug_df = pd.DataFrame()
    for file in os.listdir("logs/consistency/FPN"):
        if "augmentation" in file:
            aug_df = aug_df.append(pd.read_csv(os.path.join("logs/consistency/FPN", file)), ignore_index=True)
        elif "consistency" in file:
            cons_df = cons_df.append(pd.read_csv(os.path.join("logs/consistency/FPN", file)), ignore_index=True)
        else:
            continue
cons_df = cons_df[cons_df["epoch"] < 300]
aug_df = aug_df[aug_df["epoch"] < 300]
sns.lineplot(data=cons_df, x="epoch", y="consistency", color="orange")
sns.lineplot(data=aug_df, x="epoch", y="consistency", color="blue")
sns.lineplot(data=cons_df, x="epoch", y="ood_iou", color="orange")
sns.lineplot(data=aug_df, x="epoch", y="ood_iou", color="blue")
plt.show()
def plot_ensemble_variance_relationship(training_method):
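    """
    Relate the variability (C.StD) of an ensemble's constituents to the ensemble's % IoU improvement
    over the constituents' mean, per dataset and training method ("all", "vanilla", "augmentation", "consistency").
    """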
df = collate_ensemble_results_into_df(training_method)
df_constituents = collate_base_results_into_df()
df_constituents = df_constituents[df_constituents["Experiment"] != "Inpainter Augmentation"]
df["constituents"] = df["constituents"].apply(
lambda x: [int(i.split("_")[-1]) for i in x] if type(x) == type([]) else int(x))
if training_method != "all":
if training_method == "vanilla": training_method = "No Augmentation"
if training_method == "augmentation": training_method = "Vanilla Augmentation"
if training_method == "consistency": training_method = "Consistency Training"
df_constituents = df_constituents[df_constituents["Experiment"] == training_method]
colors = ["tab:blue", "tab:orange", "tab:green", "tab:red"]
# colors = ["b", "g", "r", "c", "m", "y"]
colormap = dict(zip(np.unique(df["Dataset"]), colors))
var_dataset = pd.DataFrame()
for i, row in df.iterrows():
model = df.at[i, "Model"]
id = df.at[i, "ID"]
experiment = df.at[i, "Experiment"]
if model == "diverse":
filtered = df_constituents[
(df_constituents["ID"] == id) &
(df_constituents["Experiment"] == experiment)]
cstd = (filtered.groupby(["Dataset"]).std() / filtered.groupby(["Dataset"]).mean())["IoU"]
improvements = df[
(df["Model"] == model) & (df["Experiment"] == experiment) & (df["ID"] == id)]
improvements = 100 * (improvements.groupby(["Dataset"])["IoU"].mean() - filtered.groupby(["Dataset"])[
"IoU"].mean()) / filtered.groupby(["Dataset"])["IoU"].mean()
cstd = cstd.reset_index()
improvements = improvements.reset_index()
cstd.rename(columns={"IoU": "C.StD"}, inplace=True)
improvements.rename(columns={"IoU": "% Increase in Generalizability wrt Constituents Mean"}, inplace=True)
merged = pd.merge(improvements, cstd)
merged["Model"] = [model] * 4 # dataset length
merged["ID"] = [id] * 4
merged["Experiment"] = [experiment] * 4
var_dataset = var_dataset.append(merged)
else:
constituents = df.at[i, "constituents"]
filtered = df_constituents[
(df_constituents["Model"] == model) & (df_constituents["ID"].isin(constituents)) & (
df_constituents["Experiment"] == experiment)]
cstd = (filtered.groupby(["Dataset"]).std() / filtered.groupby(["Dataset"]).mean())["IoU"]
improvements = df[
(df["Model"] == model) & (df["Experiment"] == experiment) & (df["ID"] == id)]
improvements = 100 * (improvements.groupby(["Dataset"])["IoU"].mean() - filtered.groupby(["Dataset"])[
"IoU"].mean()) / filtered.groupby(["Dataset"])["IoU"].mean()
cstd = cstd.reset_index()
improvements = improvements.reset_index()
cstd.rename(columns={"IoU": "C.StD"}, inplace=True)
improvements.rename(columns={"IoU": "% Increase in Generalizability wrt Constituents Mean"}, inplace=True)
merged = pd.merge(improvements, cstd)
merged["Model"] = [model] * 4
merged["ID"] = [id] * 4
merged["Experiment"] = [experiment] * 4
var_dataset = var_dataset.append(merged)
# improvements = filtered.groupby
# cstd = filtered
# df.at[i, "cstd"] =
# cstds.append(0)
print(len(np.unique(var_dataset[var_dataset["Experiment"] == "Vanilla Augmentation"][
"% Increase in Generalizability wrt Constituents Mean"])))
print(len(np.unique(var_dataset[var_dataset["Experiment"] == "No Augmentation"][
"% Increase in Generalizability wrt Constituents Mean"])))
print(var_dataset.columns)
datasets = np.unique(var_dataset["Dataset"])
training_methods = ["No Augmentation", "Vanilla Augmentation", "Consistency Training"]
fig, ax = plt.subplots(len(datasets), len(training_methods), figsize=(11, 12))
var_dataset = var_dataset.replace("diverse", "MultiModel")
for i, dataset_name in enumerate(datasets):
for j, training_method in enumerate(training_methods):
dataset_filtered = var_dataset[
(var_dataset["Dataset"] == dataset_name) & (var_dataset["Experiment"] == training_method)]
# sns.regplot(ax=ax.flatten()[i], data=dataset_filtered, x="C.StD",
# y="% Increase in Generalizability wrt Constituents Mean",
# ci=99,
# color=colormap[dataset_name], label=dataset_name)
# correlation = pearsonr(dataset_filtered["C.StD"],
# dataset_filtered["% Increase in Generalizability wrt Constituents Mean"])
if j == 0: # seaborn does not like global legends
                scatter = sns.scatterplot(ax=ax[i, j], data=dataset_filtered, x="C.StD",
                                          y="% Increase in Generalizability wrt Constituents Mean",
                                          legend=False, color=colormap[dataset_name], label=dataset_name)
ax[i, j].set_title(training_method)
else:
                scatter = sns.scatterplot(ax=ax[i, j], data=dataset_filtered, x="C.StD",
                                          y="% Increase in Generalizability wrt Constituents Mean",
                                          legend=False, color=colormap[dataset_name])
correlation = spearmanr(dataset_filtered["C.StD"],
dataset_filtered["% Increase in Generalizability wrt Constituents Mean"])
ax[i, j].set_title(f"Rs={correlation[0]:.3f}, p={correlation[1]:.6f}")
for a in ax.flatten():
a.set(xlabel=None)
a.set(ylabel=None)
for axis, col in zip(ax[0], training_methods):
axis.annotate(col, xy=(0.5, 1.5), xytext=(0, 5),
xycoords='axes fraction', textcoords='offset points',
size='xx-large', ha='center', va='baseline')
fig.add_subplot(111, frameon=False)
# fig.legend(loc='lower center', bbox_to_anchor=(0.5, 0.5), ncol=2, labels=np.unique(var_dataset["Dataset"]))
fig.legend(loc='lower center', bbox_to_anchor=(0.5, 0), ncol=4)
plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)
plt.ylabel("% Increase in Generalizability wrt Constituents Mean")
plt.xlabel("Coefficient of Standard Deviation")
# plt.title()
fig.tight_layout()
# fig.subplots_adjust(bottom=0.2)
plt.savefig("ensemble_variance_relationship_statistical.eps")
plt.show()
# hue_order = var_dataset.groupby(["Model"])[
# "% Increase in Generalizability wrt Constituents Mean"].mean().sort_values().index
var_dataset = var_dataset.replace("diverse", "MultiModel")
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=var_dataset, ax=ax, x="Dataset", y="% Increase in Generalizability wrt Constituents Mean",
hue="Model",
order=["Kvasir-SEG", "CVC-ClinicDB", "EndoCV2020", "Etis-LaribDB"])
plt.axhline(0, linestyle="--")
plt.savefig("improvements_due_to_ensembles.eps")
plt.show()
def get_ensemble_p_vals():
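    """
    Print significance tests comparing single models with ensembles (per model and averaged across
    models), and plot p-value heatmaps comparing ensemble training methods per dataset.
    """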
singular = collate_base_results_into_df()
# cross-model t-test (not used in thesis)
print("No augmentation")
for mix, model in enumerate(np.unique(singular["Model"])):
print(model, end="&")
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
single = singular[singular["Experiment"] == "No Augmentation"]
ensemble = collate_ensemble_results_into_df(type="vanilla")
single = single[(single["Dataset"] == dataset) & (single["Model"] == model)]
ensemble = ensemble[(ensemble["Dataset"] == dataset) & (ensemble["Model"] == model)]
ttest = ttest_ind(
single["IoU"], ensemble["IoU"], equal_var=False
)
print(round(ttest[1], 5), end=" & ")
print("\\\ ")
print("Augmentation")
for mix, model in enumerate(np.unique(singular["Model"])):
print(model, end="&")
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
single = singular[singular["Experiment"] == "Vanilla Augmentation"]
ensemble = collate_ensemble_results_into_df(type="augmentation")
single = single[(single["Dataset"] == dataset) & (single["Model"] == model)]
ensemble = ensemble[(ensemble["Dataset"] == dataset) & (ensemble["Model"] == model)]
ttest = ttest_ind(
single["IoU"], ensemble["IoU"], equal_var=False
)
print(round(ttest[1], 5), end=" & ")
print("\\\ ")
print("Consistency Training")
for mix, model in enumerate(np.unique(singular["Model"])):
print(model, end="&")
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
single = singular[singular["Experiment"] == "Consistency Training"]
ensemble = collate_ensemble_results_into_df(type="consistency")
single = single[(single["Dataset"] == dataset) & (single["Model"] == model)]
ensemble = ensemble[(ensemble["Dataset"] == dataset) & (ensemble["Model"] == model)]
ttest = ttest_ind(
single["IoU"], ensemble["IoU"], equal_var=False
)
print(round(ttest[1], 5), end=" & ")
print("\\\ ")
# model-averaged
print("When averaged across models:")
print("No augmentation")
experiments_long = ["No Augmentation", "Conventional Augmentation", "Consistency Training"]
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
single = singular[singular["Experiment"] == "No Augmentation"]
ensemble = collate_ensemble_results_into_df(type="vanilla")
single = single[(single["Dataset"] == dataset)]
ensemble = ensemble[(ensemble["Dataset"] == dataset)]
ttest = mannwhitneyu(
single["IoU"], ensemble["IoU"]
)
print(round(ttest[1], 3), end=" & ")
print("\nAugmentation")
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
single = singular[singular["Experiment"] == "Vanilla Augmentation"]
ensemble = collate_ensemble_results_into_df(type="augmentation")
single = single[(single["Dataset"] == dataset)]
ensemble = ensemble[(ensemble["Dataset"] == dataset)]
ttest = mannwhitneyu(
single["IoU"], ensemble["IoU"]
)
print(round(ttest[1], 3), end=" & ")
print("\nConsistency Training")
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
single = singular[singular["Experiment"] == "Consistency Training"]
ensemble = collate_ensemble_results_into_df(type="consistency")
single = single[(single["Dataset"] == dataset)]
ensemble = ensemble[(ensemble["Dataset"] == dataset)]
ttest = mannwhitneyu(
single["IoU"], ensemble["IoU"]
)
print(round(ttest[1], 3), end=" & ")
experiments = ["vanilla", "augmentation", "consistency"]
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(8, 8))
for dix, dataset in enumerate(np.unique(singular["Dataset"])):
p_values = np.zeros((len(experiments), len(experiments)))
for i, exp1 in enumerate(experiments):
for j, exp2 in enumerate(experiments):
df1 = collate_ensemble_results_into_df(exp1)
df2 = collate_ensemble_results_into_df(exp2)
test = mannwhitneyu(df1[df1["Dataset"] == dataset]["IoU"],
df2[(df2["Dataset"] == dataset)]["IoU"])
p_values[i, j] = round(test[1], 5)
sns.heatmap(p_values, ax=axes.flatten()[dix], annot=True, xticklabels=experiments_long,
yticklabels=experiments_long,
cbar=False)
ax = axes.flatten()[dix].set_title(dataset)
plt.tight_layout()
plt.savefig("ensemble_relative_pvals.eps")
plt.show()
def compare_ensembles():
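    """
    Compare ensemble improvements over the corresponding single models across training methods:
    p-value heatmaps, box plots of % improvement, and constituent C.StD.
    """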
singular = collate_base_results_into_df()
singular_no_augment = singular[singular["Experiment"] == "No Augmentation"].groupby(["Dataset", "ID"])[
"IoU"].mean()
singular_augment = singular[singular["Experiment"] == "Vanilla Augmentation"].groupby(["Dataset", "ID"])[
"IoU"].mean()
singular_ct = singular[singular["Experiment"] == "Consistency Training"].groupby(["Dataset", "ID"])[
"IoU"].mean()
no_augment = collate_ensemble_results_into_df(type="vanilla").groupby(["Dataset", "ID"])[
"IoU"].mean()
augment = collate_ensemble_results_into_df(type="augmentation").groupby(["Dataset", "ID"])[
"IoU"].mean()
consistency = collate_ensemble_results_into_df(type="consistency").groupby(["Dataset", "ID"])[
"IoU"].mean()
no_augment_improvements = (100 * (no_augment - singular_no_augment) / singular_no_augment).reset_index()
augment_improvements = (100 * (augment - singular_augment) / singular_augment).reset_index()
ct_improvements = (100 * (consistency - singular_ct) / singular_ct).reset_index()
no_augment_improvements["Experiment"] = pd.Series(["No Augmentation"] * len(no_augment_improvements),
index=no_augment_improvements.index)
augment_improvements["Experiment"] = pd.Series(["Conventional Augmentation"] * len(augment_improvements),
index=augment_improvements.index)
ct_improvements["Experiment"] = pd.Series(["Consistency Training"] * len(ct_improvements),
index=ct_improvements.index)
# print("No augmentation")
# print(no_augment_improvements)
# print("Augmentation")
# print(augment_improvements)
# print("Consistency Training")
# print(ct_improvements)
# print(augment_improvements)
overall_improvements = pd.concat([no_augment_improvements, augment_improvements, ct_improvements],
ignore_index=True)
experiments = np.unique(overall_improvements["Experiment"])
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(8, 8))
for dix, dataset in enumerate(np.unique(overall_improvements["Dataset"])):
p_values = np.zeros((len(experiments), len(experiments)))
for i, exp1 in enumerate(experiments):
for j, exp2 in enumerate(experiments):
test = ttest_ind(overall_improvements[(overall_improvements["Dataset"] == dataset) & (
overall_improvements["Experiment"] == exp1)]["IoU"],
overall_improvements[(overall_improvements["Dataset"] == dataset) & (
overall_improvements["Experiment"] == exp2)]["IoU"], equal_var=True)
p_values[i, j] = test[1]
sns.heatmap(p_values, ax=axes.flatten()[dix], annot=True, xticklabels=experiments, yticklabels=experiments,
cbar=False)
ax = axes.flatten()[dix].set_title(dataset)
plt.tight_layout()
plt.savefig("ensemble_improvement_pvals.eps")
plt.show()
box = sns.boxplot(data=overall_improvements, x="Experiment", y="IoU", hue="Dataset",
hue_order=["Kvasir-SEG", "EndoCV2020", "CVC-ClinicDB", "Etis-LaribDB"])
box.legend(loc="upper left")
box.set(ylabel="Improvement in IoU (%)")
box.set(xlabel="Training Method")
box.axhline(0, linestyle="--")
plt.savefig("ensemble_improvements.eps")
print("..,.")
print(overall_improvements.groupby(["Experiment"])["IoU"].mean())
print(overall_improvements.groupby(["Experiment"])["IoU"].max())
plt.show()
grouped = singular[singular["Experiment"] != "Inpainter Augmentation"].groupby(["Model", "Dataset", "Experiment"])[
"IoU"]
constituent_cstd = grouped.std() / grouped.mean()
print(constituent_cstd)
def test():
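    """Scatter ensemble % IoU improvement against single-model IoU C.StD, with the Spearman correlation in the title."""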
ensemble = collate_ensemble_results_into_df("all")
ensemble = ensemble.replace("augmentation", "Vanilla Augmentation")
ensemble = ensemble.replace("vanilla", "No Augmentation")
ensemble = ensemble.replace("consistency", "Consistency Training")
ensemble = ensemble[ensemble["Model"] != "diverse"]
ensemble_means = ensemble.groupby(["Experiment", "Dataset", "Model", "ID"])["IoU"].mean()
singular = collate_base_results_into_df()
singular = singular[singular["Experiment"] != "Inpainter Augmentation"]
singular_grouped = singular.groupby(["Experiment", "Dataset", "Model"])["IoU"]
# input()
ensemble_improvements = 100 * (ensemble_means - singular_grouped.mean()) / singular_grouped.mean()
singular_cstds = singular_grouped.std() / singular_grouped.mean()
merged = pd.merge(ensemble_improvements, singular_cstds, how='inner', on=["Experiment", "Dataset", "Model"])
# merged = merged.groupby(["Experiment", "Model"]).mean()
    ax = sns.scatterplot(data=merged, x="IoU_y", y="IoU_x", hue="Experiment")
    test = spearmanr(merged["IoU_y"], merged["IoU_x"])
    plt.title(f"R_s = {round(test[0], 5)}, p={round(test[1], 5)}")
    ax.set_ylabel("Change in IoU (%)")
    ax.set_xlabel("IoU C.StD.")
# print(spearmanr(merged["IoU_y"], merged["IoU_x"]))
plt.savefig("ensembles_underspecification.eps")
plt.show()
if __name__ == '__main__':
training_plot("logs/consistency/DeepLab/consistency_1.csv")
# plot_inpainter_vs_conventional_performance()
# plot_training_procedure_performance()
# plot_ensemble_performance()
# compare_models("No Augmentation")
# compare_models("Vanilla Augmentation")
# compare_models("Consistency Training")
# plot_ensemble_variance_relationship("all")
# plot_cons_vs_aug_ensembles()
# compare_ensembles()
# get_ensemble_p_vals()
# test()