experiments/plotting.py

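# Plotting and statistical-analysis helpers for the segmentation experiments.
# All paths used below (experiments/Data/pickles, logs/..., the saved *.eps figures) are relative,
# so the script is presumably meant to be run from the repository root.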
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import pickle
from utils.formatting import SafeDict
from scipy.stats import wasserstein_distance
from scipy.stats import ttest_ind, pearsonr, mannwhitneyu, spearmanr
from models.segmentation_models import *


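# Quick look at a single training run: training vs. validation Jaccard loss over epochs.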
def training_plot(log_csv):
    log_df = pd.read_csv(log_csv)
    plt.title("Training Plot Sample")
    plt.xlabel("Epochs")
    plt.ylabel("Jaccard Loss")
    plt.xlim((0, 300))
    plt.ylim((0, 1))
    plt.plot(log_df["epoch"], log_df["train_loss"], label="Training Loss")
    plt.plot(log_df["epoch"], log_df["val_loss"], label="Validation Loss")
    # plt.plot(log_df["epoch"], log_df["ood_iou"], label="Etis-LaribDB iou")
    plt.legend()
    plt.show()


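# Scatter of consistency (SIS) and IID test IoU against Etis-LaribDB OOD IoU from a training log.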
def ood_correlations(log_csv):
    log_df = pd.read_csv(log_csv)
    plt.title("SIS-OOD correlation")
    plt.xlabel("SIS")
    plt.ylabel("Etis-LaribDB OOD performance")
    plt.xlim((0, 1))
    plt.ylim((0, 1))
    plt.scatter(log_df["consistency"], log_df["ood_iou"], label="Consistency")
    plt.scatter(log_df["iid_test_iou"], log_df["ood_iou"], label="IID IoU")

    plt.legend()
    plt.show()


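# Consistency and OOD IoU plotted against training epoch for a single log file.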
def ood_v_epoch(log_csv):
    log_df = pd.read_csv(log_csv)
    plt.title("Training Plot Sample")
    plt.xlabel("Epochs")
    plt.ylabel("SIL")
    plt.xlim((0, 500))
    plt.ylim((0, 1))
    plt.plot(log_df["epoch"], log_df["consistency"], label="consistency")
    plt.plot(log_df["epoch"], log_df["ood_iou"], label="ood iou")
    plt.legend()
    plt.show()


def get_boxplots_for_models():
    """
    Bar plot comparing model performance: the Δ% change in IoU relative to each run's median Kvasir-SEG (IID)
    IoU, split by dataset and model.
    """
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    model_names = ["DeepLab", "FPN", "Unet", "InductiveNet", "TriUnet"]
    dataset = []
    for fname in sorted(os.listdir("experiments/Data/pickles")):
        if "0" in fname:
            with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
                model = fname.split("_")[0]
                if model == "InductiveNet":
                    model = "DD-DeepLabV3+"
                data = pickle.load(file)
                datasets, samples = data["ious"].shape
                kvasir_ious = data["ious"][0]
                mean_iid_iou = np.median(kvasir_ious)
                print(mean_iid_iou)
                if "maximum_consistency" in fname:
                    continue
                for i in range(datasets):
                    if i == 0:
                        continue
                    for j in range(samples):
                        if data["ious"][i, j] < 0.25 or data["ious"][0][j] < 0.75:
                            print(f"{fname} with id {j} has iou {data['ious'][i, j]} and {data['ious'][0][j]} ")
                            continue
                        # dataset.append([dataset_names[i], model, data["ious"][i, j]])

                        dataset.append(
                            [dataset_names[i], model, 100 * (data["ious"][i, j] - mean_iid_iou) / mean_iid_iou])

    dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "\u0394%IoU"])
    print(dataset)
    plt.ylim(0, -100)
    sns.barplot(x="Dataset", y="\u0394%IoU", hue="Model", data=dataset)
    plt.show()


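# Bar plot of the coefficient of standard deviation (std/mean) of IoUs per model and dataset.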
def get_variances_for_models():
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    model_names = ["DeepLab", "FPN", "Unet", "InductiveNet", "TriUnet"]
    dataset = []
    for fname in sorted(os.listdir("experiments/Data/pickles")):
        if "maximum_consistency" in fname:
            continue
        if "0" in fname:
            with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
                model = fname.split("_")[0]
                if model == "InductiveNet":
                    model = "DD-DeepLabV3+"
                data = pickle.load(file)
                datasets, samples = data["ious"].shape

                if "maximum_consistency" in fname:
                    continue
                for i in range(datasets):
                    # if i == 0:
                    #     continue

                    for j in range(samples):
                        if data["ious"][0][j] < 0.75:
                            print(fname, "-", j)
                            continue
                        if i == 3 and model == "DD-DeepLabV3+":  # InductiveNet was renamed above
                            print("inductivenet", data["ious"][i, j])
                        if i == 3 and model == "DeepLab":
                            print("DeepLab", data["ious"][i, j])

                        dataset.append([dataset_names[i], model, data["ious"][i, j]])

    iou_dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "Coefficient of Std.Dev"])
    std_dataset = iou_dataset.groupby(["Model", "Dataset"]).std() / iou_dataset.groupby(["Model", "Dataset"]).mean()
    std_dataset = std_dataset.reset_index()
    print(std_dataset)
    plt.ylim((0, 0.15))
    sns.barplot(x="Dataset", y="Coefficient of Std.Dev", hue="Model", data=std_dataset)
    plt.show()


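# Prints the number of parameters for each model architecture.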
def plot_parameters_sizes():
    models = [DeepLab, FPN, InductiveNet, Unet, TriUnet]
    model_names = ["DeepLab", "FPN", "InductiveNet", "Unet", "TriUnet"]
    for model_name, model_c in zip(model_names, models):
        model = model_c()
        print(f"{model_name}: {sum(p.numel() for p in model.parameters(recurse=True))}")


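# The result pickles appear to hold a dict with an "ious" array of shape (n_datasets, n_samples),
# indexed in the order of dataset_names below; single-model pickles also carry a matching "sis" array,
# and ensemble pickles a per-sample "constituents" entry. The two collate_* functions rely on that layout.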
def collate_ensemble_results_into_df(type="consistency"):
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    model_names = ["DeepLab", "FPN", "Unet", "InductiveNet", "TriUnet"]
    dataset = []
    for fname in sorted(os.listdir("experiments/Data/pickles")):
        if "ensemble" not in fname:
            continue
        if "maximum_consistency" in fname or "last_epoch" in fname:
            continue
        if type != "all":
            if type == "consistency" and ("augmentation" in fname or "vanilla" in fname):
                continue
            if type == "augmentation" and "augmentation" not in fname:
                continue
            if type == "vanilla" and "vanilla" not in fname:
                continue

        with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
            model = fname.split("-")[0]
            # experiment = fname.split("-")[-1]

            if "vanilla" in fname:
                experiment = "No Augmentation"
            elif "augmentation" in fname:
                experiment = "Vanilla Augmentation"
            else:
                experiment = "Consistency Training"
            data = pickle.load(file)

            # print(file, data.keys())
            datasets, samples = data["ious"].shape
            if model == "InductiveNet":
                model = "DD-DeepLabV3+"
            for i in range(datasets):
                for j in range(samples):
                    if data["ious"][0, j] < 0.75:  # if bugged out; rare
                        continue
                    try:
                        dataset.append(
                            [dataset_names[i], model, j, experiment, data["ious"][i, j], data["constituents"][j]])
                    except KeyError:
                        continue

    iou_dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "ID", "Experiment", "IoU", "constituents"])
    # print(iou_dataset)
    iou_dataset.to_csv("ensemble_data.csv")
    return iou_dataset


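# Flattens the single-model result pickles into one long-format DataFrame (one row per dataset/model/run).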
def collate_base_results_into_df():
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    model_names = ["DeepLab", "FPN", "Unet", "InductiveNet", "TriUnet"]
    dataset = []
    for fname in sorted(os.listdir("experiments/Data/pickles")):
        if "ensemble" in fname:
            # print(fname)
            continue
        if "maximum_consistency" in fname or "last_epoch" in fname:
            # print(fname)
            continue

        with open(os.path.join("experiments/Data/pickles", fname), "rb") as file:
            model = fname.split("_")[0]
            data = pickle.load(file)
            datasets, samples = data["ious"].shape
            if model == "InductiveNet":
                model = "DD-DeepLabV3+"
            experiment = "No Augmentation"
            if "sil" in fname and "_G" not in fname:
                experiment = "Consistency Training"
            elif "_V" in fname:
                experiment = "Vanilla Augmentation"
            elif "_G" in fname:
                experiment = "Inpainter Augmentation"

            for i in range(datasets):
                for j in range(samples):
                    if data["ious"][0, j] < 0.75:
                        continue
                    dataset.append([dataset_names[i], model, j, experiment, data["ious"][i, j], data["sis"][i, j]])

    iou_dataset = pd.DataFrame(data=dataset, columns=["Dataset", "Model", "ID", "Experiment", "IoU", "SIS"])
    iou_dataset.to_csv("base_data.csv")
    return iou_dataset


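# Ensemble IoUs per dataset, compared against single-model baselines and related to the constituents' variability.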
def plot_ensemble_performance():
    df = collate_ensemble_results_into_df("augmentation")
    print(df)
    latex = df.groupby(["Model", "Dataset"])["IoU"].mean()
    print(latex.reset_index())
    print(latex.to_latex(float_format="%.3f"))
    order = df.groupby(["Dataset", "Model"])["IoU"].mean().sort_values().index
    sns.barplot(data=df, x="Dataset", y="IoU", hue="Model")
    plt.show()
    grouped_mean = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean()
    # print(grouped_mean)
    grouped_iid = np.abs(grouped_mean - grouped_mean["Kvasir-SEG"]) / grouped_mean["Kvasir-SEG"]
    # print(grouped_iid)

    nedf = collate_base_results_into_df()
    ne_grouped_mean = nedf.groupby(["Dataset", "Model"])["IoU"].mean()
    # print(ne_grouped_mean)
    ne_grouped_iid = np.abs(ne_grouped_mean["Kvasir-SEG"] - ne_grouped_mean) / ne_grouped_mean["Kvasir-SEG"]
    # print(ne_grouped_iid)

    comparison = ne_grouped_iid - grouped_iid
    comparison = comparison.reset_index()

    sns.barplot(data=comparison, x="Dataset", y="IoU", hue="Model")
    plt.show()

    # plot delta vs variance
    ne_grouped_coeff_std = nedf.groupby(["Dataset", "Model"])["IoU"].std() / ne_grouped_mean
    ne_grouped_coeff_std = ne_grouped_coeff_std.reset_index()
    ne_grouped_coeff_std = ne_grouped_coeff_std.rename(columns={"IoU": "Coeff. StD of IoUs"})
    # print(ne_grouped_coeff_std.head(10))
    sns.barplot(data=ne_grouped_coeff_std, x="Dataset", y="Coeff. StD of IoUs", hue="Model")
    plt.show()
    test = pd.merge(ne_grouped_coeff_std, comparison)
    test = test.rename(columns={"IoU": "% Improvement over mean constituent IoU"})
    test["% Improvement over mean constituent IoU"] *= 100
    test = test.groupby(["Model", "ID"]).mean()
    test = test.reset_index()

    print("mean", np.mean(test))
    print("max", np.max(test))
    # print(test)

    sns.lineplot(data=test, x="Coeff. StD of IoUs", y="% Improvement over mean constituent IoU", err_style="bars",
                 color="gray", linestyle='--')
    test = test.groupby("Model").mean().reset_index()
    sns.scatterplot(x=test["Coeff. StD of IoUs"], y=test["% Improvement over mean constituent IoU"],
                    hue=test["Model"], s=100, ci=99)
    plt.show()


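# Aggregates ensemble and single-model means and coefficients of standard deviation; no figure is produced here.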
def plot_overall_ensemble_performance():
    df = collate_ensemble_results_into_df("both")
    grouped_mean = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean()

    nedf = collate_base_results_into_df()
    ne_grouped_mean = nedf.groupby(["Dataset", "Model"])["IoU"].mean()

    # plot delta vs variance
    ne_grouped_coeff_std = nedf.groupby(["Dataset", "Model"])["IoU"].std() / ne_grouped_mean
    ne_grouped_coeff_std = ne_grouped_coeff_std.reset_index()
    ne_grouped_coeff_std = ne_grouped_coeff_std.rename(columns={"IoU": "Coeff. StD of IoUs"})


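# Mann-Whitney U tests comparing consistency-trained against augmentation-trained ensembles, printed as LaTeX rows.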
def plot_cons_vs_aug_ensembles():
    df = collate_ensemble_results_into_df("consistency")
    df2 = collate_ensemble_results_into_df("augmentation")
    grouped = df2.groupby(["Model", "Dataset"])["IoU"].mean()
    grouped2 = df2.groupby(["Dataset"])["IoU"].mean()
    grouped3 = df.groupby(["Dataset"])["IoU"].mean()

    print(grouped2)
    print(grouped3)
    latex = grouped.to_latex(float_format="%.3f")
    for dset in np.unique(df2["Dataset"])[::-1]:
        utest = mannwhitneyu(df[df["Dataset"] == dset]["IoU"], df2[df2["Dataset"] == dset]["IoU"])
        print(f"{dset} & {round(utest[0], 5)} & {round(utest[1], 5)} \\\\ ")


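# Inpainter vs. conventional augmentation: significance tests and % change in mean IoU relative to No Augmentation.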
def plot_inpainter_vs_conventional_performance():
    df = collate_base_results_into_df()
    df = df[df["Experiment"] != "Consistency Training"]
    models = np.unique(df["Model"])
    for dset in np.unique(df["Dataset"])[::-1]:
        overall_utest = mannwhitneyu(df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset)]["IoU"],
                                     df[(df["Experiment"] == "Inpainter Augmentation") & (df["Dataset"] == dset)][
                                         "IoU"])
        print(f"{dset} & {overall_utest[0]}, p={round(overall_utest[1], 5)} \\\\ ")

    for model in models:
        print(f"{model}", end="")
        for dset in np.unique(df["Dataset"]):
            ttest = ttest_ind(
                df[(df["Experiment"] == "Inpainter Augmentation") & (df["Dataset"] == dset) & (df["Model"] == model)][
                    "IoU"],
                df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset) & (df["Model"] == model)][
                    "IoU"],
                equal_var=False)
            print(f" & {round(ttest[1], 5)}", end="")
        print("\\\\ ")
    table = df.groupby(["Dataset", "Model", "Experiment"])["IoU"].mean()
    no_augmentation = df[df["Experiment"] == "No Augmentation"].groupby(["Dataset"])[
        "IoU"].mean()

    improvements = 100 * (table - no_augmentation) / no_augmentation
    improvements = improvements.reset_index()
    improvements = improvements[improvements["Experiment"] != "No Augmentation"]
    improvements.rename(columns={"IoU": "% Change in mean IoU with respect to No Augmentation"}, inplace=True)

    test = table.to_latex(float_format="%.3f")
    # improvements = improvements[improvements["Dataset"] == "CVC-ClinicDB"]
    print(np.max(improvements[improvements["Experiment"] == "Vanilla Augmentation"]))
    print(np.mean(improvements[improvements["Experiment"] == "Vanilla Augmentation"]))

    print(np.max(improvements[improvements["Experiment"] == "Inpainter Augmentation"]))
    print(np.mean(improvements[improvements["Experiment"] == "Inpainter Augmentation"]))
    sns.boxplot(data=improvements, x="Dataset", y="% Change in mean IoU with respect to No Augmentation",
                hue="Experiment")

    plt.savefig("augmentation_plot.eps")
    plt.show()
    return table


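# Compares the training procedures (No Augmentation, Vanilla Augmentation, Consistency Training)
# in terms of IoU variability and % change in mean IoU.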
def plot_training_procedure_performance():
    df = collate_base_results_into_df()
    df = df[df["Experiment"] != "Inpainter Augmentation"]
    index = df.index[df["Experiment"] == "No Augmentation"].tolist() + df.index[
        df["Experiment"] == "Vanilla Augmentation"].tolist() + df.index[
        df["Experiment"] == "Consistency Training"].tolist()
    df = df.reindex(index)
    # print(df)
    filt = df.groupby(["Dataset", "ID", "IoU", "Experiment"]).mean()
    filt = filt.reset_index()
    hue_order = df.groupby(["Experiment"])["IoU"].mean().sort_values().index
    order = df.groupby(["Dataset"])["IoU"].mean().sort_values().index
    table = df.groupby(["Dataset", "Model", "Experiment"])["IoU"].mean()

    w_p_values = table.reset_index()
    for i, row in w_p_values.iterrows():
        experiment = row["Experiment"]
        model = row["Model"]
        dataset = row["Dataset"]
        ious = df[(df["Dataset"] == dataset) & (df["Model"] == model) & (df["Experiment"] == experiment)]["IoU"]
        augmentation_ious = \
            df[(df["Dataset"] == dataset) & (df["Model"] == model) & (df["Experiment"] == "Vanilla Augmentation")][
                "IoU"]

        w_p_values.at[i, "p-value"] = round(ttest_ind(ious, augmentation_ious, equal_var=False)[-1], 3)
    for dset in np.unique(df["Dataset"]):
        overall_ttest = mannwhitneyu(df[(df["Experiment"] == "Consistency Training") & (df["Dataset"] == dset)]["IoU"],
                                     df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset)]["IoU"])
        print(f"{dset}: {overall_ttest[0]}, p={round(overall_ttest[1], 5)} ")

    test = table.to_latex(float_format="%.3f")
    no_augmentation_performance = filt[filt["Experiment"] == "No Augmentation"].groupby(["Dataset"])["IoU"].mean()

    # C.StD analysis
    cstd = filt.groupby(["Dataset", "Experiment"])["IoU"].std() / filt.groupby(["Dataset", "Experiment"])[
        "IoU"].mean()
    cstd = cstd.reset_index()
    cstd.rename(columns={"IoU": "Coefficient of Standard Deviation of IoUs"}, inplace=True)
    sns.barplot(data=cstd, x="Dataset", y="Coefficient of Standard Deviation of IoUs", hue="Experiment",
                hue_order=["No Augmentation", "Vanilla Augmentation", "Consistency Training"])
    plt.savefig("consistency_training_cstd.eps")
    plt.show()
    augmentation_performance = filt[filt["Experiment"] == "Vanilla Augmentation"].groupby(["Dataset"])["IoU"].mean()

    test = improvement_pct = 100 * (filt.groupby(["Dataset", "Experiment", "ID"])[
        "IoU"].mean() - augmentation_performance) / augmentation_performance
    print(test.groupby(["Experiment"]).mean())
    input()
    improvement_pct = 100 * (filt.groupby(["Dataset", "Experiment", "ID"])[
        "IoU"].mean() - no_augmentation_performance) / no_augmentation_performance

    improvement_pct = improvement_pct.reset_index()
    print(improvement_pct[improvement_pct["Experiment"] == "No Augmentation"])
    improvement_pct = improvement_pct[improvement_pct["Experiment"] != "No Augmentation"]

    # print(np.max(improvement_pct[improvement_pct["Experiment"] == "Consistency Training"]))
    print("Consistency")
    print(np.mean(improvement_pct[improvement_pct["Experiment"] == "Consistency Training"]))
    print("Augmentation")
    print(np.mean(improvement_pct[improvement_pct["Experiment"] == "Vanilla Augmentation"]))

    improvement_pct.rename(columns={"IoU": "% Change in mean IoU with respect to No Augmentation"}, inplace=True)
    sns.boxplot(data=improvement_pct, x="Dataset", y="% Change in mean IoU with respect to No Augmentation",
                hue="Experiment")

    plt.savefig("consistency_training_percent.eps")
    plt.show()
    # print(w_p_values)
    # scatter = sns.barplot(data=filt, x="Dataset", y="IoU", hue="Experiment", hue_order=hue_order, order=order)
    # scatter.legend(loc='lower right')
    # plt.show()
    return table


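# Per-architecture generalizability gap (% change in IoU from Kvasir-SEG to the OOD datasets) for one
# training method, and its relationship to the coefficient of standard deviation of the IoUs.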
def compare_models(training_method):
    df = collate_base_results_into_df()
    df = df[df["Experiment"] == training_method]
    # p_value_matrix = np.zeros((len(np.unique(df["Model"])), len(np.unique(df["Model"]))))
    # models = np.unique(df["Model"])
    # print()
    # np.set_printoptions(precision=5, suppress=True)
    # fig, ax = plt.subplots(2, 2, sharey=True, sharex=True, figsize=(8, 8))
    # for didx, dataset in enumerate(np.unique(df["Dataset"])):
    #     for i, model in enumerate(models):
    #         for j, compare_model in enumerate(models):
    #             p_value_matrix[i, j] = round(ttest_ind(df[(df["Model"] == model) & (df["Dataset"] == dataset)]["IoU"],
    #                                                    df[(df["Model"] == compare_model) & (df["Dataset"] == dataset)][
    #                                                        "IoU"],
    #                                                    equal_var=False)[1], 5)
    #
    #     sns.heatmap(p_value_matrix, ax=ax.flatten()[didx], annot=True, xticklabels=models, yticklabels=models,
    #                 cbar=False)
    #     ax.flatten()[didx].set_title(dataset)
    # plt.tight_layout()
    # plt.savefig("model_pvals.eps")
    # plt.show()
    #
    # df_van = df.groupby(["Dataset", "Model"])["IoU"].mean()
    # df_van = df_van.reset_index()
    # order = df_van.groupby(["Dataset"])["IoU"].mean().sort_values().index
    #
    # plt.hist(df[df["Dataset"] == "Kvasir-SEG"]["IoU"])
    # plt.show()
    # sns.barplot(data=df, x="Dataset", y="IoU", hue="Model", order=order)
    # plt.show()

    # generalizability_gap
    grouped = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean().reset_index()
    ood = grouped[grouped["Dataset"] != "Kvasir-SEG"].copy()
    print(ood.columns)
    iid = grouped[grouped["Dataset"] == "Kvasir-SEG"].copy()
    for i, row in ood.iterrows():
        id = ood.at[i, "ID"]
        dataset = ood.at[i, "Dataset"]
        model = ood.at[i, "Model"]
        iou = row["IoU"]
        iid_iou = float(iid[(iid["ID"] == id) & (iid["Model"] == model)]["IoU"])
        print(iou)
        print(iid_iou)
        ood.at[i, "gap"] = 100 * (iou - iid_iou) / iid_iou
    sns.barplot(data=ood, x="Dataset", hue="Model", y="gap")
    plt.ylim(-100, 0)
    plt.ylabel("% Change in IoU wrt IID")
    plt.savefig("delta_iou_baseline.eps")

    plt.show()

    cstds = df.groupby(["Dataset", "Model"])["IoU"].std() / df.groupby(["Dataset", "Model"])["IoU"].mean()
    cstds = cstds.reset_index()
    sns.barplot(data=cstds, x="Dataset", y="IoU", hue="Model")
    both = pd.merge(ood, cstds, on=["Model", "Dataset"])
    plt.savefig("cstd_baseline.eps")

    plt.show()
    fig, ax = plt.subplots(3, 1, figsize=(6, 6))
    for didx, dataset in enumerate(np.unique(both["Dataset"])):
        test = pearsonr(both[both["Dataset"] == dataset]["IoU_y"], both[both["Dataset"] == dataset]["gap"])
        ax.flatten()[didx].set_title(f"{dataset} : Rp={round(test[0], 5)}, p={round(test[1], 5)}")
        if didx == 2:
            scatter = sns.scatterplot(ax=ax.flatten()[didx], data=both[both["Dataset"] == dataset], x="IoU_y", y="gap",
                                      hue="Model")
            scatter.legend(loc="upper center", bbox_to_anchor=(0.5, -0.2), ncol=3)
        else:
            sns.scatterplot(ax=ax.flatten()[didx], data=both[both["Dataset"] == dataset], x="IoU_y", y="gap",
                            hue="Model", legend=False)
    # plt.tight_layout()

    for axis in ax:
        axis.set_ylabel("")
        axis.set_xlabel("")
        axis.set_yticklabels([])
        axis.set_xticklabels([])
        # axis.set_ylim(axis.get_ylim()[::-1])

    ax.flatten()[2].set_xlabel("C.Std mIoU")
    ax.flatten()[1].set_ylabel("% Change in mIoU wrt IID")
    plt.tight_layout()
    plt.savefig("underspecification_baseline.eps")
    plt.show()


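# SIS and IoU on Kvasir-SEG per model and experiment, plus consistency/OOD-IoU training curves from the FPN logs.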
def plot_consistencies():
    df = collate_base_results_into_df()
    df.groupby(["Experiment", "Dataset", "Model", "ID"]).mean().reset_index().to_csv("test.csv")
    grouped = df.groupby(["Experiment", "Dataset", "Model", "ID"])["SIS"].mean().reset_index()
    grouped = grouped[grouped["Experiment"] != "Inpainter Augmentation"]
    grouped = grouped[grouped["Dataset"] == "Kvasir-SEG"]
    # grouped.to_csv("test.csv")
    sns.barplot(data=grouped, x="Model", y="SIS", hue="Experiment")
    plt.show()

    grouped = df.groupby(["Experiment", "Dataset", "Model", "ID"])["IoU"].mean().reset_index()
    grouped = grouped[grouped["Experiment"] != "Inpainter Augmentation"]
    grouped = grouped[grouped["Dataset"] == "Kvasir-SEG"]
    # grouped.to_csv("test.csv")
    sns.barplot(data=grouped, x="Model", y="IoU", hue="Experiment")
    plt.tight_layout()
    plt.show()

    # aug_consistencies = []
    # aug_oods = []
    # cons_consistencies = []
    # cons_oods
    cons_df = pd.DataFrame()
    aug_df = pd.DataFrame()
    for file in os.listdir("logs/consistency/FPN"):
        if "augmentation" in file:
            aug_df = aug_df.append(pd.read_csv(os.path.join("logs/consistency/FPN", file)), ignore_index=True)
        if "consistency" in file:
            cons_df = cons_df.append(pd.read_csv(os.path.join("logs/consistency/FPN", file)), ignore_index=True)
        else:
            continue
    cons_df = cons_df[cons_df["epoch"] < 300]
    aug_df = aug_df[aug_df["epoch"] < 300]
    sns.lineplot(data=cons_df, x="epoch", y="consistency", color="orange")
    sns.lineplot(data=aug_df, x="epoch", y="consistency", color="blue")
    sns.lineplot(data=cons_df, x="epoch", y="ood_iou", color="orange")
    sns.lineplot(data=aug_df, x="epoch", y="ood_iou", color="blue")
    plt.show()


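# Relates each ensemble's improvement over its constituents to the constituents' C.StD,
# per dataset and training method ("diverse" ensembles are the multi-model ones).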
def plot_ensemble_variance_relationship(training_method):
    df = collate_ensemble_results_into_df(training_method)
    df_constituents = collate_base_results_into_df()
    df_constituents = df_constituents[df_constituents["Experiment"] != "Inpainter Augmentation"]
    df["constituents"] = df["constituents"].apply(
        lambda x: [int(i.split("_")[-1]) for i in x] if isinstance(x, list) else int(x))
    if training_method != "all":
        if training_method == "vanilla": training_method = "No Augmentation"
        if training_method == "augmentation": training_method = "Vanilla Augmentation"
        if training_method == "consistency": training_method = "Consistency Training"
        df_constituents = df_constituents[df_constituents["Experiment"] == training_method]

    colors = ["tab:blue", "tab:orange", "tab:green", "tab:red"]
    # colors = ["b", "g", "r", "c", "m", "y"]
    colormap = dict(zip(np.unique(df["Dataset"]), colors))

    var_dataset = pd.DataFrame()
    for i, row in df.iterrows():
        model = df.at[i, "Model"]
        id = df.at[i, "ID"]
        experiment = df.at[i, "Experiment"]
        if model == "diverse":
            filtered = df_constituents[
                (df_constituents["ID"] == id) &
                (df_constituents["Experiment"] == experiment)]
            cstd = (filtered.groupby(["Dataset"]).std() / filtered.groupby(["Dataset"]).mean())["IoU"]
            improvements = df[
                (df["Model"] == model) & (df["Experiment"] == experiment) & (df["ID"] == id)]
            improvements = 100 * (improvements.groupby(["Dataset"])["IoU"].mean() - filtered.groupby(["Dataset"])[
                "IoU"].mean()) / filtered.groupby(["Dataset"])["IoU"].mean()
            cstd = cstd.reset_index()
            improvements = improvements.reset_index()
            cstd.rename(columns={"IoU": "C.StD"}, inplace=True)
            improvements.rename(columns={"IoU": "% Increase in Generalizability wrt Constituents Mean"}, inplace=True)
            merged = pd.merge(improvements, cstd)
            merged["Model"] = [model] * 4  # dataset length
            merged["ID"] = [id] * 4
            merged["Experiment"] = [experiment] * 4

            var_dataset = var_dataset.append(merged)
        else:

            constituents = df.at[i, "constituents"]
            filtered = df_constituents[
                (df_constituents["Model"] == model) & (df_constituents["ID"].isin(constituents)) & (
                        df_constituents["Experiment"] == experiment)]
            cstd = (filtered.groupby(["Dataset"]).std() / filtered.groupby(["Dataset"]).mean())["IoU"]
            improvements = df[
                (df["Model"] == model) & (df["Experiment"] == experiment) & (df["ID"] == id)]
            improvements = 100 * (improvements.groupby(["Dataset"])["IoU"].mean() - filtered.groupby(["Dataset"])[
                "IoU"].mean()) / filtered.groupby(["Dataset"])["IoU"].mean()
            cstd = cstd.reset_index()

            improvements = improvements.reset_index()
            cstd.rename(columns={"IoU": "C.StD"}, inplace=True)
            improvements.rename(columns={"IoU": "% Increase in Generalizability wrt Constituents Mean"}, inplace=True)
            merged = pd.merge(improvements, cstd)
            merged["Model"] = [model] * 4
            merged["ID"] = [id] * 4
            merged["Experiment"] = [experiment] * 4
            var_dataset = var_dataset.append(merged)
            # improvements = filtered.groupby
            # cstd = filtered
            # df.at[i, "cstd"] =
            # cstds.append(0)
    print(len(np.unique(var_dataset[var_dataset["Experiment"] == "Vanilla Augmentation"][
                            "% Increase in Generalizability wrt Constituents Mean"])))
    print(len(np.unique(var_dataset[var_dataset["Experiment"] == "No Augmentation"][
                            "% Increase in Generalizability wrt Constituents Mean"])))
    print(var_dataset.columns)
    datasets = np.unique(var_dataset["Dataset"])
    training_methods = ["No Augmentation", "Vanilla Augmentation", "Consistency Training"]
    fig, ax = plt.subplots(len(datasets), len(training_methods), figsize=(11, 12))
    var_dataset = var_dataset.replace("diverse", "MultiModel")

    for i, dataset_name in enumerate(datasets):
        for j, training_method in enumerate(training_methods):
            dataset_filtered = var_dataset[
                (var_dataset["Dataset"] == dataset_name) & (var_dataset["Experiment"] == training_method)]
            # sns.regplot(ax=ax.flatten()[i], data=dataset_filtered, x="C.StD",
            #             y="% Increase in Generalizability wrt Constituents Mean",
            #             ci=99,
            #             color=colormap[dataset_name], label=dataset_name)
            # correlation = pearsonr(dataset_filtered["C.StD"],
            #                        dataset_filtered["% Increase in Generalizability wrt Constituents Mean"])
            if j == 0:  # seaborn does not like global legends
                scatter = sns.scatterplot(ax=ax[i, j], data=dataset_filtered, x="C.StD",
                                          y="% Increase in Generalizability wrt Constituents Mean",
                                          ci=99, legend=False, color=colormap[dataset_name], label=dataset_name)
                ax[i, j].set_title(training_method)

            else:
                scatter = sns.scatterplot(ax=ax[i, j], data=dataset_filtered, x="C.StD",
                                          y="% Increase in Generalizability wrt Constituents Mean",
                                          ci=99, legend=False, color=colormap[dataset_name])
            correlation = spearmanr(dataset_filtered["C.StD"],
                                    dataset_filtered["% Increase in Generalizability wrt Constituents Mean"])
            ax[i, j].set_title(f"Rs={correlation[0]:.3f}, p={correlation[1]:.6f}")
    for a in ax.flatten():
        a.set(xlabel=None)
        a.set(ylabel=None)
    for axis, col in zip(ax[0], training_methods):
        axis.annotate(col, xy=(0.5, 1.5), xytext=(0, 5),
                      xycoords='axes fraction', textcoords='offset points',
                      size='xx-large', ha='center', va='baseline')
    fig.add_subplot(111, frameon=False)
    # fig.legend(loc='lower center', bbox_to_anchor=(0.5, 0.5), ncol=2, labels=np.unique(var_dataset["Dataset"]))
    fig.legend(loc='lower center', bbox_to_anchor=(0.5, 0), ncol=4)
    plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)

    plt.ylabel("% Increase in Generalizability wrt Constituents Mean")
    plt.xlabel("Coefficient of Standard Deviation")
    # plt.title()
    fig.tight_layout()
    # fig.subplots_adjust(bottom=0.2)
    plt.savefig("ensemble_variance_relationship_statistical.eps")
    plt.show()
    # hue_order = var_dataset.groupby(["Model"])[
    #     "% Increase in Generalizability wrt Constituents Mean"].mean().sort_values().index
    var_dataset = var_dataset.replace("diverse", "MultiModel")

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=var_dataset, ax=ax, x="Dataset", y="% Increase in Generalizability wrt Constituents Mean",
                hue="Model",
                order=["Kvasir-SEG", "CVC-ClinicDB", "EndoCV2020", "Etis-LaribDB"])

    plt.axhline(0, linestyle="--")
    plt.savefig("improvements_due_to_ensembles.eps")
    plt.show()


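# Prints LaTeX-formatted p-values comparing single models against their ensembles (per model and averaged
# across models), then a heatmap of pairwise tests between ensemble training methods.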
def get_ensemble_p_vals():
    singular = collate_base_results_into_df()
    # cross-model t-test (not used in thesis)
    print("No augmentation")
    for mix, model in enumerate(np.unique(singular["Model"])):
        print(model, end="&")
        for dix, dataset in enumerate(np.unique(singular["Dataset"])):
            single = singular[singular["Experiment"] == "No Augmentation"]
            ensemble = collate_ensemble_results_into_df(type="vanilla")
            single = single[(single["Dataset"] == dataset) & (single["Model"] == model)]
            ensemble = ensemble[(ensemble["Dataset"] == dataset) & (ensemble["Model"] == model)]
            ttest = ttest_ind(
                single["IoU"], ensemble["IoU"], equal_var=False
            )
            print(round(ttest[1], 5), end=" & ")
        print("\\\\ ")
    print("Augmentation")
    for mix, model in enumerate(np.unique(singular["Model"])):
        print(model, end="&")
        for dix, dataset in enumerate(np.unique(singular["Dataset"])):
            single = singular[singular["Experiment"] == "Vanilla Augmentation"]
            ensemble = collate_ensemble_results_into_df(type="augmentation")
            single = single[(single["Dataset"] == dataset) & (single["Model"] == model)]
            ensemble = ensemble[(ensemble["Dataset"] == dataset) & (ensemble["Model"] == model)]
            ttest = ttest_ind(
                single["IoU"], ensemble["IoU"], equal_var=False
            )
            print(round(ttest[1], 5), end=" & ")
        print("\\\\ ")
    print("Consistency Training")
    for mix, model in enumerate(np.unique(singular["Model"])):
        print(model, end="&")
        for dix, dataset in enumerate(np.unique(singular["Dataset"])):
            single = singular[singular["Experiment"] == "Consistency Training"]
            ensemble = collate_ensemble_results_into_df(type="consistency")
            single = single[(single["Dataset"] == dataset) & (single["Model"] == model)]
            ensemble = ensemble[(ensemble["Dataset"] == dataset) & (ensemble["Model"] == model)]
            ttest = ttest_ind(
                single["IoU"], ensemble["IoU"], equal_var=False
            )
            print(round(ttest[1], 5), end=" & ")
        print("\\\\ ")

    # model-averaged
    print("When averaged across models:")
    print("No augmentation")
    experiments_long = ["No Augmentation", "Conventional Augmentation", "Consistency Training"]
    for dix, dataset in enumerate(np.unique(singular["Dataset"])):
        single = singular[singular["Experiment"] == "No Augmentation"]
        ensemble = collate_ensemble_results_into_df(type="vanilla")
        single = single[(single["Dataset"] == dataset)]
        ensemble = ensemble[(ensemble["Dataset"] == dataset)]
        ttest = mannwhitneyu(
            single["IoU"], ensemble["IoU"]
        )
        print(round(ttest[1], 3), end=" & ")
    print("\nAugmentation")

    for dix, dataset in enumerate(np.unique(singular["Dataset"])):
        single = singular[singular["Experiment"] == "Vanilla Augmentation"]
        ensemble = collate_ensemble_results_into_df(type="augmentation")
        single = single[(single["Dataset"] == dataset)]
        ensemble = ensemble[(ensemble["Dataset"] == dataset)]
        ttest = mannwhitneyu(
            single["IoU"], ensemble["IoU"]
        )
        print(round(ttest[1], 3), end=" & ")
    print("\nConsistency Training")
    for dix, dataset in enumerate(np.unique(singular["Dataset"])):
        single = singular[singular["Experiment"] == "Consistency Training"]
        ensemble = collate_ensemble_results_into_df(type="consistency")
        single = single[(single["Dataset"] == dataset)]
        ensemble = ensemble[(ensemble["Dataset"] == dataset)]
        ttest = mannwhitneyu(
            single["IoU"], ensemble["IoU"]
        )
        print(round(ttest[1], 3), end=" & ")

    experiments = ["vanilla", "augmentation", "consistency"]
    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(8, 8))
    for dix, dataset in enumerate(np.unique(singular["Dataset"])):
        p_values = np.zeros((len(experiments), len(experiments)))
        for i, exp1 in enumerate(experiments):
            for j, exp2 in enumerate(experiments):
                df1 = collate_ensemble_results_into_df(exp1)
                df2 = collate_ensemble_results_into_df(exp2)
                test = mannwhitneyu(df1[df1["Dataset"] == dataset]["IoU"],
                                    df2[(df2["Dataset"] == dataset)]["IoU"])
                p_values[i, j] = round(test[1], 5)
        sns.heatmap(p_values, ax=axes.flatten()[dix], annot=True, xticklabels=experiments_long,
                    yticklabels=experiments_long,
                    cbar=False)
        axes.flatten()[dix].set_title(dataset)
    plt.tight_layout()
    plt.savefig("ensemble_relative_pvals.eps")
    plt.show()


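# % improvement of ensembles over the mean of their constituents for each training method,
# with pairwise significance heatmaps and a box plot of the improvements.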
def compare_ensembles():
    singular = collate_base_results_into_df()
    singular_no_augment = singular[singular["Experiment"] == "No Augmentation"].groupby(["Dataset", "ID"])[
        "IoU"].mean()
    singular_augment = singular[singular["Experiment"] == "Vanilla Augmentation"].groupby(["Dataset", "ID"])[
        "IoU"].mean()
    singular_ct = singular[singular["Experiment"] == "Consistency Training"].groupby(["Dataset", "ID"])[
        "IoU"].mean()

    no_augment = collate_ensemble_results_into_df(type="vanilla").groupby(["Dataset", "ID"])[
        "IoU"].mean()
    augment = collate_ensemble_results_into_df(type="augmentation").groupby(["Dataset", "ID"])[
        "IoU"].mean()
    consistency = collate_ensemble_results_into_df(type="consistency").groupby(["Dataset", "ID"])[
        "IoU"].mean()

    no_augment_improvements = (100 * (no_augment - singular_no_augment) / singular_no_augment).reset_index()
    augment_improvements = (100 * (augment - singular_augment) / singular_augment).reset_index()
    ct_improvements = (100 * (consistency - singular_ct) / singular_ct).reset_index()

    no_augment_improvements["Experiment"] = pd.Series(["No Augmentation"] * len(no_augment_improvements),
                                                      index=no_augment_improvements.index)
    augment_improvements["Experiment"] = pd.Series(["Conventional Augmentation"] * len(augment_improvements),
                                                   index=augment_improvements.index)
    ct_improvements["Experiment"] = pd.Series(["Consistency Training"] * len(ct_improvements),
                                              index=ct_improvements.index)
    # print("No augmentation")
    # print(no_augment_improvements)
    # print("Augmentation")
    # print(augment_improvements)
    # print("Consistency Training")
    # print(ct_improvements)
    # print(augment_improvements)
    overall_improvements = pd.concat([no_augment_improvements, augment_improvements, ct_improvements],
                                     ignore_index=True)

    experiments = np.unique(overall_improvements["Experiment"])
    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(8, 8))
    for dix, dataset in enumerate(np.unique(overall_improvements["Dataset"])):
        p_values = np.zeros((len(experiments), len(experiments)))
        for i, exp1 in enumerate(experiments):
            for j, exp2 in enumerate(experiments):
                test = ttest_ind(overall_improvements[(overall_improvements["Dataset"] == dataset) & (
                        overall_improvements["Experiment"] == exp1)]["IoU"],
                                 overall_improvements[(overall_improvements["Dataset"] == dataset) & (
                                         overall_improvements["Experiment"] == exp2)]["IoU"], equal_var=True)
                p_values[i, j] = test[1]
        sns.heatmap(p_values, ax=axes.flatten()[dix], annot=True, xticklabels=experiments, yticklabels=experiments,
                    cbar=False)
        axes.flatten()[dix].set_title(dataset)
    plt.tight_layout()
    plt.savefig("ensemble_improvement_pvals.eps")
    plt.show()

    box = sns.boxplot(data=overall_improvements, x="Experiment", y="IoU", hue="Dataset",
                      hue_order=["Kvasir-SEG", "EndoCV2020", "CVC-ClinicDB", "Etis-LaribDB"])
    box.legend(loc="upper left")
    box.set(ylabel="Improvement in IoU (%)")
    box.set(xlabel="Training Method")
    box.axhline(0, linestyle="--")
    plt.savefig("ensemble_improvements.eps")
    print("..,.")
    print(overall_improvements.groupby(["Experiment"])["IoU"].mean())
    print(overall_improvements.groupby(["Experiment"])["IoU"].max())
    plt.show()

    grouped = singular[singular["Experiment"] != "Inpainter Augmentation"].groupby(["Model", "Dataset", "Experiment"])[
        "IoU"]
    constituent_cstd = grouped.std() / grouped.mean()
    print(constituent_cstd)


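# Exploratory: ensemble % improvement over the single-model mean against the single-model IoU C.StD,
# with the Spearman correlation reported in the title.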
def test():
    ensemble = collate_ensemble_results_into_df("all")
    ensemble = ensemble.replace("augmentation", "Vanilla Augmentation")
    ensemble = ensemble.replace("vanilla", "No Augmentation")
    ensemble = ensemble.replace("consistency", "Consistency Training")

    ensemble = ensemble[ensemble["Model"] != "diverse"]
    ensemble_means = ensemble.groupby(["Experiment", "Dataset", "Model", "ID"])["IoU"].mean()
    singular = collate_base_results_into_df()
    singular = singular[singular["Experiment"] != "Inpainter Augmentation"]
    singular_grouped = singular.groupby(["Experiment", "Dataset", "Model"])["IoU"]
    # input()

    ensemble_improvements = 100 * (ensemble_means - singular_grouped.mean()) / singular_grouped.mean()
    singular_cstds = singular_grouped.std() / singular_grouped.mean()
    merged = pd.merge(ensemble_improvements, singular_cstds, how='inner', on=["Experiment", "Dataset", "Model"])
    # merged = merged.groupby(["Experiment", "Model"]).mean()
    fig = sns.scatterplot(data=merged, x="IoU_y", y="IoU_x", hue="Experiment")
    test = spearmanr(merged["IoU_y"], merged["IoU_x"])
    plt.title(f"R_s = {round(test[0], 5)}, p={round(test[1], 5)}")
    fig.set_ylabel("Change in IoU (%)")
    fig.set_xlabel("IoU C.StD.")
    # print(spearmanr(merged["IoU_y"], merged["IoU_x"]))

    plt.savefig("ensembles_underspecification.eps")
    plt.show()


if __name__ == '__main__':
    training_plot("logs/consistency/DeepLab/consistency_1.csv")
    # plot_inpainter_vs_conventional_performance()
    # plot_training_procedure_performance()
    # plot_ensemble_performance()
    # compare_models("No Augmentation")
    # compare_models("Vanilla Augmentation")
    # compare_models("Consistency Training")

    # plot_ensemble_variance_relationship("all")
    # plot_cons_vs_aug_ensembles()
    # compare_ensembles()
    # get_ensemble_p_vals()
    # test()