Diff of /experiments/plotting.py [000000] .. [92cc18]

Switch to unified view

a b/experiments/plotting.py
1
import os
2
3
import numpy as np
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
import seaborn as sns
7
import torch
8
import pickle
9
from utils.formatting import SafeDict
10
from scipy.stats import wasserstein_distance
11
from scipy.stats import ttest_ind, pearsonr, mannwhitneyu, spearmanr
12
from models.segmentation_models import *
13
14
15
def training_plot(log_csv, max_epochs=300):
    """
    Plot the training and validation loss curves stored in a training log.

    :param log_csv: path to a CSV file with "epoch", "train_loss" and "val_loss" columns
    :param max_epochs: upper limit of the x axis (defaults to the previously hard-coded 300)
    :return: None; the figure is displayed with ``plt.show()``
    """
    log_df = pd.read_csv(log_csv)
    plt.title("Training Plot Sample")
    plt.xlabel("Epochs")
    plt.ylabel("Jaccard Loss")
    plt.xlim((0, max_epochs))
    plt.ylim((0, 1))
    plt.plot(log_df["epoch"], log_df["train_loss"], label="Training Loss")
    plt.plot(log_df["epoch"], log_df["val_loss"], label="Validation Loss")
    plt.legend()
    plt.show()
27
28
29
def ood_correlations(log_csv):
    """
    Scatter the logged OOD IoU against two candidate predictors from the same log.

    Two point clouds share the axes: "consistency" vs "ood_iou" and
    "iid_test_iou" vs "ood_iou".

    :param log_csv: path to a CSV file with "consistency", "iid_test_iou" and "ood_iou" columns
    :return: None; the figure is displayed with ``plt.show()``
    """
    log_df = pd.read_csv(log_csv)
    plt.title("SIS-OOD correlation")
    plt.xlabel("SIS")
    plt.ylabel("Etis-LaribDB OOD performance")
    plt.xlim((0, 1))
    plt.ylim((0, 1))
    plt.scatter(log_df["consistency"], log_df["ood_iou"], label="Consistency")
    plt.scatter(log_df["iid_test_iou"], log_df["ood_iou"], label="IID IoU")

    plt.legend()
    plt.show()
41
42
43
def ood_v_epoch(log_csv, max_epochs=500):
    """
    Plot the logged "consistency" and "ood_iou" values against the epoch.

    :param log_csv: path to a CSV file with "epoch", "consistency" and "ood_iou" columns
    :param max_epochs: upper limit of the x axis (defaults to the previously hard-coded 500)
    :return: None; the figure is displayed with ``plt.show()``
    """
    log_df = pd.read_csv(log_csv)
    plt.title("Training Plot Sample")
    plt.xlabel("Epochs")
    plt.ylabel("SIL")
    plt.xlim((0, max_epochs))
    plt.ylim((0, 1))
    plt.plot(log_df["epoch"], log_df["consistency"], label="consistency")
    plt.plot(log_df["epoch"], log_df["ood_iou"], label="ood iou")
    plt.legend()
    plt.show()
54
55
56
def get_boxplots_for_models():
    """
    Bar plot comparing model performance on the out-of-distribution datasets.

    For every pickle in ``experiments/Data/pickles`` whose name contains "0" and not
    "maximum_consistency", each OOD IoU is expressed as the percentage change relative
    to the median IoU of that file's first row (labelled "Kvasir-SEG" here). Samples
    whose OOD IoU is below 0.25, or whose first-row IoU is below 0.75, are reported on
    stdout and left out.

    :return: None; the collected table is printed and the figure is displayed
    """
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    pickle_dir = "experiments/Data/pickles"
    rows = []
    for fname in sorted(os.listdir(pickle_dir)):
        # Decide whether the file is wanted before paying for the unpickle.
        if "0" not in fname or "maximum_consistency" in fname:
            continue
        # NOTE: pickle executes arbitrary code on load; only point this at trusted result files.
        with open(os.path.join(pickle_dir, fname), "rb") as file:
            data = pickle.load(file)

        model = fname.split("_")[0]
        if model == "InductiveNet":
            model = "DD-DeepLabV3+"

        ious = data["ious"]
        n_datasets, n_samples = ious.shape
        # Reference value is the median (not the mean) of the first row.
        median_iid_iou = np.median(ious[0])
        print(median_iid_iou)

        # Row 0 is the reference itself, so only rows 1.. are plotted.
        for i in range(1, n_datasets):
            for j in range(n_samples):
                if ious[i, j] < 0.25 or ious[0][j] < 0.75:
                    print(f"{fname} with id {j} has iou {ious[i, j]} and {ious[0][j]} ")
                    continue
                rows.append(
                    [dataset_names[i], model, 100 * (ious[i, j] - median_iid_iou) / median_iid_iou])

    dataset = pd.DataFrame(data=rows, columns=["Dataset", "Model", "\u0394%IoU"])
    print(dataset)
    # Limits are deliberately kept as (0, -100), i.e. larger value first, as in the original.
    plt.ylim(0, -100)
    sns.barplot(x="Dataset", y="\u0394%IoU", hue="Model", data=dataset)
    plt.show()
95
96
97
def get_variances_for_models():
    """
    Bar plot of the coefficient of standard deviation (std / mean) of the IoUs,
    per model and dataset.

    Reads every pickle in ``experiments/Data/pickles`` whose name contains "0" and not
    "maximum_consistency". Samples whose first-row IoU is below 0.75 are reported on
    stdout and left out.

    :return: None; the aggregated table is printed and the figure is displayed
    """
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    pickle_dir = "experiments/Data/pickles"
    value_col = "Coefficient of Std.Dev"
    rows = []
    for fname in sorted(os.listdir(pickle_dir)):
        if "maximum_consistency" in fname or "0" not in fname:
            continue
        # NOTE: pickle executes arbitrary code on load; only point this at trusted result files.
        with open(os.path.join(pickle_dir, fname), "rb") as file:
            data = pickle.load(file)

        raw_model = fname.split("_")[0]
        model = "DD-DeepLabV3+" if raw_model == "InductiveNet" else raw_model

        ious = data["ious"]
        n_datasets, n_samples = ious.shape
        for i in range(n_datasets):
            for j in range(n_samples):
                if ious[0][j] < 0.75:
                    print(fname, "-", j)
                    continue
                # Debug output for the fourth dataset. The check uses the name taken from
                # the file, because `model` has already been replaced by its display label
                # (comparing against `model` made the InductiveNet branch unreachable).
                if i == 3 and raw_model == "InductiveNet":
                    print("inductivenet", ious[i, j])
                if i == 3 and raw_model == "DeepLab":
                    print("DeepLab", ious[i, j])

                rows.append([dataset_names[i], model, ious[i, j]])

    # The column holds raw IoUs at this point and only becomes std / mean after the division below.
    iou_dataset = pd.DataFrame(data=rows, columns=["Dataset", "Model", value_col])
    per_group = iou_dataset.groupby(["Model", "Dataset"])
    std_dataset = (per_group.std() / per_group.mean()).reset_index()
    print(std_dataset)
    plt.ylim((0, 0.15))
    sns.barplot(x="Dataset", y=value_col, hue="Model", data=std_dataset)
    plt.show()
136
137
138
def plot_parameters_sizes():
    """
    Print the total number of parameters of each segmentation model.

    Every model class is instantiated with its default constructor arguments and the
    element counts of all tensors returned by ``parameters()`` are summed.

    :return: None; one "<name>: <count>" line is printed per model
    """
    # Name -> class, kept in the original print order.
    models = {
        "DeepLab": DeepLab,
        "FPN": FPN,
        "InductiveNet": InductiveNet,
        "Unet": Unet,
        "TriUnet": TriUnet,
    }
    for model_name, model_c in models.items():
        model = model_c()
        print(f"{model_name}: {sum(p.numel() for p in model.parameters(recurse=True))}")
144
145
146
def collate_ensemble_results_into_df(type="consistency"):
    """
    Collect the IoUs of all ensemble result pickles into one long-format DataFrame.

    Only files in ``experiments/Data/pickles`` whose name contains "ensemble" are read;
    files containing "maximum_consistency" or "last_epoch" are ignored. The experiment
    label is derived from the file name: "vanilla" -> "No Augmentation",
    "augmentation" -> "Vanilla Augmentation", anything else -> "Consistency Training".
    Samples whose first-row IoU is below 0.75 are dropped. The result is also written to
    ``ensemble_data.csv`` in the current working directory.

    :param type: which files to keep: "consistency" (neither "augmentation" nor "vanilla"
        in the name), "augmentation", or "vanilla". Any other value (e.g. "all") keeps
        every ensemble file. The name shadows the builtin but is kept for callers that
        pass it by keyword.
    :return: DataFrame with columns Dataset, Model, ID, Experiment, IoU, constituents
    """
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    pickle_dir = "experiments/Data/pickles"
    rows = []
    for fname in sorted(os.listdir(pickle_dir)):
        if "ensemble" not in fname:
            continue
        if "maximum_consistency" in fname or "last_epoch" in fname:
            continue

        is_vanilla = "vanilla" in fname
        is_augmentation = "augmentation" in fname
        if type == "consistency" and (is_augmentation or is_vanilla):
            continue
        if type == "augmentation" and not is_augmentation:
            continue
        if type == "vanilla" and not is_vanilla:
            continue

        if is_vanilla:
            experiment = "No Augmentation"
        elif is_augmentation:
            experiment = "Vanilla Augmentation"
        else:
            experiment = "Consistency Training"

        model = fname.split("-")[0]
        if model == "InductiveNet":
            model = "DD-DeepLabV3+"

        # NOTE: pickle executes arbitrary code on load; only point this at trusted result files.
        with open(os.path.join(pickle_dir, fname), "rb") as file:
            data = pickle.load(file)

        ious = data["ious"]
        n_datasets, n_samples = ious.shape
        for i in range(n_datasets):
            for j in range(n_samples):
                if ious[0, j] < 0.75:  # if bugged out; rare
                    continue
                # Best effort: entries without constituent information are skipped silently.
                try:
                    constituents = data["constituents"][j]
                except KeyError:
                    continue
                rows.append([dataset_names[i], model, j, experiment, ious[i, j], constituents])

    iou_dataset = pd.DataFrame(data=rows, columns=["Dataset", "Model", "ID", "Experiment", "IoU", "constituents"])
    iou_dataset.to_csv("ensemble_data.csv")
    return iou_dataset
193
194
195
def collate_base_results_into_df():
    """
    Collect the IoU and SIS values of all single-model result pickles into one
    long-format DataFrame.

    Files in ``experiments/Data/pickles`` whose name contains "ensemble",
    "maximum_consistency" or "last_epoch" are ignored. The experiment label is derived
    from the file name: "sil" without "_G" -> "Consistency Training", "_V" ->
    "Vanilla Augmentation", "_G" -> "Inpainter Augmentation", otherwise
    "No Augmentation". Samples whose first-row IoU is below 0.75 are dropped. The result
    is also written to ``base_data.csv`` in the current working directory.

    :return: DataFrame with columns Dataset, Model, ID, Experiment, IoU, SIS
    """
    dataset_names = ["Kvasir-SEG", "Etis-LaribDB", "CVC-ClinicDB", "EndoCV2020"]
    pickle_dir = "experiments/Data/pickles"
    rows = []
    for fname in sorted(os.listdir(pickle_dir)):
        if "ensemble" in fname:
            continue
        if "maximum_consistency" in fname or "last_epoch" in fname:
            continue

        model = fname.split("_")[0]
        if model == "InductiveNet":
            model = "DD-DeepLabV3+"

        if "sil" in fname and "_G" not in fname:
            experiment = "Consistency Training"
        elif "_V" in fname:
            experiment = "Vanilla Augmentation"
        elif "_G" in fname:
            experiment = "Inpainter Augmentation"
        else:
            experiment = "No Augmentation"

        # NOTE: pickle executes arbitrary code on load; only point this at trusted result files.
        with open(os.path.join(pickle_dir, fname), "rb") as file:
            data = pickle.load(file)

        ious = data["ious"]
        sis = data["sis"]
        n_datasets, n_samples = ious.shape
        for i in range(n_datasets):
            for j in range(n_samples):
                if ious[0, j] < 0.75:
                    continue
                rows.append([dataset_names[i], model, j, experiment, ious[i, j], sis[i, j]])

    iou_dataset = pd.DataFrame(data=rows, columns=["Dataset", "Model", "ID", "Experiment", "IoU", "SIS"])
    iou_dataset.to_csv("base_data.csv")
    return iou_dataset
230
231
232
def plot_ensemble_performance():
    """
    Plot how ensembles trained with augmentation compare to the single models.

    Produces, in order: a bar plot of ensemble IoU per dataset and model; a bar plot of
    the difference between the single-model and ensemble relative distance to the
    "Kvasir-SEG" IoU; a bar plot of the single-model coefficient of standard deviation;
    and a line/scatter plot of that coefficient against the difference (in percent).
    A LaTeX table of mean ensemble IoUs is printed along the way.

    :return: None; figures are displayed with ``plt.show()``
    """
    cstd_col = "Coeff. StD of IoUs"
    gain_col = "% Improvement over mean constituent IoU"

    df = collate_ensemble_results_into_df("augmentation")
    print(df)
    latex = df.groupby(["Model", "Dataset"])["IoU"].mean()
    print(latex.reset_index())
    print(latex.to_latex(float_format="%.3f"))
    sns.barplot(data=df, x="Dataset", y="IoU", hue="Model")
    plt.show()

    # Relative distance of every ensemble IoU to the Kvasir-SEG IoU of the same ensemble.
    grouped_mean = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean()
    grouped_iid = np.abs(grouped_mean - grouped_mean["Kvasir-SEG"]) / grouped_mean["Kvasir-SEG"]

    # Same quantity for the non-ensemble ("ne") models, averaged over runs.
    nedf = collate_base_results_into_df()
    ne_grouped_mean = nedf.groupby(["Dataset", "Model"])["IoU"].mean()
    ne_grouped_iid = np.abs(ne_grouped_mean["Kvasir-SEG"] - ne_grouped_mean) / ne_grouped_mean["Kvasir-SEG"]

    comparison = (ne_grouped_iid - grouped_iid).reset_index()

    sns.barplot(data=comparison, x="Dataset", y="IoU", hue="Model")
    plt.show()

    # plot delta vs variance
    ne_grouped_coeff_std = nedf.groupby(["Dataset", "Model"])["IoU"].std() / ne_grouped_mean
    ne_grouped_coeff_std = ne_grouped_coeff_std.reset_index()
    ne_grouped_coeff_std = ne_grouped_coeff_std.rename(columns={"IoU": cstd_col})
    sns.barplot(data=ne_grouped_coeff_std, x="Dataset", y=cstd_col, hue="Model")
    plt.show()

    test = pd.merge(ne_grouped_coeff_std, comparison)
    test = test.rename(columns={"IoU": gain_col})
    test[gain_col] *= 100
    # numeric_only: the frame still carries the string "Dataset" column, which pandas >= 2
    # refuses to average instead of silently dropping it.
    test = test.groupby(["Model", "ID"]).mean(numeric_only=True).reset_index()

    print("mean", test.mean(numeric_only=True))
    print("max", test.max(numeric_only=True))

    sns.lineplot(data=test, x=cstd_col, y=gain_col, err_style="bars",
                 color="gray", linestyle='--')
    test = test.groupby("Model").mean(numeric_only=True).reset_index()
    # x / y must be passed by keyword in current seaborn; `ci` never had an effect on
    # scatterplot and is no longer accepted, so it is not passed.
    sns.scatterplot(data=test, x=cstd_col, y=gain_col, hue="Model", s=100)
    plt.show()
281
282
283
def plot_overall_ensemble_performance():
    """
    Prepare the tables for an overall ensemble-vs-single-model comparison.

    NOTE(review): this function looks unfinished. It computes the mean ensemble IoU and
    the single-model coefficient of standard deviation, but plots and returns nothing.
    Its only observable effects are the CSV files written by the two collate helpers.
    "both" is not one of the filters ``collate_ensemble_results_into_df`` recognizes, so
    no ensemble file is excluded by training method.

    :return: None
    """
    df = collate_ensemble_results_into_df("both")
    grouped_mean = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean()  # currently unused

    nedf = collate_base_results_into_df()
    ne_grouped_mean = nedf.groupby(["Dataset", "Model"])["IoU"].mean()

    # plot delta vs variance
    ne_grouped_coeff_std = nedf.groupby(["Dataset", "Model"])["IoU"].std() / ne_grouped_mean
    ne_grouped_coeff_std = ne_grouped_coeff_std.reset_index()
    ne_grouped_coeff_std = ne_grouped_coeff_std.rename(columns={"IoU": "Coeff. StD of IoUs"})
294
295
296
def plot_cons_vs_aug_ensembles():
    """
    Compare consistency-trained ensembles with augmentation-trained ensembles.

    Prints the mean IoU per dataset for both groups, followed by one LaTeX table row per
    dataset with the Mann-Whitney U statistic and p-value of the two IoU samples.
    Despite its name, the function draws no figure.

    :return: None
    """
    df = collate_ensemble_results_into_df("consistency")
    df2 = collate_ensemble_results_into_df("augmentation")

    print(df2.groupby(["Dataset"])["IoU"].mean())
    print(df.groupby(["Dataset"])["IoU"].mean())
    for dset in np.unique(df2["Dataset"])[::-1]:
        utest = mannwhitneyu(df[df["Dataset"] == dset]["IoU"], df2[df2["Dataset"] == dset]["IoU"])
        # "\\\\ " prints two backslashes and a space (LaTeX row end), same as before but
        # without the invalid "\ " escape sequence.
        print(f"{dset} & {round(utest[0], 5)} & {round(utest[1], 5)} \\\\ ")
309
310
311
def plot_inpainter_vs_conventional_performance():
    """
    Compare inpainter-based augmentation with vanilla augmentation.

    Prints LaTeX-style rows with a Mann-Whitney U test per dataset and a Welch t-test
    p-value per model and dataset, prints the max / mean percentage change in mean IoU
    relative to "No Augmentation", draws the corresponding box plot and saves it as
    ``augmentation_plot.eps``.

    :return: Series of mean IoU indexed by (Dataset, Model, Experiment)
    """
    change_col = "% Change in mean IoU with respect to No Augmentation"

    df = collate_base_results_into_df()
    df = df[df["Experiment"] != "Consistency Training"]

    def ious_for(experiment, dset, model=None):
        # IoU values of one experiment on one dataset, optionally for a single model.
        mask = (df["Experiment"] == experiment) & (df["Dataset"] == dset)
        if model is not None:
            mask = mask & (df["Model"] == model)
        return df[mask]["IoU"]

    models = np.unique(df["Model"])
    for dset in np.unique(df["Dataset"])[::-1]:
        overall_utest = mannwhitneyu(ious_for("Vanilla Augmentation", dset),
                                     ious_for("Inpainter Augmentation", dset))
        # "\\\\ " prints two backslashes and a space, without an invalid escape sequence.
        print(f"{dset} & {overall_utest[0]}, p={round(overall_utest[1], 5)} \\\\ ")

    for model in models:
        print(f"{model}", end="")
        for dset in np.unique(df["Dataset"]):
            ttest = ttest_ind(
                ious_for("Inpainter Augmentation", dset, model),
                ious_for("Vanilla Augmentation", dset, model),
                equal_var=False)
            print(f" & {round(ttest[1], 5)}", end="")
        print("\\\\ ")

    table = df.groupby(["Dataset", "Model", "Experiment"])["IoU"].mean()
    no_augmentation = df[df["Experiment"] == "No Augmentation"].groupby(["Dataset"])["IoU"].mean()

    improvements = (100 * (table - no_augmentation) / no_augmentation).reset_index()
    improvements = improvements[improvements["Experiment"] != "No Augmentation"]
    improvements = improvements.rename(columns={"IoU": change_col})

    for experiment in ("Vanilla Augmentation", "Inpainter Augmentation"):
        subset = improvements[improvements["Experiment"] == experiment]
        # numeric_only: the frame carries string columns that cannot be averaged.
        print(subset.max(numeric_only=True))
        print(subset.mean(numeric_only=True))

    sns.boxplot(data=improvements, x="Dataset", y=change_col, hue="Experiment")

    plt.savefig("augmentation_plot.eps")
    plt.show()
    return table
354
355
356
def plot_training_procedure_performance():
    """
    Compare "No Augmentation", "Vanilla Augmentation" and "Consistency Training".

    Prints a Mann-Whitney U test (consistency vs. vanilla augmentation) per dataset,
    plots the coefficient of standard deviation of the IoUs per dataset and experiment
    (saved as ``consistency_training_cstd.eps``), prints mean percentage changes, and
    draws a box plot of the percentage change in IoU relative to "No Augmentation"
    (saved as ``consistency_training_percent.eps``).

    :return: Series of mean IoU indexed by (Dataset, Model, Experiment)
    """
    change_col = "% Change in mean IoU with respect to No Augmentation"
    cstd_col = "Coefficient of Standard Deviation of IoUs"
    experiments = ["No Augmentation", "Vanilla Augmentation", "Consistency Training"]

    df = collate_base_results_into_df()
    df = df[df["Experiment"] != "Inpainter Augmentation"]
    # Reorder the rows so that the experiments appear in the order listed above.
    index = [idx for name in experiments for idx in df.index[df["Experiment"] == name].tolist()]
    df = df.reindex(index)

    # Grouping by the IoU value itself drops the "Model" column; numeric_only is needed
    # because pandas >= 2 no longer silently skips that string column.
    filt = df.groupby(["Dataset", "ID", "IoU", "Experiment"]).mean(numeric_only=True)
    filt = filt.reset_index()
    table = df.groupby(["Dataset", "Model", "Experiment"])["IoU"].mean()

    for dset in np.unique(df["Dataset"]):
        overall_ttest = mannwhitneyu(df[(df["Experiment"] == "Consistency Training") & (df["Dataset"] == dset)]["IoU"],
                                     df[(df["Experiment"] == "Vanilla Augmentation") & (df["Dataset"] == dset)]["IoU"])
        print(f"{dset}: {overall_ttest[0]}, p={round(overall_ttest[1], 5)} ")

    no_augmentation_performance = filt[filt["Experiment"] == "No Augmentation"].groupby(["Dataset"])["IoU"].mean()

    # C.StD analysis
    per_experiment = filt.groupby(["Dataset", "Experiment"])["IoU"]
    cstd = (per_experiment.std() / per_experiment.mean()).reset_index()
    cstd = cstd.rename(columns={"IoU": cstd_col})
    sns.barplot(data=cstd, x="Dataset", y=cstd_col, hue="Experiment", hue_order=experiments)
    plt.savefig("consistency_training_cstd.eps")
    plt.show()

    augmentation_performance = filt[filt["Experiment"] == "Vanilla Augmentation"].groupby(["Dataset"])["IoU"].mean()
    per_run_mean = filt.groupby(["Dataset", "Experiment", "ID"])["IoU"].mean()

    # Change relative to vanilla augmentation, averaged per experiment. The interactive
    # input() pause that used to follow this print was removed so the function can run
    # unattended.
    vs_augmentation = 100 * (per_run_mean - augmentation_performance) / augmentation_performance
    print(vs_augmentation.groupby(["Experiment"]).mean())

    improvement_pct = 100 * (per_run_mean - no_augmentation_performance) / no_augmentation_performance
    improvement_pct = improvement_pct.reset_index()
    print(improvement_pct[improvement_pct["Experiment"] == "No Augmentation"])
    improvement_pct = improvement_pct[improvement_pct["Experiment"] != "No Augmentation"]

    print("Consistency")
    print(improvement_pct[improvement_pct["Experiment"] == "Consistency Training"].mean(numeric_only=True))
    print("Augmentation")
    print(improvement_pct[improvement_pct["Experiment"] == "Vanilla Augmentation"].mean(numeric_only=True))

    improvement_pct = improvement_pct.rename(columns={"IoU": change_col})
    sns.boxplot(data=improvement_pct, x="Dataset", y=change_col, hue="Experiment")

    plt.savefig("consistency_training_percent.eps")
    plt.show()
    return table
428
429
430
def compare_models(training_method):
    """
    Compare the models trained with one training method.

    Produces three figures: the percentage change in IoU of every run on the
    non-"Kvasir-SEG" datasets relative to the same run's "Kvasir-SEG" IoU
    (``delta_iou_baseline.eps``); the coefficient of standard deviation of the IoUs per
    dataset and model (``cstd_baseline.eps``); and, per OOD dataset, a scatter of that
    coefficient against the percentage change, titled with the Pearson correlation
    (``underspecification_baseline.eps``).

    :param training_method: value of the "Experiment" column to keep, e.g. "No Augmentation"
    :return: None; figures are saved and displayed
    """
    df = collate_base_results_into_df()
    df = df[df["Experiment"] == training_method]

    # generalizability_gap
    grouped = df.groupby(["Dataset", "Model", "ID"])["IoU"].mean().reset_index()
    ood = grouped[grouped["Dataset"] != "Kvasir-SEG"].copy()
    print(ood.columns)
    iid = grouped[grouped["Dataset"] == "Kvasir-SEG"].copy()
    for i, row in ood.iterrows():
        run_id = ood.at[i, "ID"]
        model = ood.at[i, "Model"]
        iou = row["IoU"]
        # IID IoU of the same model and run; .iloc[0] replaces the deprecated
        # float(<one-element Series>) conversion.
        iid_iou = float(iid[(iid["ID"] == run_id) & (iid["Model"] == model)]["IoU"].iloc[0])
        print(iou)
        print(iid_iou)
        ood.at[i, "gap"] = 100 * (iou - iid_iou) / iid_iou
    sns.barplot(data=ood, x="Dataset", hue="Model", y="gap")
    plt.ylim(-100, 0)
    plt.ylabel("% Change in IoU wrt IID")
    plt.savefig("delta_iou_baseline.eps")

    plt.show()

    per_model = df.groupby(["Dataset", "Model"])["IoU"]
    cstds = (per_model.std() / per_model.mean()).reset_index()
    sns.barplot(data=cstds, x="Dataset", y="IoU", hue="Model")
    # After the merge "IoU_x" is the run's IoU and "IoU_y" the coefficient of std. dev.
    both = pd.merge(ood, cstds, on=["Model", "Dataset"])
    plt.savefig("cstd_baseline.eps")

    plt.show()
    # One row per OOD dataset; the layout assumes exactly three of them.
    fig, ax = plt.subplots(3, 1, figsize=(6, 6))
    for didx, dataset in enumerate(np.unique(both["Dataset"])):
        subset = both[both["Dataset"] == dataset]
        test = pearsonr(subset["IoU_y"], subset["gap"])
        panel = ax.flatten()[didx]
        panel.set_title(f"{dataset} : Rp={round(test[0], 5)}, p={round(test[1], 5)}")
        if didx == 2:
            # Only the bottom panel carries the (shared) legend, placed below the axes.
            scatter = sns.scatterplot(ax=panel, data=subset, x="IoU_y", y="gap", hue="Model")
            scatter.legend(loc="upper center", bbox_to_anchor=(0.5, -0.2), ncol=3)
        else:
            sns.scatterplot(ax=panel, data=subset, x="IoU_y", y="gap", hue="Model", legend=False)

    for axis in ax:
        axis.set_ylabel("")
        axis.set_xlabel("")
        axis.set_yticklabels([])
        axis.set_xticklabels([])

    ax.flatten()[2].set_xlabel("C.Std mIoU")
    ax.flatten()[1].set_ylabel("% Change in mIoU wrt IID")
    plt.tight_layout()
    plt.savefig("underspecification_baseline.eps")
    # plt.show() only accepts `block`; the former `ypad=4` argument was invalid.
    plt.show()
515
516
517
def plot_consistencies():
    """
    Plot SIS and IoU on "Kvasir-SEG" per model and experiment, then the training curves
    of the FPN consistency / augmentation runs.

    The per-run means of the base results are written to ``test.csv``. The training
    curves are read from every CSV in ``logs/consistency/FPN`` whose name contains
    "augmentation" (drawn in blue) or "consistency" (drawn in orange); only epochs
    below 300 are shown.

    :return: None; figures are displayed with ``plt.show()``
    """
    keys = ["Experiment", "Dataset", "Model", "ID"]
    df = collate_base_results_into_df()
    df.groupby(keys).mean().reset_index().to_csv("test.csv")

    for metric in ("SIS", "IoU"):
        grouped = df.groupby(keys)[metric].mean().reset_index()
        grouped = grouped[grouped["Experiment"] != "Inpainter Augmentation"]
        grouped = grouped[grouped["Dataset"] == "Kvasir-SEG"]
        sns.barplot(data=grouped, x="Model", y=metric, hue="Experiment")
        if metric == "IoU":
            plt.tight_layout()
        plt.show()

    log_dir = "logs/consistency/FPN"
    aug_logs = []
    cons_logs = []
    for file in os.listdir(log_dir):
        if "augmentation" in file:
            aug_logs.append(pd.read_csv(os.path.join(log_dir, file)))
        if "consistency" in file:
            # Previously this appended to the *augmentation* frame and overwrote the
            # consistency frame on every file, so the orange curves were wrong.
            cons_logs.append(pd.read_csv(os.path.join(log_dir, file)))

    curves = []
    for logs, color in ((cons_logs, "orange"), (aug_logs, "blue")):
        if not logs:
            continue  # nothing logged for this training method
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        frame = pd.concat(logs, ignore_index=True)
        curves.append((frame[frame["epoch"] < 300], color))

    for metric in ("consistency", "ood_iou"):
        for frame, color in curves:
            sns.lineplot(data=frame, x="epoch", y=metric, color=color)
    plt.show()
555
556
557
def _ensemble_improvement_rows(ensemble_df, constituent_rows, model, run_id, experiment):
    """Per dataset: C.StD of the constituents' IoUs and the % IoU gain of one ensemble over their mean."""
    gain_col = "% Increase in Generalizability wrt Constituents Mean"
    # Selecting "IoU" before aggregating keeps string columns out of std()/mean().
    constituent_ious = constituent_rows.groupby(["Dataset"])["IoU"]
    constituent_mean = constituent_ious.mean()
    cstd = (constituent_ious.std() / constituent_mean).reset_index()
    cstd = cstd.rename(columns={"IoU": "C.StD"})

    ensemble_rows = ensemble_df[
        (ensemble_df["Model"] == model) & (ensemble_df["Experiment"] == experiment) & (ensemble_df["ID"] == run_id)]
    improvements = 100 * (ensemble_rows.groupby(["Dataset"])["IoU"].mean() - constituent_mean) / constituent_mean
    improvements = improvements.reset_index().rename(columns={"IoU": gain_col})

    merged = pd.merge(improvements, cstd)
    # Scalars broadcast to however many datasets are present (previously hard-coded to 4).
    merged["Model"] = model
    merged["ID"] = run_id
    merged["Experiment"] = experiment
    return merged


def plot_ensemble_variance_relationship(training_method):
    """
    Relate the gain of an ensemble over its constituents to the constituents' spread.

    For every ensemble the percentage increase in IoU over the mean constituent IoU and
    the coefficient of standard deviation (C.StD) of the constituent IoUs are computed
    per dataset. A grid of scatter plots (datasets x training methods), each titled with
    the Spearman correlation, is saved as
    ``ensemble_variance_relationship_statistical.eps``; a box plot of the gains per
    dataset and model is saved as ``improvements_due_to_ensembles.eps``.

    :param training_method: filter passed to ``collate_ensemble_results_into_df``
        ("vanilla", "augmentation", "consistency" or "all")
    :return: None; figures are saved and displayed
    :raises ValueError: if no ensemble results were found
    """
    gain_col = "% Increase in Generalizability wrt Constituents Mean"
    experiment_labels = {
        "vanilla": "No Augmentation",
        "augmentation": "Vanilla Augmentation",
        "consistency": "Consistency Training",
    }

    df = collate_ensemble_results_into_df(training_method)
    df_constituents = collate_base_results_into_df()
    df_constituents = df_constituents[df_constituents["Experiment"] != "Inpainter Augmentation"]
    # Constituent names end in "_<id>"; keep only the integer ids.
    df["constituents"] = df["constituents"].apply(
        lambda x: [int(i.split("_")[-1]) for i in x] if isinstance(x, list) else int(x))
    if training_method != "all":
        training_method = experiment_labels.get(training_method, training_method)
        df_constituents = df_constituents[df_constituents["Experiment"] == training_method]

    colors = ["tab:blue", "tab:orange", "tab:green", "tab:red"]
    colormap = dict(zip(np.unique(df["Dataset"]), colors))

    frames = []
    for i, row in df.iterrows():
        model = df.at[i, "Model"]
        run_id = df.at[i, "ID"]
        experiment = df.at[i, "Experiment"]
        same_experiment = df_constituents["Experiment"] == experiment
        if model == "diverse":
            # Multi-model ensembles: constituents are the runs with the same id of every model.
            filtered = df_constituents[(df_constituents["ID"] == run_id) & same_experiment]
        else:
            constituents = df.at[i, "constituents"]
            if not isinstance(constituents, list):
                constituents = [constituents]  # isin() requires a list-like
            filtered = df_constituents[
                (df_constituents["Model"] == model) & (df_constituents["ID"].isin(constituents)) & same_experiment]
        frames.append(_ensemble_improvement_rows(df, filtered, model, run_id, experiment))

    if not frames:
        raise ValueError("no ensemble results found for the requested training method")
    # pd.concat replaces DataFrame.append (removed in pandas 2.0). The loop visits every
    # ensemble once per dataset row and recomputes identical values each time; dropping
    # exact duplicates keeps one point per ensemble and dataset, so the correlations
    # below are not computed on an inflated sample.
    var_dataset = pd.concat(frames, ignore_index=True).drop_duplicates()

    print(len(np.unique(var_dataset[var_dataset["Experiment"] == "Vanilla Augmentation"][gain_col])))
    print(len(np.unique(var_dataset[var_dataset["Experiment"] == "No Augmentation"][gain_col])))
    print(var_dataset.columns)
    datasets = np.unique(var_dataset["Dataset"])
    training_methods = ["No Augmentation", "Vanilla Augmentation", "Consistency Training"]
    fig, ax = plt.subplots(len(datasets), len(training_methods), figsize=(11, 12), squeeze=False)
    var_dataset = var_dataset.replace("diverse", "MultiModel")

    for i, dataset_name in enumerate(datasets):
        for j, method in enumerate(training_methods):
            dataset_filtered = var_dataset[
                (var_dataset["Dataset"] == dataset_name) & (var_dataset["Experiment"] == method)]
            # Only the first column is labelled so that fig.legend() lists each dataset once
            # (seaborn does not like global legends). `ci` is not passed: it never had an
            # effect on scatterplot and is no longer accepted.
            extra = {"label": dataset_name} if j == 0 else {}
            sns.scatterplot(ax=ax[i, j], data=dataset_filtered, x="C.StD", y=gain_col,
                            legend=False, color=colormap[dataset_name], **extra)
            correlation = spearmanr(dataset_filtered["C.StD"], dataset_filtered[gain_col])
            ax[i, j].set_title(f"Rs={correlation[0]:.3f}, p={correlation[1]:.6f}")
    for a in ax.flatten():
        a.set(xlabel=None)
        a.set(ylabel=None)
    # Column headers above the top row.
    for axis, col in zip(ax[0], training_methods):
        axis.annotate(col, xy=(0.5, 1.5), xytext=(0, 5),
                      xycoords='axes fraction', textcoords='offset points',
                      size='xx-large', ha='center', va='baseline')
    # Invisible full-figure axes that only carry the shared axis labels.
    fig.add_subplot(111, frameon=False)
    fig.legend(loc='lower center', bbox_to_anchor=(0.5, 0), ncol=4)
    plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)

    plt.ylabel("% Increase in Generalizability wrt Constituents Mean")
    plt.xlabel("Coefficient of Standard Deviation")
    fig.tight_layout()
    plt.savefig("ensemble_variance_relationship_statistical.eps")
    plt.show()

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=var_dataset, ax=ax, x="Dataset", y=gain_col,
                hue="Model",
                order=["Kvasir-SEG", "CVC-ClinicDB", "EndoCV2020", "Etis-LaribDB"])

    plt.axhline(0, linestyle="--")
    plt.savefig("improvements_due_to_ensembles.eps")
    plt.show()
686
687
688
def _select_rows(frame, dataset, model=None):
    """Return the rows of ``frame`` for ``dataset``, optionally narrowed to ``model``."""
    mask = frame["Dataset"] == dataset
    if model is not None:
        mask = mask & (frame["Model"] == model)
    return frame[mask]


def get_ensemble_p_vals():
    """
    Print and plot significance tests comparing single models with ensembles.

    Three things are produced, in order:

    1. For every training method, one printed row per model holding the
       p-value (rounded to 5 places) of a ``ttest_ind(..., equal_var=False)``
       between single-model IoUs and ensemble IoUs on each dataset. Cells are
       separated by ``&`` and each row ends with a double backslash.
    2. The same comparison pooled over all models, using ``mannwhitneyu``
       (p-values rounded to 3 places).
    3. A 2x2 grid of heatmaps (one per dataset) with the pairwise
       ``mannwhitneyu`` p-values between the three ensemble types, saved to
       ``ensemble_relative_pvals.eps`` and shown.

    :return: None
    """
    singular = collate_base_results_into_df()
    models = np.unique(singular["Model"])
    datasets = np.unique(singular["Dataset"])

    # (printed header, "Experiment" label in the single-model frame, ensemble type)
    settings = [
        ("No augmentation", "No Augmentation", "vanilla"),
        ("Augmentation", "Vanilla Augmentation", "augmentation"),
        ("Consistency Training", "Consistency Training", "consistency"),
    ]

    # Collate every result frame exactly once; the filters below never mutate
    # them, so they can be shared by all three sections.
    single_runs = {}
    ensemble_runs = {}
    for _, label, ensemble_type in settings:
        single_runs[ensemble_type] = singular[singular["Experiment"] == label]
        ensemble_runs[ensemble_type] = collate_ensemble_results_into_df(type=ensemble_type)

    # Per-model t-tests (the original note marks these as not used in the thesis).
    for header, _, ensemble_type in settings:
        print(header)
        for model in models:
            print(model, end="&")
            for dataset in datasets:
                single = _select_rows(single_runs[ensemble_type], dataset, model)
                ensemble = _select_rows(ensemble_runs[ensemble_type], dataset, model)
                result = ttest_ind(single["IoU"], ensemble["IoU"], equal_var=False)
                print(round(result[1], 5), end=" & ")
            # Two literal backslashes and a space, spelled with valid escapes.
            print("\\\\ ")

    # Model-averaged comparison.
    print("When averaged across models:")
    for position, (header, _, ensemble_type) in enumerate(settings):
        # The rows are printed without a trailing newline, so every header
        # after the first has to start on a fresh line.
        print(header if position == 0 else "\n" + header)
        for dataset in datasets:
            single = _select_rows(single_runs[ensemble_type], dataset)
            ensemble = _select_rows(ensemble_runs[ensemble_type], dataset)
            result = mannwhitneyu(single["IoU"], ensemble["IoU"])
            print(round(result[1], 3), end=" & ")

    # Pairwise comparison between ensemble types, one heatmap per dataset.
    experiments = [ensemble_type for _, _, ensemble_type in settings]
    experiments_long = ["No Augmentation", "Conventional Augmentation", "Consistency Training"]
    _, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(8, 8))
    flat_axes = axes.flatten()
    for dix, dataset in enumerate(datasets):
        iou_by_experiment = {
            exp: _select_rows(ensemble_runs[exp], dataset)["IoU"] for exp in experiments
        }
        p_values = np.zeros((len(experiments), len(experiments)))
        for i, exp1 in enumerate(experiments):
            for j, exp2 in enumerate(experiments):
                result = mannwhitneyu(iou_by_experiment[exp1], iou_by_experiment[exp2])
                p_values[i, j] = round(result[1], 5)
        sns.heatmap(p_values, ax=flat_axes[dix], annot=True, xticklabels=experiments_long,
                    yticklabels=experiments_long,
                    cbar=False)
        flat_axes[dix].set_title(dataset)
    plt.tight_layout()
    plt.savefig("ensemble_relative_pvals.eps")
    plt.show()
784
785
786
def compare_ensembles():
    """
    Compare the relative IoU change of ensembles over single models.

    For each training method the mean IoU per (Dataset, ID) of the ensembles
    is expressed as a percentage change over the corresponding single-model
    mean. The function then
      * draws a 2x2 grid of heatmaps (one per dataset) holding the pairwise
        ``ttest_ind(..., equal_var=True)`` p-values between training methods
        and saves it to ``ensemble_improvement_pvals.eps``;
      * draws a box plot of the improvements per training method and dataset
        and saves it to ``ensemble_improvements.eps``;
      * prints the mean and max improvement per training method, and the
        std/mean ratio of single-model IoU per (Model, Dataset, Experiment).

    :return: None
    """
    group_keys = ["Dataset", "ID"]

    def mean_iou(frame):
        # Mean IoU per (Dataset, ID) pair.
        return frame.groupby(group_keys)["IoU"].mean()

    def relative_gain(ensemble_mean, single_mean, label):
        # Percentage change of the ensemble over the single models, tagged
        # with the training method it belongs to.
        gain = (100 * (ensemble_mean - single_mean) / single_mean).reset_index()
        gain["Experiment"] = label
        return gain

    singular = collate_base_results_into_df()
    single_plain = mean_iou(singular[singular["Experiment"] == "No Augmentation"])
    single_aug = mean_iou(singular[singular["Experiment"] == "Vanilla Augmentation"])
    single_ct = mean_iou(singular[singular["Experiment"] == "Consistency Training"])

    ensemble_plain = mean_iou(collate_ensemble_results_into_df(type="vanilla"))
    ensemble_aug = mean_iou(collate_ensemble_results_into_df(type="augmentation"))
    ensemble_ct = mean_iou(collate_ensemble_results_into_df(type="consistency"))

    overall_improvements = pd.concat(
        [
            relative_gain(ensemble_plain, single_plain, "No Augmentation"),
            relative_gain(ensemble_aug, single_aug, "Conventional Augmentation"),
            relative_gain(ensemble_ct, single_ct, "Consistency Training"),
        ],
        ignore_index=True,
    )

    # Pairwise t-tests between training methods, one heatmap per dataset.
    experiments = np.unique(overall_improvements["Experiment"])
    _, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(8, 8))
    panels = axes.flatten()
    for dix, dataset in enumerate(np.unique(overall_improvements["Dataset"])):
        in_dataset = overall_improvements[overall_improvements["Dataset"] == dataset]
        samples = [in_dataset[in_dataset["Experiment"] == exp]["IoU"] for exp in experiments]
        p_values = np.zeros((len(experiments), len(experiments)))
        for i, first in enumerate(samples):
            for j, second in enumerate(samples):
                p_values[i, j] = ttest_ind(first, second, equal_var=True)[1]
        sns.heatmap(p_values, ax=panels[dix], annot=True,
                    xticklabels=experiments, yticklabels=experiments, cbar=False)
        panels[dix].set_title(dataset)
    plt.tight_layout()
    plt.savefig("ensemble_improvement_pvals.eps")
    plt.show()

    # Distribution of the improvements per training method and dataset.
    box = sns.boxplot(data=overall_improvements, x="Experiment", y="IoU", hue="Dataset",
                      hue_order=["Kvasir-SEG", "EndoCV2020", "CVC-ClinicDB", "Etis-LaribDB"])
    box.legend(loc="upper left")
    box.set(ylabel="Improvement in IoU (%)", xlabel="Training Method")
    box.axhline(0, linestyle="--")
    plt.savefig("ensemble_improvements.eps")

    per_experiment = overall_improvements.groupby(["Experiment"])["IoU"]
    print("..,.")
    print(per_experiment.mean())
    print(per_experiment.max())
    plt.show()

    # Spread of the single models (std divided by mean), inpainter runs excluded.
    without_inpainter = singular[singular["Experiment"] != "Inpainter Augmentation"]
    grouped = without_inpainter.groupby(["Model", "Dataset", "Experiment"])["IoU"]
    constituent_cstd = grouped.std() / grouped.mean()
    print(constituent_cstd)
856
857
858
def test():
    """
    Scatter ensemble IoU change against the spread of the single models.

    Ensemble results (without the "diverse" model) are relabelled to the
    experiment names used by the single-model frame, averaged per
    (Experiment, Dataset, Model, ID) and expressed as a percentage change
    over the single-model mean of the same (Experiment, Dataset, Model).
    That change is merged with the single-model std/mean ratio, plotted as a
    scatter plot titled with the Spearman correlation, saved to
    ``ensembles_underspecification.eps`` and shown.

    :return: None
    """
    merge_keys = ["Experiment", "Dataset", "Model"]

    # Align the ensemble experiment labels with the single-model ones.
    ensemble = (
        collate_ensemble_results_into_df("all")
        .replace("augmentation", "Vanilla Augmentation")
        .replace("vanilla", "No Augmentation")
        .replace("consistency", "Consistency Training")
    )
    ensemble = ensemble[ensemble["Model"] != "diverse"]
    ensemble_means = ensemble.groupby(merge_keys + ["ID"])["IoU"].mean()

    singular = collate_base_results_into_df()
    singular = singular[singular["Experiment"] != "Inpainter Augmentation"]
    singular_iou = singular.groupby(merge_keys)["IoU"]
    singular_mean = singular_iou.mean()

    ensemble_improvements = 100 * (ensemble_means - singular_mean) / singular_mean
    singular_cstds = singular_iou.std() / singular_mean

    # Both inputs are named "IoU", so the merge yields IoU_x (improvement)
    # and IoU_y (single-model std/mean ratio).
    merged = pd.merge(ensemble_improvements, singular_cstds, how='inner', on=merge_keys)

    scatter_ax = sns.scatterplot(data=merged, x="IoU_y", y="IoU_x", hue="Experiment")
    correlation = spearmanr(merged["IoU_y"], merged["IoU_x"])
    plt.title(f"R_s = {round(correlation[0], 5)}, p={round(correlation[1], 5)}")
    scatter_ax.set_ylabel("Change in IoU (%)")
    scatter_ax.set_xlabel("IoU C.StD.")

    plt.savefig("ensembles_underspecification.eps")
    plt.show()
884
885
886
if __name__ == "__main__":
    # Script entry point. Only the training-curve plot is enabled; the
    # commented calls are kept so they can be toggled by hand.
    training_plot("logs/consistency/DeepLab/consistency_1.csv")
    # plot_inpainter_vs_conventional_performance()
    # plot_training_procedure_performance()
    # plot_ensemble_performance()
    # compare_models("No Augmentation")
    # compare_models("Vanilla Augmentation")
    # compare_models("Consistency Training")

    # plot_ensemble_variance_relationship("all")
    # plot_cons_vs_aug_ensembles()
    # compare_ensembles()
    # get_ensemble_p_vals()
    # test()