# Manual evaluation script (scripts/utils/manual_evaluation.R).
#
# Reads the detailed per-sample score tables of the cross-validation runs and
# of the testing run, and renders the evaluation figures as PNG files. Two
# summary tables (median / standard deviation per score, cv and class) are
# printed at the end.
#
# All plots are written through explicit print() calls so the PNG files are
# also filled when the script is run via source(), where top-level
# auto-printing is disabled.

# Import libraries
library("ggplot2")
library("data.table")
library("magrittr")
library("grid")

# Data paths
results_root <- "/home/mudomini/projects/covid19.MIScnn.RESULTS"
eval_dir <- file.path(results_root, "evaluation")
path <- file.path(eval_dir, "cv_results.detailed.csv")

###############################################################################
# Helpers
###############################################################################

# Display names for the score codes used in the result tables.
score_labels <- c(
  Acc = "Accuracy",
  DSC = "Dice Similarity Coef.",
  Sens = "Sensitivity",
  Spec = "Specificity",
  Prec = "Precision",
  IoU = "Intersection over Union"
)

# Read a detailed result CSV and return it in long format.
#
# csv_path:     path of the CSV file; the columns index, score, lung_R, lung_L
#               and infection are read from it.
# measure_vars: order in which "lungs" and "infection" are melted; this is
#               also the level order of the resulting `class` factor.
#
# Returns a data.table with the columns index, score, class and value, where
# the "lungs" value is the row mean of lung_R and lung_L.
load_cv_results <- function(csv_path, measure_vars = c("lungs", "infection")) {
  results <- fread(csv_path, sep = ",", header = TRUE)
  results[, lungs := rowMeans(results[, c("lung_R", "lung_L")])]
  results <- results[, c("index", "score", "lungs", "infection")]
  melt(results,
       measure.vars = measure_vars,
       variable.name = "class",
       value.name = "value",
       variable.factor = TRUE)
}

# Return a copy of `scores` whose score codes are replaced by display names.
# Codes without an entry in `score_labels` are left unchanged.
relabel_scores <- function(scores) {
  labelled <- copy(scores)
  labelled[score %in% names(score_labels),
           score := unname(score_labels[score])]
  labelled
}

# Render `plot` into a PNG file. The device is closed even if printing fails.
save_png <- function(plot, filename, width, height, res) {
  png(filename, width = width, height = height, res = res)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(filename)
}

# Boxplot of `value` per class on a fixed 0..1 axis. The x labels are matched
# by position, so `class` must have the level order lungs, infection.
class_boxplot <- function(scores) {
  ggplot(scores, aes(class, value, fill = class)) +
    geom_boxplot() +
    scale_x_discrete(labels = c("Lungs", "COVID-19 Lesion")) +
    scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
    theme_bw() +
    scale_fill_brewer(palette = "Dark2") +
    theme(legend.position = "none")
}

# Draw several ggplot objects on one page.
#
# ...:      ggplot objects.
# plotlist: optional list of further ggplot objects.
# file:     unused; kept so existing calls stay valid.
# cols:     number of columns, used only when `layout` is NULL.
# layout:   optional matrix of plot numbers; plot i is drawn in the region
#           spanned by the cells that contain i.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
                     ncol = cols, nrow = ceiling(num_plots / cols))
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(num_plots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

###############################################################################
# Load the 5-fold cross-validation results (class levels: lungs, infection)
###############################################################################
cv_long <- load_cv_results(path)

###############################################################################
# Dice
###############################################################################
dice_df <- cv_long[score == "DSC"]
# Constant column that is only used to get a facet strip as panel title
dice_df[, hack := "A: Results of the 5-fold Cross-Validation"]

# Plot scoring figure (panel A of Figure 4)
figA <- class_boxplot(dice_df) +
  facet_wrap(hack ~ .) +
  labs(x = "", y = "Dice Similarity Coefficient")
save_png(figA, file.path(eval_dir, "validation_boxplot.png"),
         width = 700, height = 600, res = 180)

###############################################################################
# Accuracy
###############################################################################
# The class levels are lungs, infection here as well, so the positional axis
# labels of class_boxplot() are attached to the right boxes.
acc_df <- cv_long[score == "Acc"]

# Plot scoring figure for accuracy
plot_score <- class_boxplot(acc_df) +
  labs(x = "", y = "Pixelwise Accuracy") +
  ggtitle("Results of the 5-fold Cross-Validation")
# Written relative to the current working directory
save_png(plot_score, "score.png", width = 700, height = 600, res = 180)

###############################################################################
# Boxplot - Multiplot
###############################################################################
val_df <- relabel_scores(cv_long)

# Plot scoring figure with one boxplot row per score
plot_score <- class_boxplot(val_df) +
  facet_grid(score ~ .) +
  labs(x = "", y = "") +
  ggtitle("Results of the 5-fold Cross-Validation")
save_png(plot_score, file.path(eval_dir, "multiplot.boxplot.png"),
         width = 800, height = 1400, res = 140)

###############################################################################
# Histogram - Multiplot
###############################################################################
hist_df <- relabel_scores(cv_long)
hist_df[, class := factor(class,
                          levels = c("lungs", "infection"),
                          labels = c("Lungs", "COVID-19"))]

# Plot scoring figure with one histogram per score and class
plot_score <- ggplot(hist_df, aes(value)) +
  geom_histogram(binwidth = 0.05, color = "darkblue", fill = "lightblue") +
  facet_grid(score ~ class) +
  scale_y_continuous(breaks = seq(0, 20, 4), limits = c(0, 20)) +
  #scale_x_continuous(breaks=seq(0, 1, 0.1), limits=c(0, 1)) +
  theme_bw() +
  scale_fill_brewer(palette = "Dark2") +
  theme(legend.position = "none") +
  labs(x = "Metric Score", y = "Sample Frequency") +
  ggtitle("Results of the 5-fold Cross-Validation")
save_png(plot_score, file.path(eval_dir, "multiplot.histogram.png"),
         width = 1000, height = 1600, res = 160)
save_png(plot_score, file.path(eval_dir, "multiplot.histogram.large.png"),
         width = 1400, height = 1800, res = 180)

###############################################################################
# Sensitivity vs DSC vs Accuracy
###############################################################################
# One row per sample and class, one column per score
val_df <- dcast(cv_long, index + class ~ score, value.var = "value")

# Plot scoring figure for SENS vs DSC
plot_score <- ggplot(val_df, aes(DSC, Sens, col = class)) +
  geom_abline(intercept = 0, slope = 1, size = 0.1) +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  theme_bw() +
  scale_color_discrete(name = "Annotation",
                       labels = c("Lungs", "COVID-19 Lesion")) +
  labs(x = "Dice Similarity Coefficient", y = "Sensitivity") +
  ggtitle("5-fold CV Results - Sens vs DSC")
save_png(plot_score, file.path(eval_dir, "sens_vs_dsc.png"),
         width = 1000, height = 800, res = 200)

# Plot scoring figure for DSC vs ACC
plot_score <- ggplot(val_df, aes(DSC, Acc, col = class)) +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  theme_bw() +
  scale_color_discrete(name = "Annotation",
                       labels = c("Lungs", "COVID-19 Lesion")) +
  labs(x = "Dice Similarity Coefficient", y = "Accuracy") +
  ggtitle("5-fold CV Results - Acc vs DSC")
save_png(plot_score, file.path(eval_dir, "acc_vs_dsc.png"),
         width = 1000, height = 800, res = 200)

###############################################################################
# REWORKED: Figure 4
###############################################################################

# Result directory of each k-fold run, in the order the runs are combined
cv_result_dirs <- c(
  "k=2" = "evaluation.cv2",
  "k=3" = "evaluation.cv3",
  "k=4" = "evaluation.cv4",
  "k=5" = "evaluation"
)

# Combine all cv-N data
df <- rbindlist(lapply(names(cv_result_dirs), function(k_label) {
  run_csv <- file.path(results_root, cv_result_dirs[[k_label]],
                       "cv_results.detailed.csv")
  run_scores <- load_cv_results(run_csv,
                                measure_vars = c("infection", "lungs"))
  run_scores[, cv := k_label]
  run_scores
}))

# Preprocess
df_filtered <- df[class == "infection" & score == "DSC"]
df_filtered$class <- "B: COVID-19 Lesion - Ma et al. Dataset"
df_filtered_mean <- df_filtered[, .(mean = mean(value)), by = cv]

# Plot Figure 4-B: mean as bar, distribution as boxplot on top
figB <- ggplot(data = df_filtered, aes(cv, value)) +
  geom_bar(data = df_filtered_mean, aes(cv, mean, fill = cv), col = "black",
           alpha = 0.4, stat = "identity", position = "stack", width = 0.5) +
  stat_boxplot(geom = "errorbar", width = 0.2) +
  geom_boxplot(aes(fill = cv), width = 0.3) +
  facet_wrap(class ~ .) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  coord_flip() +
  theme_bw() +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  theme(legend.position = "none") +
  labs(x = "k-fold Cross-Validation", y = "Dice Similarity Coefficient")

###############################################################################

# Read testing data
path <- file.path(results_root, "evaluation.testing")
files <- list.files(path, full.names = TRUE, include.dirs = TRUE,
                    recursive = FALSE)

# Skip the aggregated .std/.mean/.median files
is_aggregate <- endsWith(files, ".std.csv") |
  endsWith(files, ".mean.csv") |
  endsWith(files, ".median.csv")
fold_files <- files[!is_aggregate]
if (length(fold_files) == 0) {
  stop("No per-fold result files found in ", path, call. = FALSE)
}

# Read every per-fold file and tag it with its cv and fold identifier
dt_list <- lapply(fold_files, function(file) {
  # Both identifiers are two characters at fixed offsets from the file end
  name_len <- nchar(file)
  cv_id <- substring(file, name_len - 8, name_len - 7)
  fold_id <- substring(file, name_len - 5, name_len - 4)
  fold_scores <- fread(file, sep = ",", header = TRUE)
  fold_scores[, fold := fold_id]
  fold_scores[, cv := cv_id]
  fold_scores
})

# Combine list to single datatable
dt <- rbindlist(dt_list)
# Melt
dt <- melt(dt, measure.vars = c("background", "infection"),
           variable.name = "class",
           value.name = "value",
           variable.factor = TRUE)

# Plot Figure 4-C
dt_filtered <- dt[score == "DSC" & class == "infection"]
dt_filtered$class <- "C: COVID-19 Lesion - An et al. Dataset"
# NOTE(review): the limit is taken from the first 100 entries of the sorted
# index column, not from the first 100 distinct index values. If an index
# occurs in several rows (e.g. once per fold file), fewer than 100 samples are
# kept - confirm whether sort(unique(...)) was intended.
dt_filtered <- dt_filtered[index %in% sort(dt_filtered$index)[1:100]]
figC <- ggplot(dt_filtered, aes(cv, value, fill = fold)) +
  geom_boxplot() +
  facet_wrap(class ~ .) +
  scale_x_discrete(labels = c("k=2", "k=3", "k=4", "k=5")) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  coord_flip() +
  theme_bw() +
  scale_fill_brewer(palette = "Dark2") +
  theme(legend.position = "none") +
  labs(x = "k-fold Cross-Validation", y = "Dice Similarity Coefficient")

###############################################################################

# Figure 4: panel A spans the left column, B and C share the right columns
figure4_layout <- matrix(c(1, 2, 2, 1, 3, 3), nrow = 2, ncol = 3,
                         byrow = TRUE)
figure4_outputs <- list(
  list(name = "figure4.2000x800.png", width = 2000, res = 170),
  list(name = "figure4.1800x800.png", width = 1800, res = 170),
  list(name = "figure4.1800x800.SR.png", width = 1800, res = 150)
)
for (out in figure4_outputs) {
  png(file.path(eval_dir, out$name),
      width = out$width, height = 800, res = out$res)
  multiplot(figA, figB, figC, layout = figure4_layout)
  dev.off()
}

###############################################################################
# Summary An et al.
# NOTE(review): this limit is computed on all rows of `dt` (every score and
# class), whereas the Figure 4-C limit is computed on the DSC/infection rows
# only, so both can select different samples - confirm the intended subset.
dt <- dt[index %in% sort(dt$index)[1:100]]
summary_an <- dt[, .(median = median(value), std = sd(value)),
                 by = c("score", "cv", "class")]
summary_an <- summary_an[class == "infection"]
print(summary_an)
# Summary Ma et al.
summary_ma <- df[, .(median = median(value), std = sd(value)),
                 by = c("score", "cv", "class")]
print(summary_ma)