Diff of /R/runKappa.R [000000] .. [494cbf]

Switch to side-by-side view

--- a
+++ b/R/runKappa.R
@@ -0,0 +1,141 @@
+#' @name runKappa
+#' @title Run consistency evaluation using Kappa statistic
+#' @description Calculate Kappa statistic to measure the consistency between two appraisements
+#' @param subt1 A numeric vector to indicate the first appraisement. Order should be exactly the same with subt2 for each sample.
+#' @param subt2 A numeric vector to indicate the second appraisement. Order should be exactly the same with subt1 for each sample.
+#' @param subt1.lab A string value to indicate the label of the first subtype.
+#' @param subt2.lab A string value to indicate the label of the second subtype.
+#' @param fig.path A string value to indicate the output path for storing the consistency heatmap.
+#' @param fig.name A string value to indicate the name of the consistency heatmap.
+#' @param width A numeric value to indicate the width of output figure.
+#' @param height A numeric value to indicate the height of output figure.
+#' @details This function evaluates the consistency between two appraisements that targets to the same cohort.
+#' For example, the NTP-predicted subtype amd PAM-predicted subtype of external cohort, or the current subtype and predicted subtype of discovery cohort.
+#' Therefore, the arguments `subt1` and `subt2` can be the `clust` column of `clust.res` derived from `getMOIC()` with one specified algorithm
+#' or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms or `runNTP()` or `runPAM()`.
+#' However, subtypes identified from different algorithm (i.e., `get\%algorithm_name1\%` and `get\%algorithm_name2\%`) can not be evaluated
+#' because the subtype 1 identified from the first algorithm may not be the same subtype 1 from the second algorithm.
+#' @return A figure of consistency heatmap (.pdf).
+#' @export
+#' @importFrom grDevices dev.copy2pdf
+#' @examples # There is no example and please refer to vignette.
+runKappa <- function(subt1     = NULL,
+                     subt2     = NULL,
+                     subt1.lab = NULL,
+                     subt2.lab = NULL,
+                     fig.path  = getwd(),
+                     fig.name  = "constheatmap",
+                     width     = 5,
+                     height    = 5) {
+
+  subt1 <- as.vector(as.character(subt1))
+  subt2 <- as.vector(as.character(subt2))
+
+  # check sample
+  if(length(subt1) != length(subt2)) {
+    stop("subtypes identified from different cohorts.")
+  }
+
+  # check subtype
+  if(!identical(sort(unique(subt1)),sort(unique(subt2)))) {
+    stop("subtypes fail to matched from two appraisements.")
+  }
+
+  # check argument
+  if(is.null(subt1.lab) | is.null(subt2.lab)) {
+    stop("label for subtype1 and subtype2 must be both indicated.")
+  }
+
+  # calculate consistency
+  comb.subt <- data.frame(subt1 = paste0("CS", subt1),
+                          subt2 = paste0("CS", subt2),
+                          stringsAsFactors = F)
+
+  tab_classify <- as.data.frame.array(table(comb.subt$subt1,comb.subt$subt2))
+
+  # n.moic <- length(unique(moic.res$clust.res$clust))
+  # if(n.moic < 5) {
+  #   p.fisher <- fisher.test(tab_classify, workspace = 1e9)$p.value
+  # } else {
+  #   message("--using simulated p value in fisher's exact test.")
+  #   p.fisher <- fisher.test(tab_classify, simulate.p.value = T)$p.value
+  # }
+
+  # calculate kappa statistic
+  x <- table(comb.subt$subt1,comb.subt$subt2)
+  nr <- nrow(x); nc <- ncol(x); N <- sum(x)
+  Po <- sum(diag(x))/N; Pe <- sum(rowSums(x) * colSums(x)/N)/N
+  kappa <- (Po - Pe)/(1 - Pe)
+  seK0 <- sqrt(Pe/(N * (1 - Pe)))
+  p.v <- 1 - pnorm(kappa/seK0)
+  p.lab <- ifelse(p.v < 0.001, "P < 0.001", paste0("P = ", format(round(p.v,3), nsmall = 3)))
+
+  # generate consistency table
+  blue   <- "#204F8D"
+  lblue  <- "#498EB9"
+  dwhite <- "#B6D1E8"
+  white  <- "#E6EAF7"
+
+  par(bty="n", mgp = c(2,0.5,0), mar = c(4.1,4.1,4.1,2.1),tcl=-.25, font.main=3)
+  par(xpd=NA)
+  plot(c(0,ncol(tab_classify)),c(0,nrow(tab_classify)),
+       col = "white",
+       xlab = "",xaxt = "n",
+       ylab = "",yaxt = "n")
+  title(paste0("Consistency between ",subt1.lab," and ",subt2.lab,"\nKappa = ", format(round(kappa,3), nsmall = 3),
+               "\n", p.lab),
+        adj = 0, line = 0)
+
+  # add y-axis
+  axis(2, at = 0.5:(nrow(tab_classify)-0.5), labels = FALSE)
+  text(y = 0.5:(nrow(tab_classify)-0.5),
+       par("usr")[1],
+       labels = rownames(tab_classify)[nrow(tab_classify):1],
+       srt = 0, pos = 2, xpd = TRUE)
+  mtext(paste0("Subtypes derived from ", subt1.lab), side = 2, line = 3)
+
+  # add x-axis
+  axis(1, at = 0.5:(ncol(tab_classify)-0.5), labels = FALSE)
+  text(x = 0.5:(ncol(tab_classify)-0.5),
+       par("usr")[1] - 0.2,
+       labels = colnames(tab_classify),
+       srt = 45, pos = 1, xpd = TRUE)
+  mtext(paste0("Subtypes derived from ", subt2.lab), side = 1, line = 3)
+
+  # generate colors
+  input_matrix <- as.matrix(tab_classify)
+  mat.max = max(input_matrix)
+  unq.value <- unique(sort(as.vector(input_matrix)))
+  rbPal <- colorRampPalette(c(white,dwhite,lblue,blue))
+  col.vec <- rbPal(max(unq.value) + 1)
+  col.mat <- matrix(NA,byrow = T,ncol = ncol(input_matrix),nrow = nrow(input_matrix))
+
+  # fill matrix
+  for (i in 1:nrow(col.mat)) {
+    for (j in 1:ncol(col.mat)) {
+      col.mat[i,j] <- col.vec[input_matrix[i,j] + 1]
+    }
+  }
+
+  # generate heatmap
+  x_size <- ncol(input_matrix)
+  y_size <- nrow(input_matrix)
+
+  my_xleft = rep(c(0:(x_size-1)),each = x_size)
+  my_xright = my_xleft + 1
+  my_ybottom = rep(c((y_size-1):0),y_size)
+  my_ytop = my_ybottom + 1
+  rect(xleft = my_xleft,
+       ybottom = my_ybottom,
+       xright = my_xright,
+       ytop = my_ytop,
+       col=col.mat,
+       border = F)
+
+  # fill count
+  text(my_xleft + 0.5,my_ybottom + 0.5,input_matrix, cex = 1.3)
+
+  # output to pdf
+  outFig <- paste0(fig.name,".pdf")
+  invisible(dev.copy2pdf(file = file.path(fig.path, outFig), width = width, height = height))
+}