[494cbf]: / R / runKappa.R

Download this file

142 lines (123 with data), 5.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#' @name runKappa
#' @title Run consistency evaluation using Kappa statistic
#' @description Calculate Kappa statistic to measure the consistency between two appraisements
#' @param subt1 A numeric vector to indicate the first appraisement. Order should be exactly the same with subt2 for each sample.
#' @param subt2 A numeric vector to indicate the second appraisement. Order should be exactly the same with subt1 for each sample.
#' @param subt1.lab A string value to indicate the label of the first subtype.
#' @param subt2.lab A string value to indicate the label of the second subtype.
#' @param fig.path A string value to indicate the output path for storing the consistency heatmap.
#' @param fig.name A string value to indicate the name of the consistency heatmap.
#' @param width A numeric value to indicate the width of output figure.
#' @param height A numeric value to indicate the height of output figure.
#' @details This function evaluates the consistency between two appraisements that targets to the same cohort.
#' For example, the NTP-predicted subtype amd PAM-predicted subtype of external cohort, or the current subtype and predicted subtype of discovery cohort.
#' Therefore, the arguments `subt1` and `subt2` can be the `clust` column of `clust.res` derived from `getMOIC()` with one specified algorithm
#' or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms or `runNTP()` or `runPAM()`.
#' However, subtypes identified from different algorithm (i.e., `get\%algorithm_name1\%` and `get\%algorithm_name2\%`) can not be evaluated
#' because the subtype 1 identified from the first algorithm may not be the same subtype 1 from the second algorithm.
#' @return A figure of consistency heatmap (.pdf).
#' @export
#' @importFrom grDevices dev.copy2pdf
#' @examples # There is no example and please refer to vignette.
runKappa <- function(subt1 = NULL,
subt2 = NULL,
subt1.lab = NULL,
subt2.lab = NULL,
fig.path = getwd(),
fig.name = "constheatmap",
width = 5,
height = 5) {
subt1 <- as.vector(as.character(subt1))
subt2 <- as.vector(as.character(subt2))
# check sample
if(length(subt1) != length(subt2)) {
stop("subtypes identified from different cohorts.")
}
# check subtype
if(!identical(sort(unique(subt1)),sort(unique(subt2)))) {
stop("subtypes fail to matched from two appraisements.")
}
# check argument
if(is.null(subt1.lab) | is.null(subt2.lab)) {
stop("label for subtype1 and subtype2 must be both indicated.")
}
# calculate consistency
comb.subt <- data.frame(subt1 = paste0("CS", subt1),
subt2 = paste0("CS", subt2),
stringsAsFactors = F)
tab_classify <- as.data.frame.array(table(comb.subt$subt1,comb.subt$subt2))
# n.moic <- length(unique(moic.res$clust.res$clust))
# if(n.moic < 5) {
# p.fisher <- fisher.test(tab_classify, workspace = 1e9)$p.value
# } else {
# message("--using simulated p value in fisher's exact test.")
# p.fisher <- fisher.test(tab_classify, simulate.p.value = T)$p.value
# }
# calculate kappa statistic
x <- table(comb.subt$subt1,comb.subt$subt2)
nr <- nrow(x); nc <- ncol(x); N <- sum(x)
Po <- sum(diag(x))/N; Pe <- sum(rowSums(x) * colSums(x)/N)/N
kappa <- (Po - Pe)/(1 - Pe)
seK0 <- sqrt(Pe/(N * (1 - Pe)))
p.v <- 1 - pnorm(kappa/seK0)
p.lab <- ifelse(p.v < 0.001, "P < 0.001", paste0("P = ", format(round(p.v,3), nsmall = 3)))
# generate consistency table
blue <- "#204F8D"
lblue <- "#498EB9"
dwhite <- "#B6D1E8"
white <- "#E6EAF7"
par(bty="n", mgp = c(2,0.5,0), mar = c(4.1,4.1,4.1,2.1),tcl=-.25, font.main=3)
par(xpd=NA)
plot(c(0,ncol(tab_classify)),c(0,nrow(tab_classify)),
col = "white",
xlab = "",xaxt = "n",
ylab = "",yaxt = "n")
title(paste0("Consistency between ",subt1.lab," and ",subt2.lab,"\nKappa = ", format(round(kappa,3), nsmall = 3),
"\n", p.lab),
adj = 0, line = 0)
# add y-axis
axis(2, at = 0.5:(nrow(tab_classify)-0.5), labels = FALSE)
text(y = 0.5:(nrow(tab_classify)-0.5),
par("usr")[1],
labels = rownames(tab_classify)[nrow(tab_classify):1],
srt = 0, pos = 2, xpd = TRUE)
mtext(paste0("Subtypes derived from ", subt1.lab), side = 2, line = 3)
# add x-axis
axis(1, at = 0.5:(ncol(tab_classify)-0.5), labels = FALSE)
text(x = 0.5:(ncol(tab_classify)-0.5),
par("usr")[1] - 0.2,
labels = colnames(tab_classify),
srt = 45, pos = 1, xpd = TRUE)
mtext(paste0("Subtypes derived from ", subt2.lab), side = 1, line = 3)
# generate colors
input_matrix <- as.matrix(tab_classify)
mat.max = max(input_matrix)
unq.value <- unique(sort(as.vector(input_matrix)))
rbPal <- colorRampPalette(c(white,dwhite,lblue,blue))
col.vec <- rbPal(max(unq.value) + 1)
col.mat <- matrix(NA,byrow = T,ncol = ncol(input_matrix),nrow = nrow(input_matrix))
# fill matrix
for (i in 1:nrow(col.mat)) {
for (j in 1:ncol(col.mat)) {
col.mat[i,j] <- col.vec[input_matrix[i,j] + 1]
}
}
# generate heatmap
x_size <- ncol(input_matrix)
y_size <- nrow(input_matrix)
my_xleft = rep(c(0:(x_size-1)),each = x_size)
my_xright = my_xleft + 1
my_ybottom = rep(c((y_size-1):0),y_size)
my_ytop = my_ybottom + 1
rect(xleft = my_xleft,
ybottom = my_ybottom,
xright = my_xright,
ytop = my_ytop,
col=col.mat,
border = F)
# fill count
text(my_xleft + 0.5,my_ybottom + 0.5,input_matrix, cex = 1.3)
# output to pdf
outFig <- paste0(fig.name,".pdf")
invisible(dev.copy2pdf(file = file.path(fig.path, outFig), width = width, height = height))
}