Diff of /R/compAgree.R [000000] .. [494cbf]

Switch to unified view

a b/R/compAgree.R
1
#' @name compAgree
2
#' @title Comparison of agreement between two subtypes
3
#' @description Compute the Rand Index, Jaccard Index, Fowlkes-Mallows, and Normalized Mutual Information for agreement of two partitions, and generate alluvial diagrams for visualization.
4
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
5
#' @param subt2comp A data.frame of subtypes that need to compare with current subtype with rownames for samples and columns for other subtypes.
6
#' @param doPlot A logic value to indicate if generating alluvial diagram to show the agreement of different subtypes; TRUE by default.
7
#' @param box.width A numeric valur to indicate the width for box in alluvial diagram.
8
#' @param clust.col A string vector storing colors for each cluster.
9
#' @param width A numeric value to indicate the width of alluvial diagram.
10
#' @param height A numeric value to indicate the height of alluvial diagram.
11
#' @param fig.path A string value to indicate the output path for storing the figure.
12
#' @param fig.name A string value to indicate the name of the figure.
13
#' @return A figure of agreement (.pdf) if \code{doPlot = TRUE} and a data.frame storing four agreement measurements, including Rand Index (RI), Adjusted Mutual Information (AMI), Jaccard Index (JI), and Fowlkes-Mallows (FM).
14
#' @export
15
#' @import ggplot2
16
#' @import ggalluvial
17
#' @importFrom ggalluvial StatStratum
18
#' @importFrom dplyr group_by tally %>%
19
#' @importFrom cowplot plot_grid
20
#' @importFrom flexclust comPart
21
#' @importFrom aricode AMI ARI
22
#' @importFrom reshape2 melt
23
#' @examples # There is no example and please refer to vignette.
24
compAgree <- function(moic.res  = NULL,
25
                      subt2comp = NULL,
26
                      doPlot    = TRUE,
27
                      clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
28
                      box.width = 0.1,
29
                      fig.name  = NULL,
30
                      fig.path  = getwd(),
31
                      width     = 6,
32
                      height    = 5) {
33
  dat <- moic.res$clust.res
34
  colnames(dat)[2] <- "Subtype"
35
  dat$Subtype <- paste0("CS", dat$Subtype)
36
  comsam <- intersect(dat$samID,rownames(subt2comp))
37
38
  # check data
39
  if(length(comsam) == nrow(dat)) {
40
    message("--all samples matched.")
41
  } else {
42
    message(paste0("--",(nrow(dat)-length(comsam))," samples mismatched from current subtypes."))
43
  }
44
  dat <- cbind.data.frame("Subtype" = dat[comsam, "Subtype", drop = FALSE], subt2comp[comsam, , drop = FALSE])
45
  dat <- as.data.frame(na.omit(dat))
46
  if(nrow(dat) != nrow(moic.res$clust.res)) {message("--removed NA values in subt2comp.")}
47
48
  var <- colnames(dat)
49
  n.var <- length(var)
50
  if(n.var > 6) {stop("please indicate less than 6 subtypes (including current subtypes) that need to compare.")}
51
52
  # generate comparsion table
53
  outTab <- NULL
54
  c1 <- as.vector(as.numeric(factor(dat[,1])))
55
  for (i in 2:ncol(dat)) {
56
    #cat(paste0("Compare ", colnames(dat)[1]," with ", colnames(dat)[i],".\n"))
57
58
    c2 <- as.vector(as.numeric(factor(dat[,i])))
59
60
    # calculate Rand Index
61
    RI <- flexclust::comPart(c1, c2, type = c('RI'))
62
63
    # calculate Adjusted Mutual Information
64
    AMI <- aricode::AMI(c1, c2)
65
66
    # calculate Jaccard Index
67
    JI <- flexclust::comPart(c1, c2, type = c('J'))
68
69
    # calculate Fowlkes-Mallows
70
    FM <- flexclust::comPart(c1, c2, type = c('FM'))
71
72
    outTab <- rbind.data.frame(outTab,data.frame(current.subtype = colnames(dat)[1],
73
                                                 other.subtype = colnames(dat)[i],
74
                                                 RI = as.numeric(RI),
75
                                                 AMI = as.numeric(AMI),
76
                                                 JI = as.numeric(JI),
77
                                                 FM = as.numeric(FM),
78
                                                 stringsAsFactors = FALSE),
79
                               stringsAsFactors = FALSE)
80
  }
81
82
  assign("StatStratum", ggalluvial::StatStratum, envir=globalenv())
83
84
  if(doPlot) {
85
86
    if(is.null(fig.name)) {
87
      outFig <- "Agreement between current subtype and other classifications.pdf"
88
    } else {
89
      outFig <- paste0(fig.name,".pdf")
90
    }
91
92
    # generate barplot for agreement
93
    agreement <- reshape2::melt(outTab[,2:ncol(outTab)], id.vars = "other.subtype", variable.name = "Method")
94
    b <- ggplot(data = agreement, aes(x = Method, y = value, fill = other.subtype)) +
95
      geom_bar(stat = "identity", position = position_dodge()) +
96
      scale_fill_brewer(palette = "Set1") + ggplot2::labs(x = "", y = "Scalar") +
97
      scale_y_continuous(limits = c(0,1), expand = c(0, 0)) +
98
      theme_bw() +
99
      theme(legend.position = "top",
100
            legend.title = element_blank(),
101
            panel.grid = element_blank(),
102
            axis.ticks = element_blank(),
103
            axis.text.x = element_text(color = "black", size = 12, face = "bold", vjust = -5),
104
            axis.text.y = element_text(color = "black", size = 12, face = "bold"),
105
            axis.title.x = element_text(color = "black", size = 12, face = "bold"),
106
            axis.title.y = element_text(color = "black", size = 12, face = "bold")) +
107
      ggtitle("")
108
109
    # generate alluvial diagram
110
    col = clust.col[1:length(unique(dat$Subtype))]
111
    var1 <- var[1]
112
113
    # 1 subtype
114
    if(n.var == 2) {
115
      var2 <- var[2]
116
      subdf <- dat[,1:n.var]
117
      colnames(subdf) <- c("Subtype",paste0("Subtype",1:(n.var-1)))
118
      subdf <- subdf %>% group_by(Subtype, Subtype1) %>% tally(name = "Freq") %>% as.data.frame()
119
120
      p <- ggplot(subdf,
121
                  aes(y = Freq,
122
                      axis1 = Subtype,
123
                      axis2 = Subtype1)) +
124
        scale_fill_manual(values = col) +
125
        geom_flow(stat = "alluvium", width = 1/8, aes(fill = Subtype)) +
126
        geom_stratum(width = 1/8, reverse = TRUE) +
127
        geom_text(stat = "stratum", aes(label = after_stat(stratum)), reverse = TRUE) +
128
        scale_x_continuous(breaks = 1:n.var, labels = c(var1, var2)) +
129
        theme_bw() +
130
        theme(legend.position = "top",
131
              legend.title = element_blank(),
132
              panel.grid = element_blank(),
133
              panel.border = element_blank(),
134
              axis.title.x = element_blank(),
135
              axis.title.y = element_blank(),
136
              axis.text.y = element_blank(),
137
              axis.ticks = element_blank(),
138
              axis.text.x = element_text(size = 12, face = "bold", color = "black")) +
139
        ggtitle("")
140
    }
141
142
    # 2 subtypes
143
    if(n.var == 3) {
144
      var2 <- var[2]
145
      var3 <- var[3]
146
      subdf <- dat[,1:n.var]
147
      colnames(subdf) <- c("Subtype",paste0("Subtype",1:(n.var-1)))
148
      subdf <- subdf %>% group_by(Subtype, Subtype1, Subtype2) %>% tally(name = "Freq") %>% as.data.frame()
149
150
      p <- ggplot(subdf,
151
                  aes(y = Freq,
152
                      axis1 = Subtype,
153
                      axis2 = Subtype1,
154
                      axis3 = Subtype2)) +
155
        scale_fill_manual(values = col) +
156
        geom_flow(stat = "alluvium", width = 1/8, aes(fill = Subtype)) +
157
        geom_stratum(width = box.width, reverse = TRUE) +
158
        geom_text(stat = "stratum", aes(label = after_stat(stratum)), reverse = TRUE) +
159
        scale_x_continuous(breaks = 1:n.var, labels = c(var1, var2, var3)) +
160
        theme_bw() +
161
        theme(legend.position = "top",
162
              legend.title = element_blank(),
163
              panel.grid = element_blank(),
164
              panel.border = element_blank(),
165
              axis.title.x = element_blank(),
166
              axis.title.y = element_blank(),
167
              axis.text.y = element_blank(),
168
              axis.ticks = element_blank(),
169
              axis.text.x = element_text(size = 12, face = "bold", color = "black")) +
170
        ggtitle("")
171
    }
172
173
    # 3 subtypes
174
    if(n.var == 4) {
175
      var2 <- var[2]
176
      var3 <- var[3]
177
      var4 <- var[4]
178
      subdf <- dat[,1:n.var]
179
      colnames(subdf) <- c("Subtype",paste0("Subtype",1:(n.var-1)))
180
      subdf <- subdf %>% group_by(Subtype, Subtype1, Subtype2, Subtype3) %>% tally(name = "Freq") %>% as.data.frame()
181
182
      p <- ggplot(subdf,
183
                  aes(y = Freq,
184
                      axis1 = Subtype,
185
                      axis2 = Subtype1,
186
                      axis3 = Subtype2,
187
                      axis4 = Subtype3)) +
188
        scale_fill_manual(values = col) +
189
        geom_flow(stat = "alluvium", width = 1/8, aes(fill = Subtype)) +
190
        geom_stratum(width = 1/8, reverse = TRUE) +
191
        geom_text(stat = "stratum", aes(label = after_stat(stratum)), reverse = TRUE) +
192
        scale_x_continuous(breaks = 1:n.var, labels = c(var1, var2, var3, var4)) +
193
        theme_bw() +
194
        theme(legend.position = "top",
195
              legend.title = element_blank(),
196
              panel.grid = element_blank(),
197
              panel.border = element_blank(),
198
              axis.title.x = element_blank(),
199
              axis.title.y = element_blank(),
200
              axis.text.y = element_blank(),
201
              axis.ticks = element_blank(),
202
              axis.text.x = element_text(size = 12, face = "bold", color = "black")) +
203
        ggtitle("")
204
    }
205
206
    # 4 subtypes
207
    if(n.var == 5) {
208
      var2 <- var[2]
209
      var3 <- var[3]
210
      var4 <- var[4]
211
      var5 <- var[5]
212
      subdf <- dat[,1:n.var]
213
      colnames(subdf) <- c("Subtype",paste0("Subtype",1:(n.var-1)))
214
      subdf <- subdf %>% group_by(Subtype, Subtype1, Subtype2, Subtype3, Subtype4) %>% tally(name = "Freq") %>% as.data.frame()
215
216
      p <- ggplot(subdf,
217
                  aes(y = Freq,
218
                      axis1 = Subtype,
219
                      axis2 = Subtype1,
220
                      axis3 = Subtype2,
221
                      axis4 = Subtype3,
222
                      axis5 = Subtype4)) +
223
        scale_fill_manual(values = col) +
224
        geom_flow(stat = "alluvium", width = 1/8, aes(fill = Subtype)) +
225
        geom_stratum(width = 1/8, reverse = TRUE) +
226
        geom_text(stat = "stratum", aes(label = after_stat(stratum)), reverse = TRUE) +
227
        scale_x_continuous(breaks = 1:n.var, labels = c(var1, var2, var3, var4, var5)) +
228
        theme_bw() +
229
        theme(legend.position = "top",
230
              legend.title = element_blank(),
231
              panel.grid = element_blank(),
232
              panel.border = element_blank(),
233
              axis.title.x = element_blank(),
234
              axis.title.y = element_blank(),
235
              axis.text.y = element_blank(),
236
              axis.ticks = element_blank(),
237
              axis.text.x = element_text(size = 12, face = "bold", color = "black")) +
238
        ggtitle("")
239
    }
240
241
    # 5 subtypes
242
    if(n.var == 6) {
243
      var2 <- var[2]
244
      var3 <- var[3]
245
      var4 <- var[4]
246
      var5 <- var[5]
247
      var6 <- var[6]
248
      subdf <- dat[,1:n.var]
249
      colnames(subdf) <- c("Subtype",paste0("Subtype",1:(n.var-1)))
250
      subdf <- subdf %>% group_by(Subtype, Subtype1, Subtype2, Subtype3, Subtype4, Subtype5) %>% tally(name = "Freq") %>% as.data.frame()
251
252
      p <- ggplot(subdf,
253
                  aes(y = Freq,
254
                      axis1 = Subtype,
255
                      axis2 = Subtype1,
256
                      axis3 = Subtype2,
257
                      axis4 = Subtype3,
258
                      axis5 = Subtype4,
259
                      axis6 = Subtype5)) +
260
        scale_fill_manual(values = col) +
261
        geom_flow(stat = "alluvium", width = 1/8, aes(fill = Subtype)) +
262
        geom_stratum(width = 1/8, reverse = TRUE) +
263
        geom_text(stat = "stratum", aes(label = after_stat(stratum)), reverse = TRUE) +
264
        scale_x_continuous(breaks = 1:n.var, labels = c(var1, var2, var3, var4, var5, var6)) +
265
        theme_bw() +
266
        theme(legend.position = "top",
267
              legend.title = element_blank(),
268
              panel.grid = element_blank(),
269
              panel.border = element_blank(),
270
              axis.title.x = element_blank(),
271
              axis.title.y = element_blank(),
272
              axis.text.y = element_blank(),
273
              axis.ticks = element_blank(),
274
              axis.text.x = element_text(size = 12, face = "bold", color = "black")) +
275
        ggtitle("")
276
    }
277
    agree <- list(b,p)
278
    bp <- plot_grid(plotlist = agree, ncol = 2)
279
280
    # save to pdf
281
    ggsave(file.path(fig.path,outFig), width = width, height = height)
282
283
    # output to screen
284
    print(bp)
285
  }
286
  return(outTab)
287
}