[494cbf]: / R / compMut.R

Download this file

248 lines (224 with data), 12.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#' @name compMut
#' @title Comparison of mutational frequency
#' @description This function is used to compare mutational frequency among different multi-omics integerative clusters to test the independency between subtypes and mutational status. An oncoprint will be also generated with significant mutations.
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
#' @param mut.matrix A binary matrix storing binary mutation data with entries of 0 and 1 only.
#' @param test.method A string value to indicate statistical method for independency testing. Allowed values contain c('fisher', 'chisq'); fisher by default.
#' @param freq.cutoff A numeric value to indicate the frequency cutoff for mutation data. Specifically, only features that mutated in over than such proportion would be included in testing; 0.05 by default.
#' @param p.adj.method A string value to indicate the correction method for multiple comparision. Allowed values contain c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr'); BH by default.
#' @param tab.name A string value to indicate the name of the output table.
#' @param res.path A string value to indicate the path for saving the table.
#' @param doWord A logic value to indicate if transformating the .txt outfile to a .docx WORD file (.txt file will be also kept); TRUE by default.
#' @param doPlot A logic value to indicate if generating oncoprint; TRUE by default.
#' @param innerclust A logic value to indicate if perform clustering within each subtype; TRUE by default.
#' @param fig.path A string value to indicate the output path for storing the oncoprint.
#' @param fig.name A string value to indicate the name of the oncoprint.
#' @param annCol A data.frame storing annotation information for samples.
#' @param annColors A list of string vectors for colors matched with annCol.
#' @param clust.col A string vector storing colors for annotating each subtype.
#' @param mut.col A string vector to indicate the mutation color for oncoprint.
#' @param bg.col A string vector to indicate the background color for oncoprint.
#' @param p.cutoff A numeric value to indicate the nominal p value cutoff for significant mutations shown in the oncoprint; 0.05 by default.
#' @param p.adj.cutoff A numeric value to indicate the adjusted p value cutoff for significant mutations shown in the oncoprint; 0.05 by default.
#' @param width A numeric value to indicate the width of output figure.
#' @param height A numeric value to indicate the height of output figure.
#' @export
#' @return A figure of mutational oncoprint (.pdf) if \code{doPlot = TRUE}, a data.frame storing the difference of mutational frequency among different subtypes and a corresponding table in WORD format if \code{doWord = TRUE}.
#' @importFrom dplyr %>%
#' @importFrom grid grid.rect gpar
#' @importFrom officer read_docx body_add_par body_add_table body_add_par
#' @importFrom ComplexHeatmap HeatmapAnnotation oncoPrint draw
#' @references Gu Z, Eils R, Schlesner M (2016). Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics, 32(18):2847–2849.
#' @examples # There is no example and please refer to vignette.
compMut <- function(moic.res = NULL,
mut.matrix = NULL,
freq.cutoff = 0.05,
test.method = "fisher",
p.adj.method = "BH",
doWord = TRUE,
doPlot = TRUE,
innerclust = TRUE,
res.path = getwd(),
tab.name = NULL,
fig.path = getwd(),
fig.name = NULL,
annCol = NULL,
annColors = NULL,
mut.col = "#21498D",
bg.col = "#dcddde",
p.cutoff = 0.05,
p.adj.cutoff = 0.05,
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
width = 8,
height = 4) {
if(!is.element(test.method, c("fisher","chisq"))) {
stop("test.method for independency can be one of fisher or chisq.\n")
}
if(!is.element(p.adj.method, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"))) {
stop("p.adj.method can be one of holm, hochberg, hommel, bonferroni, BH, BY, fdr.\n")
}
create.anntrack <- function(samples=NULL, subtype=NULL, typesToPlot=NULL) {
if (is.null(samples)) {stop("samples can not be NULL!")}
if (length(samples)!=length(subtype)) {stop("samples and subtype do not have equal length!")}
if (is.null(typesToPlot)) {
typesToPlot <- levels(factor(unique(subtype)))
}
names(subtype) <- samples
return(list(subtype=subtype, typesToPlot=typesToPlot))
}
createMutSubtype <- function(indata=NULL, samples=NULL, genename=NULL) {
if (!is.element(genename, rownames(indata))) { stop (paste(genename, "not found in indata!", sep=" ")) }
comsam <- intersect(colnames(indata), samples)
out <- rep("Not Available", times=length(samples))
names(out) <- samples
out[comsam] <- as.character(indata[genename, comsam])
out[is.na(out)] <- "Not Available"
out[out == "0"] <- "Normal"
out[out == "1"] <- "Mutated"
return(out)
}
clust.res <- moic.res$clust.res
clust.res <- clust.res[order(clust.res$clust,decreasing = FALSE), , drop = FALSE]
comsam <- intersect(clust.res$samID, colnames(mut.matrix))
clust.res <- clust.res[comsam, , drop = FALSE]
mut.matrix <- mut.matrix[,comsam]
n.moic <- length(unique(clust.res$clust))
# check data
if(length(comsam) == nrow(moic.res$clust.res)) {
message("--all samples matched.")
} else {
message("--",paste0((nrow(moic.res$clust.res)-length(comsam))," samples mismatched from current subtypes."))
}
ans <- rep(paste0("CS",1:n.moic),as.numeric(table(clust.res$clust))); names(ans) <- clust.res$samID
genelist <- rownames(mut.matrix[rowSums(mut.matrix) > freq.cutoff * nrow(clust.res),])
binarymut <- as.data.frame(matrix(0, nrow=nrow(clust.res), ncol=length(genelist)))
rownames(binarymut) <- names(ans)
colnames(binarymut) <- genelist
for (k in 1:length(genelist)) {
res <- create.anntrack(samples=names(ans), subtype=createMutSubtype(mut.matrix, names(ans), genelist[k]))
binarymut[, genelist[k]] <- res$subtype
}
out <- matrix(0, nrow=length(genelist), ncol=n.moic + 1)
colnames(out) <- c(levels(factor(ans)), "pvalue")
rownames(out) <- paste(genelist, "Mutated", sep="_")
for (k in 1:length(genelist)) {
genek <- genelist[k]
x <- ans
y <- binarymut[names(x), genek, drop = TRUE]; y <- as.character(y); names(y) <- names(x)
tmp <- setdiff(names(x), names(y)[y=="Not Available"])
x <- x[tmp]
y <- y[tmp]
res <- table(y, x)
if (!all(colnames(res)==colnames(out)[1:(ncol(out)-1)])) {stop(paste("colnames mismatch for ", k, sep=""))}
pct <- paste0("(",format(round(res["Mutated", ]/as.numeric(table(clust.res$clust)) * 100, 1), digits = 3),"%)")
freqpct <- paste(res["Mutated", ], pct, sep = " ")
out[k, 1:(ncol(out)-1)] <- freqpct
if(test.method == "fisher") {
out[k, "pvalue"] <- as.numeric(fisher.test(x, y, workspace = 1e8)$p.value)
} else {
out[k, "pvalue"] <- as.numeric(chisq.test(x, y)$p.value)
}
}
out <- as.data.frame(out)
out$pvalue <- formatC(as.numeric(as.character(out$pvalue)), format = "e", digits = 2)
out$padj <- formatC(p.adjust(as.numeric(out$pvalue), method = p.adj.method), format = "e", digits = 2)
if(is.null(tab.name)) {
outFile <- "Independent test between subtype and mutation.txt"
} else {
outFile <- paste0(tab.name,".txt")
}
tmb <- rowSums(mut.matrix[genelist,])
pct <- paste0("(",format(round(tmb/ncol(mut.matrix) * 100, 1), digits = 1),"%)")
freqpct <- paste(tmb, pct, sep = " ")
out <- cbind.data.frame(data.frame("Gene (Mutated)" = gsub("_Mutated","",rownames(out)),
"TMB" = freqpct,
check.names = FALSE),
out)
rownames(out) <- NULL
# message("show heading...")
# print(head(out))
write.table(out, file.path(res.path, outFile), row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)
# generate WORD format
if(doWord){
comtable <- out
title_name <- paste0("Table *. ", gsub(".txt", "", outFile, fixed = TRUE))
mynote <- "Note: ..."
my_doc <- officer::read_docx()
my_doc %>%
officer::body_add_par(value = title_name, style = "table title") %>%
officer::body_add_table(value = comtable, style = "table_template") %>%
officer::body_add_par(value = mynote) %>%
print(target = file.path(res.path,paste0("TABLE ", gsub(".txt", "", outFile, fixed = TRUE), ".docx")))
}
# generate mutation oncoprint
if(doPlot) {
if(is.null(fig.name)) {
outFig <- paste0("oncoprint for mutations with frequency over than " , freq.cutoff * 100, " pct.pdf")
} else {
outFig <- paste0(fig.name,".pdf")
}
sam.order <- moic.res$clust.res[order(moic.res$clust.res$clust, decreasing = FALSE), "samID"]
colvec <- clust.col[1:length(unique(moic.res$clust.res$clust))]
names(colvec) <- paste0("CS",unique(moic.res$clust.res$clust))
if(!is.null(annCol) & !is.null(annColors)) {
annCol <- annCol[sam.order, , drop = FALSE]
annCol$Subtype <- paste0("CS",moic.res$clust.res[sam.order,"clust"])
annColors[["Subtype"]] <- colvec
} else {
annCol <- data.frame("Subtype" = paste0("CS",moic.res$clust.res[sam.order,"clust"]),
row.names = sam.order)
annColors <- list("Subtype" = colvec)
}
sig.mut <- as.character(out[which(as.numeric(out$pvalue) < p.cutoff & as.numeric(out$padj) < p.adj.cutoff), "Gene (Mutated)"])
onco_dat <- t(binarymut[rownames(annCol), sig.mut, drop = FALSE])
onco_dat[onco_dat == "Normal"] <- ""; onco_dat <- as.data.frame(onco_dat)
alter_fun = list(
background = function(x, y, w, h) {
grid::grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = bg.col, col = NA))
},
Mutated = function(x, y, w, h) {
grid::grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = mut.col, col = NA))
})
col = c("Mutated" = mut.col)
if(innerclust) {
sam.reorder <- c()
for (i in 1:n.moic) {
sam <- moic.res$clust.res[which(moic.res$clust.res$clust == i), "samID"]
tmp <- quiet(ComplexHeatmap::oncoPrint(onco_dat[,sam], get_type = function(x) x,
alter_fun = alter_fun, col = col,
remove_empty_columns = FALSE,
show_pct = FALSE,
bottom_annotation = NULL,
top_annotation = NULL,
show_heatmap_legend = FALSE))
sam.reorder <- c(sam.reorder, sam[tmp@column_order])
}
my_annotation = ComplexHeatmap::HeatmapAnnotation(df = annCol[sam.reorder, , drop = FALSE], col = annColors)
p <- quiet(ComplexHeatmap::oncoPrint(onco_dat[,sam.reorder], get_type = function(x) x,
alter_fun = alter_fun, col = col,
remove_empty_columns = FALSE,
column_order = sam.reorder,
show_pct = TRUE,
bottom_annotation = my_annotation,
top_annotation = NULL,
show_heatmap_legend = FALSE))
} else {
my_annotation = ComplexHeatmap::HeatmapAnnotation(df = annCol, col = annColors)
p <- quiet(ComplexHeatmap::oncoPrint(onco_dat, get_type = function(x) x,
alter_fun = alter_fun, col = col,
remove_empty_columns = FALSE,
column_order = colnames(onco_dat),
show_pct = TRUE,
bottom_annotation = my_annotation,
top_annotation = NULL,
show_heatmap_legend = FALSE))
}
# save to pdf
pdf(file.path(fig.path, outFig), width = width, height = height)
draw(p)
invisible(dev.off())
# print to screen
draw(p)
}
return(out)
}