|
a |
|
b/R/runMarker.R |
|
|
1 |
#' @name runMarker |
|
|
2 |
#' @title Run identification of unique biomarkers |
|
|
3 |
#' @description This function aims to identify uniquely and significantly expressed (overexpressed or downexpressed) biomarkers for each subtype identified by multi-omics integrative clustering algorithms. Top markers will be chosen to generate a template so as to run nearest template prediction for subtype verification. |
|
|
4 |
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms. |
|
|
5 |
#' @param dea.method A string value to indicate the algorithm for differential expression analysis. Allowed value contains c('deseq2', 'edger', 'limma'). |
|
|
6 |
#' @param dat.path A string value to indicate the path for saving the files of differential expression analysis. |
|
|
7 |
#' @param res.path A string value to indicate the path for saving the results for identifying subtype-specific markers. |
|
|
8 |
#' @param p.cutoff A numeric value to indicate the nominal p value for identifying significant markers; pvalue < 0.05 by default. |
|
|
9 |
#' @param p.adj.cutoff A numeric value to indicate the adjusted p value for identifying significant markers; padj < 0.05 by default. |
|
|
10 |
#' @param dirct A string value to indicate the direction of identifying significant marker. Allowed values contain c('up', 'down'); `up` means up-regulated marker, and `down` means down-regulated marker. |
|
|
11 |
#' @param n.marker A integer value to indicate how many top markers sorted by log2fc should be identified for each subtype; 200 by default. |
|
|
12 |
#' @param norm.expr A matrix of normalized expression data with rows for genes and columns for samples; FPKM or TPM without log2 transformation is recommended. |
|
|
13 |
#' @param prefix A string value to indicate the prefix of differential expression file (use for searching files). |
|
|
14 |
#' @param doplot A logic value to indicate if generating heatmap by using subtype-specific markers. TRUE by default. |
|
|
15 |
#' @param annCol A data.frame storing annotation information for samples. |
|
|
16 |
#' @param annColors A list of string vectors for colors matched with annCol. |
|
|
17 |
#' @param clust.col A string vector storing colors for annotating each subtype at the top of heatmap. |
|
|
18 |
#' @param halfwidth A numeric vector to assign marginal cutoff for truncating values in data; 3 by default. |
|
|
19 |
#' @param centerFlag A logical vector to indicate if expression data should be centered; TRUE by default. |
|
|
20 |
#' @param scaleFlag A logical vector to indicate if expression data should be scaled; TRUE by default. |
|
|
21 |
#' @param color A string vector storing colors for heatmap. |
|
|
22 |
#' @param fig.path A string value to indicate the output path for storing the marker heatmap. |
|
|
23 |
#' @param fig.name A string value to indicate the name of the marker heatmap. |
|
|
24 |
#' @param width A numeric value to indicate the width of output figure. |
|
|
25 |
#' @param height A numeric value to indicate the height of output figure. |
|
|
26 |
#' @param show_rownames A logic value to indicate if showing rownames (feature names) in heatmap; FALSE by default. |
|
|
27 |
#' @param show_colnames A logic value to indicate if showing colnames (sample ID) in heatmap; FALSE by default. |
|
|
28 |
#' @param ... Additional parameters pass to \link[ComplexHeatmap]{pheatmap}. |
|
|
29 |
#' @return A figure of subtype-specific marker heatmap (.pdf) if \code{doPlot = TRUE} and a list with the following components: |
|
|
30 |
#' |
|
|
31 |
#' \code{unqlist} a string vector storing the unique marker across all subtypes. |
|
|
32 |
#' |
|
|
33 |
#' \code{templates} a data.frame storing the the template information for nearest template prediction, which is used for verification in external cohort. |
|
|
34 |
#' |
|
|
35 |
#' \code{dirct} a string value indicating the direction for identifying subtype-specific markers. |
|
|
36 |
#' |
|
|
37 |
#' \code{heatmap} a complexheatmap object. |
|
|
38 |
#' @export |
|
|
39 |
#' @importFrom grDevices pdf dev.off colorRampPalette |
|
|
40 |
#' @importFrom ComplexHeatmap pheatmap draw |
|
|
41 |
#' @references Gu Z, Eils R, Schlesner M (2016). Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics, 32(18):2847-2849. |
|
|
42 |
#' @examples # There is no example and please refer to vignette. |
|
|
43 |
runMarker <- function(moic.res = NULL, |
|
|
44 |
dea.method = c("deseq2", "edger", "limma"), |
|
|
45 |
prefix = NULL, |
|
|
46 |
dat.path = getwd(), |
|
|
47 |
res.path = getwd(), |
|
|
48 |
p.cutoff = 0.05, |
|
|
49 |
p.adj.cutoff = 0.05, |
|
|
50 |
dirct = "up", |
|
|
51 |
n.marker = 200, |
|
|
52 |
doplot = TRUE, |
|
|
53 |
norm.expr = NULL, |
|
|
54 |
annCol = NULL, |
|
|
55 |
annColors = NULL, |
|
|
56 |
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"), |
|
|
57 |
halfwidth = 3, |
|
|
58 |
centerFlag = TRUE, |
|
|
59 |
scaleFlag = TRUE, |
|
|
60 |
show_rownames = FALSE, |
|
|
61 |
show_colnames = FALSE, |
|
|
62 |
color = c("#5bc0eb", "black", "#ECE700"), |
|
|
63 |
fig.path = getwd(), |
|
|
64 |
fig.name = NULL, |
|
|
65 |
width = 8, |
|
|
66 |
height = 8, |
|
|
67 |
...) { |
|
|
68 |
|
|
|
69 |
n.moic <- length(unique(moic.res$clust.res$clust)) |
|
|
70 |
mo.method <- moic.res$mo.method |
|
|
71 |
DEpattern <- paste(mo.method, "_", ifelse(is.null(prefix),"",paste0(prefix,"_")), dea.method, ".*._vs_Others.txt$", sep="") |
|
|
72 |
DEfiles <- dir(dat.path,pattern = DEpattern) |
|
|
73 |
|
|
|
74 |
if(length(DEfiles) == 0) {stop("no DEfiles!")} |
|
|
75 |
if(length(DEfiles) != n.moic) {stop("not all the multi-omics clusters have DEfile!")} |
|
|
76 |
if (!is.element(dirct, c("up", "down"))) {stop( "dirct type error! Allowed value contains c('up', 'down').") } |
|
|
77 |
|
|
|
78 |
if(dirct == "up") { outlabel <- "unique_upexpr_marker.txt" } |
|
|
79 |
if(dirct == "down") { outlabel <- "unique_downexpr_marker.txt" } |
|
|
80 |
|
|
|
81 |
genelist <- c() |
|
|
82 |
for (filek in DEfiles) { |
|
|
83 |
DEres <- read.table(file.path(dat.path, filek), header=TRUE, row.names=NULL, sep="\t", quote="", stringsAsFactors=FALSE) |
|
|
84 |
DEres <- DEres[!duplicated(DEres[, 1]),] |
|
|
85 |
DEres <- DEres[!is.na(DEres[, 1]), ] |
|
|
86 |
rownames(DEres) <- DEres[, 1] |
|
|
87 |
DEres <- DEres[, -1] |
|
|
88 |
|
|
|
89 |
#rownames(DEres) <- toupper(rownames(DEres)) |
|
|
90 |
if (dirct == "up") { |
|
|
91 |
genelist <- c( genelist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc > 0, ]) ) |
|
|
92 |
} |
|
|
93 |
if (dirct == "down") { |
|
|
94 |
genelist <- c( genelist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc < 0, ]) ) |
|
|
95 |
} |
|
|
96 |
} |
|
|
97 |
unqlist <- setdiff(genelist,genelist[duplicated(genelist)]) |
|
|
98 |
|
|
|
99 |
marker <- list() |
|
|
100 |
for (filek in DEfiles) { |
|
|
101 |
DEres <- read.table(file.path(dat.path, filek), header=TRUE, row.names=NULL, sep="\t", quote="", stringsAsFactors=FALSE) |
|
|
102 |
DEres <- DEres[!duplicated(DEres[, 1]),] |
|
|
103 |
DEres <- DEres[!is.na(DEres[, 1]), ] |
|
|
104 |
rownames(DEres) <- DEres[, 1] |
|
|
105 |
DEres <- DEres[, -1] |
|
|
106 |
|
|
|
107 |
#rownames(DEres) <- toupper(rownames(DEres)) |
|
|
108 |
if(dirct == "up") { |
|
|
109 |
outk <- intersect( unqlist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc > 0, ]) ) |
|
|
110 |
outk <- DEres[outk,] |
|
|
111 |
outk <- outk[order(outk$log2fc, decreasing = TRUE),] |
|
|
112 |
|
|
|
113 |
if(nrow(outk) > n.marker) { |
|
|
114 |
marker[[filek]] <- outk[1:n.marker,] |
|
|
115 |
} else { |
|
|
116 |
marker[[filek]] <- outk |
|
|
117 |
} |
|
|
118 |
marker$dirct <- "up" |
|
|
119 |
} |
|
|
120 |
if(dirct == "down") { |
|
|
121 |
outk <- intersect( unqlist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc < 0, ]) ) |
|
|
122 |
outk <- DEres[outk,] |
|
|
123 |
outk <- outk[order(outk$log2fc, decreasing = FALSE),] |
|
|
124 |
|
|
|
125 |
if(nrow(outk) > n.marker) { |
|
|
126 |
marker[[filek]] <- outk[1:n.marker,] |
|
|
127 |
} else { |
|
|
128 |
marker[[filek]] <- outk |
|
|
129 |
} |
|
|
130 |
marker$dirct <- "down" |
|
|
131 |
} |
|
|
132 |
# write file |
|
|
133 |
write.table(outk, file=file.path(res.path, paste(gsub("_vs_Others.txt","", filek, fixed = TRUE), outlabel, sep = "_")), row.names = TRUE, col.names = NA, sep = "\t", quote = FALSE) |
|
|
134 |
} |
|
|
135 |
|
|
|
136 |
# generate templates for nearest template prediction |
|
|
137 |
templates <- NULL |
|
|
138 |
for (filek in DEfiles) { |
|
|
139 |
tmp <- data.frame(probe = rownames(marker[[filek]]), |
|
|
140 |
class = sub("_vs_Others.txt","",sub(".*.result.","",filek)), |
|
|
141 |
dirct = marker$dirct, |
|
|
142 |
stringsAsFactors = FALSE) |
|
|
143 |
templates <- rbind.data.frame(templates, tmp, stringsAsFactors = FALSE) |
|
|
144 |
} |
|
|
145 |
write.table(templates, file=file.path(res.path, paste0(mo.method,"_",dea.method,"_",dirct,"regulated_marker_templates.txt")), row.names = FALSE, sep = "\t", quote = FALSE) |
|
|
146 |
|
|
|
147 |
# generate heatmap with subtype-specific markers |
|
|
148 |
if(doplot) { |
|
|
149 |
if(is.null(norm.expr)) { |
|
|
150 |
stop("please provide a matrix or data.frame of normalized expression data with rows for genes and columns for samples; FPKM or TPM without log2 transformation is recommended.") |
|
|
151 |
} |
|
|
152 |
|
|
|
153 |
# check data |
|
|
154 |
comsam <- intersect(moic.res$clust.res$samID, colnames(norm.expr)) |
|
|
155 |
if(length(comsam) == nrow(moic.res$clust.res)) { |
|
|
156 |
message("--all samples matched.") |
|
|
157 |
} else { |
|
|
158 |
message(paste0("--",(nrow(moic.res$clust.res)-length(comsam))," samples mismatched from current subtypes.")) |
|
|
159 |
} |
|
|
160 |
|
|
|
161 |
moic.res$clust.res <- moic.res$clust.res[comsam, , drop = FALSE] |
|
|
162 |
norm.expr <- norm.expr[,comsam] |
|
|
163 |
|
|
|
164 |
if(is.null(fig.name)) { |
|
|
165 |
outFig <- paste0("markerheatmap_using_",dirct,"regulated_genes.pdf") |
|
|
166 |
} else { |
|
|
167 |
outFig <- paste0(fig.name, "_using_", dirct,"regulated_genes.pdf") |
|
|
168 |
} |
|
|
169 |
sam.order <- moic.res$clust.res[order(moic.res$clust.res$clust, decreasing = FALSE), "samID"] |
|
|
170 |
colvec <- clust.col[1:n.moic] |
|
|
171 |
names(colvec) <- paste0("CS",1:n.moic) |
|
|
172 |
if(!is.null(annCol) & !is.null(annColors)) { |
|
|
173 |
annCol <- annCol[sam.order, , drop = FALSE] |
|
|
174 |
annCol$Subtype <- paste0("CS",moic.res$clust.res[sam.order,"clust"]) |
|
|
175 |
annColors[["Subtype"]] <- colvec |
|
|
176 |
} else { |
|
|
177 |
annCol <- data.frame("Subtype" = paste0("CS",moic.res$clust.res[sam.order,"clust"]), |
|
|
178 |
row.names = sam.order, |
|
|
179 |
stringsAsFactors = FALSE) |
|
|
180 |
annColors <- list("Subtype" = colvec) |
|
|
181 |
} |
|
|
182 |
|
|
|
183 |
if(max(norm.expr) < 25 | (max(norm.expr) >= 25 & min(norm.expr) < 0)) { |
|
|
184 |
message("--expression profile seems to have been standardised (z-score or log transformation), no more action will be performed.") |
|
|
185 |
gset <- norm.expr |
|
|
186 |
} |
|
|
187 |
if(max(norm.expr) >= 25 & min(norm.expr) >= 0){ |
|
|
188 |
message("--log2 transformation done for expression data.") |
|
|
189 |
gset <- log2(norm.expr + 1) |
|
|
190 |
} |
|
|
191 |
|
|
|
192 |
standarize.fun <- function(indata=NULL, halfwidth=NULL, centerFlag=TRUE, scaleFlag=TRUE) { |
|
|
193 |
outdata=t(scale(t(indata), center=centerFlag, scale=scaleFlag)) |
|
|
194 |
if (!is.null(halfwidth)) { |
|
|
195 |
outdata[outdata>halfwidth]=halfwidth |
|
|
196 |
outdata[outdata<(-halfwidth)]= -halfwidth |
|
|
197 |
} |
|
|
198 |
return(outdata) |
|
|
199 |
} |
|
|
200 |
|
|
|
201 |
plotdata <- standarize.fun(gset[intersect(templates$probe, rownames(gset)), sam.order], |
|
|
202 |
halfwidth = halfwidth, |
|
|
203 |
centerFlag = centerFlag, |
|
|
204 |
scaleFlag = scaleFlag) |
|
|
205 |
|
|
|
206 |
if(!is.null(annCol) & !is.null(annColors)) { |
|
|
207 |
for (i in names(annColors)) { |
|
|
208 |
if(is.function(annColors[[i]])) { |
|
|
209 |
annColors[[i]] <- annColors[[i]](pretty(range(annCol[,i]),n = 64)) # transformat colorRamp2 function to color vector |
|
|
210 |
} |
|
|
211 |
} |
|
|
212 |
} |
|
|
213 |
|
|
|
214 |
hm <- ComplexHeatmap::pheatmap(mat = plotdata, |
|
|
215 |
border_color = NA, |
|
|
216 |
cluster_cols = FALSE, |
|
|
217 |
cluster_rows = FALSE, |
|
|
218 |
annotation_col = annCol, |
|
|
219 |
annotation_colors = annColors, |
|
|
220 |
legend_breaks = pretty(c(-halfwidth,halfwidth)), |
|
|
221 |
legend_labels = pretty(c(-halfwidth,halfwidth)), |
|
|
222 |
show_rownames = show_rownames, |
|
|
223 |
show_colnames = show_colnames, |
|
|
224 |
treeheight_col = 0, |
|
|
225 |
treeheight_row = 0, |
|
|
226 |
color = grDevices::colorRampPalette(color)(64), |
|
|
227 |
...) |
|
|
228 |
|
|
|
229 |
# save to pdf |
|
|
230 |
pdf(file.path(fig.path, outFig), width = width, height = height) |
|
|
231 |
draw(hm,annotation_legend_side = "left",heatmap_legend_side = "left") |
|
|
232 |
invisible(dev.off()) |
|
|
233 |
|
|
|
234 |
# print to screen |
|
|
235 |
draw(hm,annotation_legend_side = "left",heatmap_legend_side = "left") |
|
|
236 |
return(list(unqlist = unqlist, templates = templates, dirct = dirct, heatmap = hm)) |
|
|
237 |
} else { |
|
|
238 |
return(list(unqlist = unqlist, templates = templates, dirct = dirct)) |
|
|
239 |
} |
|
|
240 |
} |