|
a |
|
b/atlantis/R/PlotATLANTISresults.R |
|
|
1 |
#' Generate ATLANTIS plots from intermediate files |
|
|
2 |
#' |
|
|
3 |
#' @param fi fit.info object (or file name) |
|
|
4 |
#' @param ATLANTIS_Summary the structure itself or file to load it from |
|
|
5 |
#' @param code.dir unused -- to be dropped |
|
|
6 |
#' @param output.dir if output.dir is NULL then the plot is written to the current device, otherwise a PDF is written to the output.dir |
|
|
7 |
#' @param annTable.file File containing feature annotations (the gene, location, etc) |
|
|
8 |
#' @param output.prefix a prefix to prepend to all output files |
|
|
9 |
#' @export |
|
|
10 |
PlotATLANTISresults <- function(fi, ATLANTIS_Summary, code.dir, output.dir, annTable.file, output.prefix = NULL){ |
|
|
11 |
|
|
|
12 |
if(is.character(fi) == TRUE){ |
|
|
13 |
load(fi) |
|
|
14 |
} |
|
|
15 |
|
|
|
16 |
if(is.character(ATLANTIS_Summary) == TRUE){ |
|
|
17 |
load(ATLANTIS_Summary) |
|
|
18 |
} |
|
|
19 |
|
|
|
20 |
#read in annTable file stored in libdir |
|
|
21 |
|
|
|
22 |
annTable <- read.table(annTable.file,header=TRUE,comment.char="",sep="\t", fill=FALSE,stringsAsFactors = FALSE, check.names = FALSE,row.names=1) |
|
|
23 |
|
|
|
24 |
#get analysisID, targetID, targetVec, predMat from ATLANTIS_Summary |
|
|
25 |
targetVec <- fi$targetVec |
|
|
26 |
predMat <- fi$predData |
|
|
27 |
targetID <- ATLANTIS_Summary$targetID |
|
|
28 |
analysisID <- ATLANTIS_Summary$analysisID |
|
|
29 |
additionalFeatures <- ATLANTIS_Summary$additionalFeatures |
|
|
30 |
|
|
|
31 |
if(is.null(output.prefix)) { |
|
|
32 |
output.prefix = targetID |
|
|
33 |
} |
|
|
34 |
|
|
|
35 |
# create plots |
|
|
36 |
if(!is.null(output.dir)) { |
|
|
37 |
plots.file <- paste(output.prefix, '_plots.pdf', sep='') |
|
|
38 |
pdf(file.path(output.dir, plots.file), w=11*1.5, h=8.5*1.5) |
|
|
39 |
} |
|
|
40 |
|
|
|
41 |
report.summary <- fi$report.summary |
|
|
42 |
|
|
|
43 |
for (nominalScale in c(F, T)) { |
|
|
44 |
for (orderByPred in c(F,T)) { |
|
|
45 |
tmp <- plots.plotModel(fi, targetVec, targetID, |
|
|
46 |
predMat, annTable, |
|
|
47 |
maxNumPreds=20, |
|
|
48 |
report.summary, |
|
|
49 |
orderByPred=orderByPred, |
|
|
50 |
additionalFeatures=additionalFeatures, nominalScale=nominalScale) |
|
|
51 |
} |
|
|
52 |
} |
|
|
53 |
|
|
|
54 |
if(!is.null(output.dir)) { |
|
|
55 |
dev.off() |
|
|
56 |
} |
|
|
57 |
} |
|
|
58 |
|
|
|
59 |
# draw text "msg" scaling to fit in the specified bounding box |
|
|
60 |
drawBoundedText <- function(msg, left, bottom, right, top) { |
|
|
61 |
target.width <- right - left; |
|
|
62 |
target.height <- top - bottom; |
|
|
63 |
scale <- min(target.width/strwidth(msg), target.height/strheight(msg)) |
|
|
64 |
text(left, bottom, msg, cex=scale, adj=c(0, 0)) |
|
|
65 |
} |
|
|
66 |
|
|
|
67 |
# generates a plot with a summary of predictive variables used in gbm model |
|
|
68 |
# fi - fit.info of model |
|
|
69 |
# targetVec - target vector (from targetMat) |
|
|
70 |
# targetID - the ID of the target variable (its name in targetMat) |
|
|
71 |
# maxNumPreds - number of features to plot |
|
|
72 |
plots.plotModel <- function(fi, targetVec, targetID, predMat, annTable, maxNumPreds, report.summary, ...) { |
|
|
73 |
library(caTools) # for colAUC |
|
|
74 |
|
|
|
75 |
# top 20 features, ordered by cluster & contribution of their clusters |
|
|
76 |
# head(used.features.inf[order(used.features.inf[1:20,"clusterRank"]), ], 20) |
|
|
77 |
|
|
|
78 |
# can move the following to model creation GBM too |
|
|
79 |
predProfile <- fi$OOB$prediction |
|
|
80 |
|
|
|
81 |
model_quality <- list() |
|
|
82 |
model_quality$value <- fi$OOB$quality |
|
|
83 |
model_quality$metric <- fi$OOB$metric |
|
|
84 |
|
|
|
85 |
model.features.ordered <- fi$varImp |
|
|
86 |
|
|
|
87 |
# convert targetVec to numeric binary vector if is factor |
|
|
88 |
if (fi$modelType == "Classification") { |
|
|
89 |
targetVec <- ifelse(as.numeric(targetVec) == 1, 0, 1) |
|
|
90 |
} |
|
|
91 |
|
|
|
92 |
plots.predictorsBars(targetVec, predMat, model.features.ordered, |
|
|
93 |
targetID, model_quality, annTable=annTable, |
|
|
94 |
report.summary=report.summary, |
|
|
95 |
predProfile=predProfile, |
|
|
96 |
maxNumPreds=maxNumPreds, ...) |
|
|
97 |
} |
|
|
98 |
|
|
|
99 |
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
# model.features - relative influence for predictors. a named vector |
|
|
103 |
# predProfile - predictions of the targetVec, to be plotted on-top |
|
|
104 |
# model_quality - a list. $metric is a string ("R^2" or "AUC"), $value holds the value |
|
|
105 |
plots.predictorsBars <- function(depProfile, predMat, model.features, gsID, model_quality, |
|
|
106 |
annTable, report.summary, |
|
|
107 |
additionalFeatures=NULL, maxNumPreds=10, predProfile=NULL, |
|
|
108 |
orderByPred=FALSE, nominalScale=F) { |
|
|
109 |
|
|
|
110 |
# subset influence table to include only top predictive features (whose influence sum to 50) |
|
|
111 |
## order.by.inf <- order(model.features[, "rel.inf"], decreasing=T) |
|
|
112 |
## model.features <- model.features[order.by.inf, ] |
|
|
113 |
|
|
|
114 |
# how many features are used by the model? |
|
|
115 |
numModelFeatures <- length(model.features) |
|
|
116 |
|
|
|
117 |
#lastPredToPlot <- min(max(which(cumsum(model.features[,"rel.inf"]) < 50)) + 1, maxNumPreds) |
|
|
118 |
lastPredToPlot <- min(numModelFeatures, maxNumPreds) |
|
|
119 |
|
|
|
120 |
|
|
|
121 |
# we're going to plot only these predictors |
|
|
122 |
features.to.plot <- model.features[1:lastPredToPlot] |
|
|
123 |
stopifnot(all(names(features.to.plot) %in% colnames(predMat))) |
|
|
124 |
feat.data <- predMat[, names(features.to.plot), drop=F] |
|
|
125 |
|
|
|
126 |
# plot barplots for all predictors, target GS, influence weights too |
|
|
127 |
num.feats <- length(features.to.plot) |
|
|
128 |
if (orderByPred) { |
|
|
129 |
samp.ord <- order(predProfile, depProfile) # the order of the samples in the plots |
|
|
130 |
} else { |
|
|
131 |
samp.ord <- order(depProfile, predProfile) # order by true then prediction |
|
|
132 |
} |
|
|
133 |
layout(matrix(c(num.feats+4, rep(1, num.feats+2), 2, num.feats+5, 3:(num.feats+3)), ncol=2), |
|
|
134 |
width=c(1, 5), |
|
|
135 |
heights=c(3, 0.5, rep(1, num.feats), 1.5)) |
|
|
136 |
|
|
|
137 |
# plot rel.inf values |
|
|
138 |
op <- par(mar = c(3, 2, 0, 0)) |
|
|
139 |
|
|
|
140 |
featCorToTarget <- cor(feat.data, |
|
|
141 |
as.numeric(depProfile), |
|
|
142 |
use="pairwise.complete.obs", |
|
|
143 |
method='spearman') |
|
|
144 |
cols <- rev(c("goldenrod", "deeppink3")[as.numeric(featCorToTarget > 0)+1]) |
|
|
145 |
|
|
|
146 |
barplot(rev(features.to.plot), horiz=T, col=cols, names.arg="") |
|
|
147 |
par(op) |
|
|
148 |
|
|
|
149 |
###### |
|
|
150 |
# plot response variable |
|
|
151 |
###### |
|
|
152 |
# figure out chr loci for response varibale |
|
|
153 |
responseHUGO <- annTable[gsID, "HUGOsymbol"] |
|
|
154 |
responseChrLoci <- annTable[gsID, "ChrLoci"] |
|
|
155 |
gsLabel <- paste(gsID, "\n", responseChrLoci, sep='') |
|
|
156 |
|
|
|
157 |
cols <- ifelse(depProfile[samp.ord] < -2.0, "red", "grey") |
|
|
158 |
op <- par(mar = c(0, 2, 1, 13)) # c(bottom, left, top, right) |
|
|
159 |
mp <- barplot(depProfile[samp.ord], names.arg="", axes=T, col=cols, border=NA) |
|
|
160 |
points(mp, predProfile[samp.ord], pch='+', col="palegreen4", type='l', lwd=2) |
|
|
161 |
mtext(gsLabel, side=4, las=1, cex=0.9) |
|
|
162 |
par(op) |
|
|
163 |
|
|
|
164 |
# plot barplots for predictive features |
|
|
165 |
uniqChrLoci <- unique(annTable[names(features.to.plot), "ChrLoci"], border=NA) |
|
|
166 |
chrBands <- sub("\\..+$", "", uniqChrLoci) |
|
|
167 |
names(chrBands) <- uniqChrLoci |
|
|
168 |
|
|
|
169 |
colMap <- plots.colorChrLoc(chrBands[annTable[names(features.to.plot), "ChrLoci"]]) |
|
|
170 |
for (i in 1:num.feats) { |
|
|
171 |
featID <- names(features.to.plot)[i] |
|
|
172 |
plotdata <- feat.data[samp.ord, i] |
|
|
173 |
if (annTable[featID, "VarType"] == "CN") { |
|
|
174 |
# for CN features |
|
|
175 |
# color amps in red, dels in blue |
|
|
176 |
cols <- ifelse(plotdata > 0.5, "red", "grey") |
|
|
177 |
cols[plotdata < -0.5] <- "blue" |
|
|
178 |
ylim=c(-1,1) |
|
|
179 |
} |
|
|
180 |
if (annTable[featID, "VarType"] %in% c("MUT", "Mmis", "Mbad", "Mcos", "Mall", "RsMmis", "RsMnon")) { |
|
|
181 |
# color mutations in purple |
|
|
182 |
f <- feat.data[samp.ord, i] |
|
|
183 |
stopifnot(all(f %in% c(0,1,2) | is.na(f))) |
|
|
184 |
cols <- c("grey", "sienna4", "darkorange")[f+1] |
|
|
185 |
ylim=c(0,2) |
|
|
186 |
} |
|
|
187 |
if (annTable[featID, "VarType"] %in% c("ACH", "GS", "SS")) { |
|
|
188 |
# color high dependency in red |
|
|
189 |
cols <- ifelse(feat.data[samp.ord, i] > 0.44, "orangered3", "grey") |
|
|
190 |
cols[plotdata < -0.44] <- "seagreen3" |
|
|
191 |
ylim=c(-2,2) |
|
|
192 |
} |
|
|
193 |
if (annTable[featID, "VarType"] %in% c("Exp", "GSE", "CTD", "mirE")) { |
|
|
194 |
# transform data to robust Z |
|
|
195 |
z.plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T) |
|
|
196 |
if(!nominalScale) { |
|
|
197 |
plotdata <- z.plotdata |
|
|
198 |
} |
|
|
199 |
# color high expression |
|
|
200 |
cols <- ifelse(z.plotdata > 2, "orangered3", "grey") |
|
|
201 |
cols[z.plotdata < -2] <- "seagreen3" |
|
|
202 |
ylim=c(-3,3) |
|
|
203 |
} |
|
|
204 |
if (annTable[featID, "VarType"] == "SI") { |
|
|
205 |
if (length(unique(feat.data[,i])) <= 3) { # binary + NA |
|
|
206 |
cols <- ifelse(feat.data[samp.ord, i] > 0, "orange3", "grey") |
|
|
207 |
ylim=c(0,1) |
|
|
208 |
} else { |
|
|
209 |
ylim <- NULL |
|
|
210 |
cols <- NULL |
|
|
211 |
} |
|
|
212 |
} |
|
|
213 |
if (annTable[featID, "VarType"] == "San") { |
|
|
214 |
# transform data to robust Z |
|
|
215 |
plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T) |
|
|
216 |
# color high expression |
|
|
217 |
cols <- ifelse(plotdata > 2, "orangered3", "grey") |
|
|
218 |
cols[plotdata < -2] <- "seagreen3" |
|
|
219 |
ylim=c(-3,3) |
|
|
220 |
# # color high sensitivity in red |
|
|
221 |
# cols <- ifelse(feat.data[samp.ord, i] < 0.3, "orangered3", "grey") |
|
|
222 |
# ylim=c(0,1) |
|
|
223 |
} |
|
|
224 |
|
|
|
225 |
feature.barplot <- function(plotdata, axes.range, cols, ylim) { |
|
|
226 |
#par(oma=rep(1,4)) |
|
|
227 |
#par(oma=c(1,0,1,0)) |
|
|
228 |
orig.mar <- par()$mar |
|
|
229 |
par(mar=c(1,0,1,0)+orig.mar) |
|
|
230 |
par(mgp=c(0,-2,-3)) |
|
|
231 |
|
|
|
232 |
r.range <- round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0) |
|
|
233 |
if(is.null(ylim)) { |
|
|
234 |
ylim <- c( min(c(plotdata, r.range[1]), na.rm=T), max(c(plotdata, r.range[2]), na.rm=T) ) |
|
|
235 |
} |
|
|
236 |
|
|
|
237 |
if(any(is.infinite(ylim))) { |
|
|
238 |
# hack -- ylim is infinite in some case. Need to investigate where how this happens |
|
|
239 |
ylim <- c(-1e10, 1e10) |
|
|
240 |
warning("ylim was infinite so skipping this bar") |
|
|
241 |
} else { |
|
|
242 |
|
|
|
243 |
barplot(plotdata, names.arg="", axes=F, col=cols, ylim=ylim, border=NA) |
|
|
244 |
if(!is.null(axes.range)) { |
|
|
245 |
axes.range <-round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0) |
|
|
246 |
# axis(ifelse((i %% 2) == 1,2,4), axes.range, las=2) |
|
|
247 |
axis(2, axes.range, las=2) |
|
|
248 |
} |
|
|
249 |
} |
|
|
250 |
|
|
|
251 |
par(mar=orig.mar) |
|
|
252 |
} |
|
|
253 |
|
|
|
254 |
op <- par(mar = c(0, 2, 0, 13)) # c(bottom, left, top, right) |
|
|
255 |
axes.range <- NULL |
|
|
256 |
# if(nominalScale) { |
|
|
257 |
ylim <- NULL |
|
|
258 |
axes.range = pretty(plotdata, n=1) |
|
|
259 |
# } |
|
|
260 |
feature.barplot(plotdata, axes.range, cols, ylim) |
|
|
261 |
|
|
|
262 |
if (annTable[featID, "VarType"] %in% c("GSE", "SI")) { |
|
|
263 |
first.line.text <- substr(featID,1,15) |
|
|
264 |
second.line.text <- paste(substr(featID,16,32), "\n", substr(featID,33,49), sep='') |
|
|
265 |
} else { |
|
|
266 |
first.line.text <- substr(featID,1,25) |
|
|
267 |
second.line.text <- annTable[featID, "ChrLoci"] |
|
|
268 |
} |
|
|
269 |
|
|
|
270 |
feature.label <- paste(first.line.text, "\n", second.line.text, sep='') |
|
|
271 |
mtext(feature.label, side=4, las=1, col=colMap[chrBands[annTable[featID, "ChrLoci"]]], cex=0.9) |
|
|
272 |
par(op) |
|
|
273 |
} |
|
|
274 |
|
|
|
275 |
# plot text on top left corner |
|
|
276 |
op <- par(mar = rep(0, 4)) |
|
|
277 |
plot.new() |
|
|
278 |
# text(0.85, 0.5, pos=4, paste(report.summary, collapse="\n")) |
|
|
279 |
par(op) |
|
|
280 |
|
|
|
281 |
# model info in the top left corner |
|
|
282 |
op <- par(mar = rep(0, 4)) |
|
|
283 |
plot.new() |
|
|
284 |
|
|
|
285 |
drawBoundedText(paste("CV", model_quality$metric, "=", format(model_quality$value, digits=2)), 0, 0.9, 1, 1) |
|
|
286 |
drawBoundedText(paste(report.summary, collapse="\n"), 0, 0, 1, 0.85) |
|
|
287 |
par(op) |
|
|
288 |
|
|
|
289 |
list(feat.data=feat.data, samp.ord=samp.ord) |
|
|
290 |
} |
|
|
291 |
|
|
|
292 |
|
|
|
293 |
# given a vector of strings containing chromosomal locations, assign |
|
|
294 |
# a color to each one so that chromosomal loci that are represented more |
|
|
295 |
# than once get colored in the same color. |
|
|
296 |
plots.colorChrLoc <- function(chr.loci, def.color="black") { |
|
|
297 |
library(RColorBrewer) |
|
|
298 |
|
|
|
299 |
freqs <- table(chr.loci) |
|
|
300 |
num.unique <- length(freqs) |
|
|
301 |
cols <- rep(def.color, num.unique) |
|
|
302 |
names(cols) <- names(freqs) |
|
|
303 |
|
|
|
304 |
repeats <- names(freqs)[freqs >= 2] |
|
|
305 |
num.repeats <- length(repeats) |
|
|
306 |
cols[repeats] <- brewer.pal(8, "Set1")[1:length(repeats)] |
|
|
307 |
|
|
|
308 |
cols |
|
|
309 |
} |
|
|
310 |
|