#' Generate ATLANTIS plots from intermediate files
#'
#' @param fi fit.info object (or file name)
#' @param ATLANTIS_Summary the structure itself or file to load it from
#' @param code.dir unused -- to be dropped
#' @param output.dir if output.dir is NULL then the plot is written to the current device, otherwise a PDF is written to the output.dir
#' @param annTable.file File containing feature annotations (the gene, location, etc)
#' @param output.prefix a prefix to prepend to all output files
#' @export
PlotATLANTISresults <- function(fi, ATLANTIS_Summary, code.dir, output.dir, annTable.file, output.prefix = NULL){
if(is.character(fi) == TRUE){
load(fi)
}
if(is.character(ATLANTIS_Summary) == TRUE){
load(ATLANTIS_Summary)
}
#read in annTable file stored in libdir
annTable <- read.table(annTable.file,header=TRUE,comment.char="",sep="\t", fill=FALSE,stringsAsFactors = FALSE, check.names = FALSE,row.names=1)
#get analysisID, targetID, targetVec, predMat from ATLANTIS_Summary
targetVec <- fi$targetVec
predMat <- fi$predData
targetID <- ATLANTIS_Summary$targetID
analysisID <- ATLANTIS_Summary$analysisID
additionalFeatures <- ATLANTIS_Summary$additionalFeatures
if(is.null(output.prefix)) {
output.prefix = targetID
}
# create plots
if(!is.null(output.dir)) {
plots.file <- paste(output.prefix, '_plots.pdf', sep='')
pdf(file.path(output.dir, plots.file), w=11*1.5, h=8.5*1.5)
}
report.summary <- fi$report.summary
for (nominalScale in c(F, T)) {
for (orderByPred in c(F,T)) {
tmp <- plots.plotModel(fi, targetVec, targetID,
predMat, annTable,
maxNumPreds=20,
report.summary,
orderByPred=orderByPred,
additionalFeatures=additionalFeatures, nominalScale=nominalScale)
}
}
if(!is.null(output.dir)) {
dev.off()
}
}
# draw text "msg" scaling to fit in the specified bounding box
drawBoundedText <- function(msg, left, bottom, right, top) {
target.width <- right - left;
target.height <- top - bottom;
scale <- min(target.width/strwidth(msg), target.height/strheight(msg))
text(left, bottom, msg, cex=scale, adj=c(0, 0))
}
# generates a plot with a summary of predictive variables used in gbm model
# fi - fit.info of model
# targetVec - target vector (from targetMat)
# targetID - the ID of the target variable (its name in targetMat)
# maxNumPreds - number of features to plot
plots.plotModel <- function(fi, targetVec, targetID, predMat, annTable, maxNumPreds, report.summary, ...) {
library(caTools) # for colAUC
# top 20 features, ordered by cluster & contribution of their clusters
# head(used.features.inf[order(used.features.inf[1:20,"clusterRank"]), ], 20)
# can move the following to model creation GBM too
predProfile <- fi$OOB$prediction
model_quality <- list()
model_quality$value <- fi$OOB$quality
model_quality$metric <- fi$OOB$metric
model.features.ordered <- fi$varImp
# convert targetVec to numeric binary vector if is factor
if (fi$modelType == "Classification") {
targetVec <- ifelse(as.numeric(targetVec) == 1, 0, 1)
}
plots.predictorsBars(targetVec, predMat, model.features.ordered,
targetID, model_quality, annTable=annTable,
report.summary=report.summary,
predProfile=predProfile,
maxNumPreds=maxNumPreds, ...)
}
# model.features - relative influence for predictors. a named vector
# predProfile - predictions of the targetVec, to be plotted on-top
# model_quality - a list. $metric is a string ("R^2" or "AUC"), $value holds the value
plots.predictorsBars <- function(depProfile, predMat, model.features, gsID, model_quality,
annTable, report.summary,
additionalFeatures=NULL, maxNumPreds=10, predProfile=NULL,
orderByPred=FALSE, nominalScale=F) {
# subset influence table to include only top predictive features (whose influence sum to 50)
## order.by.inf <- order(model.features[, "rel.inf"], decreasing=T)
## model.features <- model.features[order.by.inf, ]
# how many features are used by the model?
numModelFeatures <- length(model.features)
#lastPredToPlot <- min(max(which(cumsum(model.features[,"rel.inf"]) < 50)) + 1, maxNumPreds)
lastPredToPlot <- min(numModelFeatures, maxNumPreds)
# we're going to plot only these predictors
features.to.plot <- model.features[1:lastPredToPlot]
stopifnot(all(names(features.to.plot) %in% colnames(predMat)))
feat.data <- predMat[, names(features.to.plot), drop=F]
# plot barplots for all predictors, target GS, influence weights too
num.feats <- length(features.to.plot)
if (orderByPred) {
samp.ord <- order(predProfile, depProfile) # the order of the samples in the plots
} else {
samp.ord <- order(depProfile, predProfile) # order by true then prediction
}
layout(matrix(c(num.feats+4, rep(1, num.feats+2), 2, num.feats+5, 3:(num.feats+3)), ncol=2),
width=c(1, 5),
heights=c(3, 0.5, rep(1, num.feats), 1.5))
# plot rel.inf values
op <- par(mar = c(3, 2, 0, 0))
featCorToTarget <- cor(feat.data,
as.numeric(depProfile),
use="pairwise.complete.obs",
method='spearman')
cols <- rev(c("goldenrod", "deeppink3")[as.numeric(featCorToTarget > 0)+1])
barplot(rev(features.to.plot), horiz=T, col=cols, names.arg="")
par(op)
######
# plot response variable
######
# figure out chr loci for response varibale
responseHUGO <- annTable[gsID, "HUGOsymbol"]
responseChrLoci <- annTable[gsID, "ChrLoci"]
gsLabel <- paste(gsID, "\n", responseChrLoci, sep='')
cols <- ifelse(depProfile[samp.ord] < -2.0, "red", "grey")
op <- par(mar = c(0, 2, 1, 13)) # c(bottom, left, top, right)
mp <- barplot(depProfile[samp.ord], names.arg="", axes=T, col=cols, border=NA)
points(mp, predProfile[samp.ord], pch='+', col="palegreen4", type='l', lwd=2)
mtext(gsLabel, side=4, las=1, cex=0.9)
par(op)
# plot barplots for predictive features
uniqChrLoci <- unique(annTable[names(features.to.plot), "ChrLoci"], border=NA)
chrBands <- sub("\\..+$", "", uniqChrLoci)
names(chrBands) <- uniqChrLoci
colMap <- plots.colorChrLoc(chrBands[annTable[names(features.to.plot), "ChrLoci"]])
for (i in 1:num.feats) {
featID <- names(features.to.plot)[i]
plotdata <- feat.data[samp.ord, i]
if (annTable[featID, "VarType"] == "CN") {
# for CN features
# color amps in red, dels in blue
cols <- ifelse(plotdata > 0.5, "red", "grey")
cols[plotdata < -0.5] <- "blue"
ylim=c(-1,1)
}
if (annTable[featID, "VarType"] %in% c("MUT", "Mmis", "Mbad", "Mcos", "Mall", "RsMmis", "RsMnon")) {
# color mutations in purple
f <- feat.data[samp.ord, i]
stopifnot(all(f %in% c(0,1,2) | is.na(f)))
cols <- c("grey", "sienna4", "darkorange")[f+1]
ylim=c(0,2)
}
if (annTable[featID, "VarType"] %in% c("ACH", "GS", "SS")) {
# color high dependency in red
cols <- ifelse(feat.data[samp.ord, i] > 0.44, "orangered3", "grey")
cols[plotdata < -0.44] <- "seagreen3"
ylim=c(-2,2)
}
if (annTable[featID, "VarType"] %in% c("Exp", "GSE", "CTD", "mirE")) {
# transform data to robust Z
z.plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T)
if(!nominalScale) {
plotdata <- z.plotdata
}
# color high expression
cols <- ifelse(z.plotdata > 2, "orangered3", "grey")
cols[z.plotdata < -2] <- "seagreen3"
ylim=c(-3,3)
}
if (annTable[featID, "VarType"] == "SI") {
if (length(unique(feat.data[,i])) <= 3) { # binary + NA
cols <- ifelse(feat.data[samp.ord, i] > 0, "orange3", "grey")
ylim=c(0,1)
} else {
ylim <- NULL
cols <- NULL
}
}
if (annTable[featID, "VarType"] == "San") {
# transform data to robust Z
plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T)
# color high expression
cols <- ifelse(plotdata > 2, "orangered3", "grey")
cols[plotdata < -2] <- "seagreen3"
ylim=c(-3,3)
# # color high sensitivity in red
# cols <- ifelse(feat.data[samp.ord, i] < 0.3, "orangered3", "grey")
# ylim=c(0,1)
}
feature.barplot <- function(plotdata, axes.range, cols, ylim) {
#par(oma=rep(1,4))
#par(oma=c(1,0,1,0))
orig.mar <- par()$mar
par(mar=c(1,0,1,0)+orig.mar)
par(mgp=c(0,-2,-3))
r.range <- round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0)
if(is.null(ylim)) {
ylim <- c( min(c(plotdata, r.range[1]), na.rm=T), max(c(plotdata, r.range[2]), na.rm=T) )
}
if(any(is.infinite(ylim))) {
# hack -- ylim is infinite in some case. Need to investigate where how this happens
ylim <- c(-1e10, 1e10)
warning("ylim was infinite so skipping this bar")
} else {
barplot(plotdata, names.arg="", axes=F, col=cols, ylim=ylim, border=NA)
if(!is.null(axes.range)) {
axes.range <-round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0)
# axis(ifelse((i %% 2) == 1,2,4), axes.range, las=2)
axis(2, axes.range, las=2)
}
}
par(mar=orig.mar)
}
op <- par(mar = c(0, 2, 0, 13)) # c(bottom, left, top, right)
axes.range <- NULL
# if(nominalScale) {
ylim <- NULL
axes.range = pretty(plotdata, n=1)
# }
feature.barplot(plotdata, axes.range, cols, ylim)
if (annTable[featID, "VarType"] %in% c("GSE", "SI")) {
first.line.text <- substr(featID,1,15)
second.line.text <- paste(substr(featID,16,32), "\n", substr(featID,33,49), sep='')
} else {
first.line.text <- substr(featID,1,25)
second.line.text <- annTable[featID, "ChrLoci"]
}
feature.label <- paste(first.line.text, "\n", second.line.text, sep='')
mtext(feature.label, side=4, las=1, col=colMap[chrBands[annTable[featID, "ChrLoci"]]], cex=0.9)
par(op)
}
# plot text on top left corner
op <- par(mar = rep(0, 4))
plot.new()
# text(0.85, 0.5, pos=4, paste(report.summary, collapse="\n"))
par(op)
# model info in the top left corner
op <- par(mar = rep(0, 4))
plot.new()
drawBoundedText(paste("CV", model_quality$metric, "=", format(model_quality$value, digits=2)), 0, 0.9, 1, 1)
drawBoundedText(paste(report.summary, collapse="\n"), 0, 0, 1, 0.85)
par(op)
list(feat.data=feat.data, samp.ord=samp.ord)
}
# given a vector of strings containing chromosomal locations, assign
# a color to each one so that chromosomal loci that are represented more
# than once get colored in the same color.
plots.colorChrLoc <- function(chr.loci, def.color="black") {
library(RColorBrewer)
freqs <- table(chr.loci)
num.unique <- length(freqs)
cols <- rep(def.color, num.unique)
names(cols) <- names(freqs)
repeats <- names(freqs)[freqs >= 2]
num.repeats <- length(repeats)
cols[repeats] <- brewer.pal(8, "Set1")[1:length(repeats)]
cols
}