atlantis / Git / Diff of /atlantis/R/PlotATLANTISresults.R

Models:
DanielG/
atlantis
Downloads: 1
Diff of /atlantis/R/PlotATLANTISresults.R [000000] .. [fbf06f]
Switch to side-by-side view

--- a
+++ b/atlantis/R/PlotATLANTISresults.R
@@ -0,0 +1,310 @@
+#' Generate ATLANTIS plots from intermediate files
+#'
+#' @param fi fit.info object (or file name)
+#' @param ATLANTIS_Summary the structure itself or file to load it from
+#' @param code.dir unused -- to be dropped 
+#' @param output.dir if output.dir is NULL then the plot is written to the current device, otherwise a PDF is written to the output.dir
+#' @param annTable.file File containing feature annotations (the gene, location, etc)
+#' @param output.prefix a prefix to prepend to all output files
+#' @export
+PlotATLANTISresults <- function(fi, ATLANTIS_Summary, code.dir, output.dir, annTable.file, output.prefix = NULL){
+  
+  if(is.character(fi) == TRUE){
+    load(fi)
+  }
+
+  if(is.character(ATLANTIS_Summary) == TRUE){
+    load(ATLANTIS_Summary)
+  }
+
+  #read in annTable file stored in libdir
+  
+  annTable <- read.table(annTable.file,header=TRUE,comment.char="",sep="\t", fill=FALSE,stringsAsFactors = FALSE, check.names = FALSE,row.names=1)
+  
+  #get analysisID, targetID, targetVec, predMat from ATLANTIS_Summary
+  targetVec <- fi$targetVec
+  predMat <- fi$predData
+  targetID <- ATLANTIS_Summary$targetID
+  analysisID <- ATLANTIS_Summary$analysisID
+  additionalFeatures <- ATLANTIS_Summary$additionalFeatures
+
+  if(is.null(output.prefix)) {
+    output.prefix = targetID
+  }
+  
+  # create plots
+  if(!is.null(output.dir)) {
+    plots.file <- paste(output.prefix, '_plots.pdf', sep='') 
+    pdf(file.path(output.dir, plots.file), w=11*1.5, h=8.5*1.5)
+  }
+ 
+  report.summary <- fi$report.summary
+
+  for (nominalScale in c(F, T)) {
+    for (orderByPred in c(F,T)) {
+      tmp <- plots.plotModel(fi, targetVec, targetID, 
+                         predMat, annTable,
+                         maxNumPreds=20,
+                         report.summary,
+                         orderByPred=orderByPred,
+                         additionalFeatures=additionalFeatures, nominalScale=nominalScale)
+    } 
+  }
+
+  if(!is.null(output.dir)) {
+    dev.off()
+  }
+}
+
+# draw text "msg" scaling to fit in the specified bounding box
+drawBoundedText <- function(msg, left, bottom, right, top) {
+  target.width <- right - left;
+  target.height <- top - bottom;
+  scale <- min(target.width/strwidth(msg), target.height/strheight(msg))
+  text(left, bottom, msg, cex=scale, adj=c(0, 0))
+}
+
+# generates a plot with a summary of predictive variables used in gbm model
+# fi - fit.info of model
+# targetVec - target vector (from targetMat)
+# targetID - the ID of the target variable (its name in targetMat)
+# maxNumPreds - number of features to plot
+plots.plotModel <- function(fi, targetVec, targetID, predMat, annTable, maxNumPreds, report.summary, ...) {
+  library(caTools) # for colAUC
+  
+  # top 20 features, ordered by cluster & contribution of their clusters
+  # head(used.features.inf[order(used.features.inf[1:20,"clusterRank"]), ], 20)
+  
+  # can move the following to model creation GBM too
+  predProfile <- fi$OOB$prediction
+  
+  model_quality <- list()
+  model_quality$value <- fi$OOB$quality
+  model_quality$metric <- fi$OOB$metric
+  
+  model.features.ordered <- fi$varImp
+  
+# convert targetVec to numeric binary vector if is factor
+  if (fi$modelType == "Classification") {
+    targetVec <- ifelse(as.numeric(targetVec) == 1, 0, 1)  
+  }
+
+  plots.predictorsBars(targetVec, predMat, model.features.ordered, 
+                       targetID, model_quality, annTable=annTable,
+                       report.summary=report.summary,
+                       predProfile=predProfile, 
+                       maxNumPreds=maxNumPreds, ...)
+}
+
+
+
+
+# model.features - relative influence for predictors.  a named vector
+# predProfile - predictions of the targetVec, to be plotted on-top
+# model_quality - a list. $metric is a string ("R^2" or "AUC"), $value holds the value
+plots.predictorsBars <- function(depProfile, predMat, model.features, gsID, model_quality,
+                                 annTable, report.summary,
+                                 additionalFeatures=NULL, maxNumPreds=10, predProfile=NULL,
+                                 orderByPred=FALSE, nominalScale=F) {
+
+  # subset influence table to include only top predictive features (whose influence sum to 50)
+  ## order.by.inf <- order(model.features[, "rel.inf"], decreasing=T)
+  ## model.features <- model.features[order.by.inf, ]
+  
+  # how many features are used by the model?
+  numModelFeatures <- length(model.features)
+  
+  #lastPredToPlot <- min(max(which(cumsum(model.features[,"rel.inf"]) < 50)) + 1, maxNumPreds)
+  lastPredToPlot <- min(numModelFeatures, maxNumPreds)
+  
+  
+  # we're going to plot only these predictors
+  features.to.plot <- model.features[1:lastPredToPlot]
+  stopifnot(all(names(features.to.plot) %in% colnames(predMat)))
+  feat.data <- predMat[, names(features.to.plot), drop=F]
+  
+  # plot barplots for all predictors, target GS, influence weights too
+  num.feats <- length(features.to.plot)
+  if (orderByPred) {
+    samp.ord <- order(predProfile, depProfile)  # the order of the samples in the plots
+  } else {
+    samp.ord <- order(depProfile, predProfile)  # order by true then prediction
+  }
+  layout(matrix(c(num.feats+4, rep(1, num.feats+2), 2, num.feats+5, 3:(num.feats+3)), ncol=2), 
+         width=c(1, 5),
+         heights=c(3, 0.5, rep(1, num.feats), 1.5))
+  
+  # plot rel.inf values
+  op <- par(mar = c(3, 2, 0, 0))
+  
+  featCorToTarget <- cor(feat.data, 
+                         as.numeric(depProfile), 
+                         use="pairwise.complete.obs", 
+                         method='spearman')
+  cols <- rev(c("goldenrod", "deeppink3")[as.numeric(featCorToTarget > 0)+1])
+  
+  barplot(rev(features.to.plot), horiz=T, col=cols, names.arg="")
+  par(op)
+  
+  ######
+  # plot response variable
+  ######
+  # figure out chr loci for response varibale
+  responseHUGO <- annTable[gsID, "HUGOsymbol"]
+  responseChrLoci <- annTable[gsID, "ChrLoci"]
+  gsLabel <- paste(gsID, "\n", responseChrLoci, sep='')
+  
+  cols <- ifelse(depProfile[samp.ord] < -2.0, "red", "grey")
+  op <- par(mar = c(0, 2, 1, 13)) # c(bottom, left, top, right) 
+  mp <- barplot(depProfile[samp.ord], names.arg="", axes=T, col=cols, border=NA)
+  points(mp, predProfile[samp.ord], pch='+', col="palegreen4", type='l', lwd=2)
+  mtext(gsLabel, side=4, las=1, cex=0.9)
+  par(op)
+  
+  # plot barplots for predictive features
+  uniqChrLoci <- unique(annTable[names(features.to.plot), "ChrLoci"], border=NA)
+  chrBands <- sub("\\..+$", "", uniqChrLoci)
+  names(chrBands) <- uniqChrLoci
+  
+  colMap <- plots.colorChrLoc(chrBands[annTable[names(features.to.plot), "ChrLoci"]])
+  for (i in 1:num.feats) {
+    featID <- names(features.to.plot)[i]
+    plotdata <- feat.data[samp.ord, i]
+    if (annTable[featID, "VarType"] == "CN") {
+      # for CN features
+      # color amps in red, dels in blue
+      cols <- ifelse(plotdata > 0.5, "red", "grey")
+      cols[plotdata < -0.5] <- "blue"
+      ylim=c(-1,1)
+    }
+    if (annTable[featID, "VarType"] %in% c("MUT", "Mmis", "Mbad", "Mcos", "Mall", "RsMmis", "RsMnon")) {
+      # color mutations in purple
+      f <- feat.data[samp.ord, i]
+      stopifnot(all(f %in% c(0,1,2) | is.na(f)))
+      cols <- c("grey", "sienna4", "darkorange")[f+1]
+      ylim=c(0,2)      
+    }
+    if (annTable[featID, "VarType"] %in% c("ACH", "GS", "SS")) {
+      # color high dependency in red
+      cols <- ifelse(feat.data[samp.ord, i] > 0.44, "orangered3", "grey")
+      cols[plotdata < -0.44] <- "seagreen3"
+      ylim=c(-2,2)      
+    }
+    if (annTable[featID, "VarType"] %in% c("Exp", "GSE", "CTD", "mirE")) {
+      # transform data to robust Z
+      z.plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T)
+      if(!nominalScale) {
+        plotdata <- z.plotdata
+      }
+      # color high expression
+      cols <- ifelse(z.plotdata > 2, "orangered3", "grey")
+      cols[z.plotdata < -2] <- "seagreen3"
+      ylim=c(-3,3)      
+    }
+    if (annTable[featID, "VarType"] == "SI") {
+      if (length(unique(feat.data[,i])) <= 3)  { # binary + NA
+        cols <- ifelse(feat.data[samp.ord, i] > 0, "orange3", "grey")
+        ylim=c(0,1)      
+      } else {
+        ylim <- NULL
+        cols <- NULL
+      }
+    }
+    if (annTable[featID, "VarType"] == "San") {
+      # transform data to robust Z
+      plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T)      
+      # color high expression
+      cols <- ifelse(plotdata > 2, "orangered3", "grey")
+      cols[plotdata < -2] <- "seagreen3"
+      ylim=c(-3,3)      
+#       # color high sensitivity in red
+#       cols <- ifelse(feat.data[samp.ord, i] < 0.3, "orangered3", "grey")
+#       ylim=c(0,1)      
+    }
+
+feature.barplot <- function(plotdata, axes.range, cols, ylim) {
+  #par(oma=rep(1,4))
+  #par(oma=c(1,0,1,0))
+  orig.mar <- par()$mar
+  par(mar=c(1,0,1,0)+orig.mar) 
+  par(mgp=c(0,-2,-3))
+
+  r.range <- round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0)
+  if(is.null(ylim)) {
+    ylim <- c( min(c(plotdata, r.range[1]), na.rm=T), max(c(plotdata, r.range[2]), na.rm=T) )
+  }
+
+  if(any(is.infinite(ylim))) {
+     # hack -- ylim is infinite in some case. Need to investigate where how this happens
+     ylim <- c(-1e10, 1e10)
+     warning("ylim was infinite so skipping this bar")
+  } else {
+
+    barplot(plotdata, names.arg="", axes=F, col=cols, ylim=ylim, border=NA)
+    if(!is.null(axes.range)) {
+      axes.range <-round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0)
+  #    axis(ifelse((i %% 2) == 1,2,4), axes.range, las=2)
+      axis(2, axes.range, las=2)
+    }
+  }
+
+  par(mar=orig.mar)
+}
+    
+    op <- par(mar = c(0, 2, 0, 13)) # c(bottom, left, top, right)
+    axes.range <- NULL
+#    if(nominalScale) { 
+      ylim <- NULL 
+      axes.range = pretty(plotdata, n=1)
+#    }
+    feature.barplot(plotdata, axes.range, cols, ylim)
+
+    if (annTable[featID, "VarType"] %in% c("GSE", "SI")) {
+      first.line.text <- substr(featID,1,15)
+      second.line.text <- paste(substr(featID,16,32), "\n", substr(featID,33,49), sep='')
+    } else {
+      first.line.text <- substr(featID,1,25)
+      second.line.text <- annTable[featID, "ChrLoci"]    
+    }
+    
+    feature.label <- paste(first.line.text, "\n", second.line.text, sep='')
+    mtext(feature.label, side=4, las=1, col=colMap[chrBands[annTable[featID, "ChrLoci"]]], cex=0.9) 
+    par(op)
+  }
+  
+# plot text on top left corner
+  op <- par(mar = rep(0, 4))
+  plot.new()
+#  text(0.85, 0.5, pos=4, paste(report.summary, collapse="\n"))
+  par(op)
+  
+  # model info in the top left corner
+  op <- par(mar = rep(0, 4))
+  plot.new()
+  
+  drawBoundedText(paste("CV", model_quality$metric, "=", format(model_quality$value, digits=2)), 0, 0.9, 1, 1)
+  drawBoundedText(paste(report.summary, collapse="\n"), 0, 0, 1, 0.85)
+  par(op)
+  
+  list(feat.data=feat.data, samp.ord=samp.ord)
+}
+
+
+# given a vector of strings containing chromosomal locations, assign
+# a color to each one so that chromosomal loci that are represented more
+# than once get colored in the same color.
+plots.colorChrLoc <- function(chr.loci, def.color="black") {
+  library(RColorBrewer)
+  
+  freqs <- table(chr.loci)
+  num.unique <- length(freqs)
+  cols <- rep(def.color, num.unique)
+  names(cols) <- names(freqs)
+  
+  repeats <- names(freqs)[freqs >= 2]
+  num.repeats <- length(repeats)
+  cols[repeats] <- brewer.pal(8, "Set1")[1:length(repeats)]
+  
+  cols
+}
+