a b/atlantis/R/PlotATLANTISresults.R
1
#' Generate ATLANTIS plots from intermediate files
2
#'
3
#' @param fi fit.info object (or file name)
4
#' @param ATLANTIS_Summary the structure itself or file to load it from
5
#' @param code.dir unused -- to be dropped 
6
#' @param output.dir if output.dir is NULL then the plot is written to the current device, otherwise a PDF is written to the output.dir
7
#' @param annTable.file File containing feature annotations (the gene, location, etc)
8
#' @param output.prefix a prefix to prepend to all output files
9
#' @export
10
PlotATLANTISresults <- function(fi, ATLANTIS_Summary, code.dir, output.dir, annTable.file, output.prefix = NULL){
11
  
12
  if(is.character(fi) == TRUE){
13
    load(fi)
14
  }
15
16
  if(is.character(ATLANTIS_Summary) == TRUE){
17
    load(ATLANTIS_Summary)
18
  }
19
20
  #read in annTable file stored in libdir
21
  
22
  annTable <- read.table(annTable.file,header=TRUE,comment.char="",sep="\t", fill=FALSE,stringsAsFactors = FALSE, check.names = FALSE,row.names=1)
23
  
24
  #get analysisID, targetID, targetVec, predMat from ATLANTIS_Summary
25
  targetVec <- fi$targetVec
26
  predMat <- fi$predData
27
  targetID <- ATLANTIS_Summary$targetID
28
  analysisID <- ATLANTIS_Summary$analysisID
29
  additionalFeatures <- ATLANTIS_Summary$additionalFeatures
30
31
  if(is.null(output.prefix)) {
32
    output.prefix = targetID
33
  }
34
  
35
  # create plots
36
  if(!is.null(output.dir)) {
37
    plots.file <- paste(output.prefix, '_plots.pdf', sep='') 
38
    pdf(file.path(output.dir, plots.file), w=11*1.5, h=8.5*1.5)
39
  }
40
 
41
  report.summary <- fi$report.summary
42
43
  for (nominalScale in c(F, T)) {
44
    for (orderByPred in c(F,T)) {
45
      tmp <- plots.plotModel(fi, targetVec, targetID, 
46
                         predMat, annTable,
47
                         maxNumPreds=20,
48
                         report.summary,
49
                         orderByPred=orderByPred,
50
                         additionalFeatures=additionalFeatures, nominalScale=nominalScale)
51
    } 
52
  }
53
54
  if(!is.null(output.dir)) {
55
    dev.off()
56
  }
57
}
58
59
# draw text "msg" scaling to fit in the specified bounding box
60
drawBoundedText <- function(msg, left, bottom, right, top) {
61
  target.width <- right - left;
62
  target.height <- top - bottom;
63
  scale <- min(target.width/strwidth(msg), target.height/strheight(msg))
64
  text(left, bottom, msg, cex=scale, adj=c(0, 0))
65
}
66
67
# generates a plot with a summary of predictive variables used in gbm model
68
# fi - fit.info of model
69
# targetVec - target vector (from targetMat)
70
# targetID - the ID of the target variable (its name in targetMat)
71
# maxNumPreds - number of features to plot
72
plots.plotModel <- function(fi, targetVec, targetID, predMat, annTable, maxNumPreds, report.summary, ...) {
73
  library(caTools) # for colAUC
74
  
75
  # top 20 features, ordered by cluster & contribution of their clusters
76
  # head(used.features.inf[order(used.features.inf[1:20,"clusterRank"]), ], 20)
77
  
78
  # can move the following to model creation GBM too
79
  predProfile <- fi$OOB$prediction
80
  
81
  model_quality <- list()
82
  model_quality$value <- fi$OOB$quality
83
  model_quality$metric <- fi$OOB$metric
84
  
85
  model.features.ordered <- fi$varImp
86
  
87
# convert targetVec to numeric binary vector if is factor
88
  if (fi$modelType == "Classification") {
89
    targetVec <- ifelse(as.numeric(targetVec) == 1, 0, 1)  
90
  }
91
92
  plots.predictorsBars(targetVec, predMat, model.features.ordered, 
93
                       targetID, model_quality, annTable=annTable,
94
                       report.summary=report.summary,
95
                       predProfile=predProfile, 
96
                       maxNumPreds=maxNumPreds, ...)
97
}
98
99
100
101
102
# model.features - relative influence for predictors.  a named vector
103
# predProfile - predictions of the targetVec, to be plotted on-top
104
# model_quality - a list. $metric is a string ("R^2" or "AUC"), $value holds the value
105
plots.predictorsBars <- function(depProfile, predMat, model.features, gsID, model_quality,
106
                                 annTable, report.summary,
107
                                 additionalFeatures=NULL, maxNumPreds=10, predProfile=NULL,
108
                                 orderByPred=FALSE, nominalScale=F) {
109
110
  # subset influence table to include only top predictive features (whose influence sum to 50)
111
  ## order.by.inf <- order(model.features[, "rel.inf"], decreasing=T)
112
  ## model.features <- model.features[order.by.inf, ]
113
  
114
  # how many features are used by the model?
115
  numModelFeatures <- length(model.features)
116
  
117
  #lastPredToPlot <- min(max(which(cumsum(model.features[,"rel.inf"]) < 50)) + 1, maxNumPreds)
118
  lastPredToPlot <- min(numModelFeatures, maxNumPreds)
119
  
120
  
121
  # we're going to plot only these predictors
122
  features.to.plot <- model.features[1:lastPredToPlot]
123
  stopifnot(all(names(features.to.plot) %in% colnames(predMat)))
124
  feat.data <- predMat[, names(features.to.plot), drop=F]
125
  
126
  # plot barplots for all predictors, target GS, influence weights too
127
  num.feats <- length(features.to.plot)
128
  if (orderByPred) {
129
    samp.ord <- order(predProfile, depProfile)  # the order of the samples in the plots
130
  } else {
131
    samp.ord <- order(depProfile, predProfile)  # order by true then prediction
132
  }
133
  layout(matrix(c(num.feats+4, rep(1, num.feats+2), 2, num.feats+5, 3:(num.feats+3)), ncol=2), 
134
         width=c(1, 5),
135
         heights=c(3, 0.5, rep(1, num.feats), 1.5))
136
  
137
  # plot rel.inf values
138
  op <- par(mar = c(3, 2, 0, 0))
139
  
140
  featCorToTarget <- cor(feat.data, 
141
                         as.numeric(depProfile), 
142
                         use="pairwise.complete.obs", 
143
                         method='spearman')
144
  cols <- rev(c("goldenrod", "deeppink3")[as.numeric(featCorToTarget > 0)+1])
145
  
146
  barplot(rev(features.to.plot), horiz=T, col=cols, names.arg="")
147
  par(op)
148
  
149
  ######
150
  # plot response variable
151
  ######
152
  # figure out chr loci for response varibale
153
  responseHUGO <- annTable[gsID, "HUGOsymbol"]
154
  responseChrLoci <- annTable[gsID, "ChrLoci"]
155
  gsLabel <- paste(gsID, "\n", responseChrLoci, sep='')
156
  
157
  cols <- ifelse(depProfile[samp.ord] < -2.0, "red", "grey")
158
  op <- par(mar = c(0, 2, 1, 13)) # c(bottom, left, top, right) 
159
  mp <- barplot(depProfile[samp.ord], names.arg="", axes=T, col=cols, border=NA)
160
  points(mp, predProfile[samp.ord], pch='+', col="palegreen4", type='l', lwd=2)
161
  mtext(gsLabel, side=4, las=1, cex=0.9)
162
  par(op)
163
  
164
  # plot barplots for predictive features
165
  uniqChrLoci <- unique(annTable[names(features.to.plot), "ChrLoci"], border=NA)
166
  chrBands <- sub("\\..+$", "", uniqChrLoci)
167
  names(chrBands) <- uniqChrLoci
168
  
169
  colMap <- plots.colorChrLoc(chrBands[annTable[names(features.to.plot), "ChrLoci"]])
170
  for (i in 1:num.feats) {
171
    featID <- names(features.to.plot)[i]
172
    plotdata <- feat.data[samp.ord, i]
173
    if (annTable[featID, "VarType"] == "CN") {
174
      # for CN features
175
      # color amps in red, dels in blue
176
      cols <- ifelse(plotdata > 0.5, "red", "grey")
177
      cols[plotdata < -0.5] <- "blue"
178
      ylim=c(-1,1)
179
    }
180
    if (annTable[featID, "VarType"] %in% c("MUT", "Mmis", "Mbad", "Mcos", "Mall", "RsMmis", "RsMnon")) {
181
      # color mutations in purple
182
      f <- feat.data[samp.ord, i]
183
      stopifnot(all(f %in% c(0,1,2) | is.na(f)))
184
      cols <- c("grey", "sienna4", "darkorange")[f+1]
185
      ylim=c(0,2)      
186
    }
187
    if (annTable[featID, "VarType"] %in% c("ACH", "GS", "SS")) {
188
      # color high dependency in red
189
      cols <- ifelse(feat.data[samp.ord, i] > 0.44, "orangered3", "grey")
190
      cols[plotdata < -0.44] <- "seagreen3"
191
      ylim=c(-2,2)      
192
    }
193
    if (annTable[featID, "VarType"] %in% c("Exp", "GSE", "CTD", "mirE")) {
194
      # transform data to robust Z
195
      z.plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T)
196
      if(!nominalScale) {
197
        plotdata <- z.plotdata
198
      }
199
      # color high expression
200
      cols <- ifelse(z.plotdata > 2, "orangered3", "grey")
201
      cols[z.plotdata < -2] <- "seagreen3"
202
      ylim=c(-3,3)      
203
    }
204
    if (annTable[featID, "VarType"] == "SI") {
205
      if (length(unique(feat.data[,i])) <= 3)  { # binary + NA
206
        cols <- ifelse(feat.data[samp.ord, i] > 0, "orange3", "grey")
207
        ylim=c(0,1)      
208
      } else {
209
        ylim <- NULL
210
        cols <- NULL
211
      }
212
    }
213
    if (annTable[featID, "VarType"] == "San") {
214
      # transform data to robust Z
215
      plotdata <- (plotdata - median(plotdata, na.rm=T)) / mad(plotdata, na.rm=T)      
216
      # color high expression
217
      cols <- ifelse(plotdata > 2, "orangered3", "grey")
218
      cols[plotdata < -2] <- "seagreen3"
219
      ylim=c(-3,3)      
220
#       # color high sensitivity in red
221
#       cols <- ifelse(feat.data[samp.ord, i] < 0.3, "orangered3", "grey")
222
#       ylim=c(0,1)      
223
    }
224
225
feature.barplot <- function(plotdata, axes.range, cols, ylim) {
226
  #par(oma=rep(1,4))
227
  #par(oma=c(1,0,1,0))
228
  orig.mar <- par()$mar
229
  par(mar=c(1,0,1,0)+orig.mar) 
230
  par(mgp=c(0,-2,-3))
231
232
  r.range <- round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0)
233
  if(is.null(ylim)) {
234
    ylim <- c( min(c(plotdata, r.range[1]), na.rm=T), max(c(plotdata, r.range[2]), na.rm=T) )
235
  }
236
237
  if(any(is.infinite(ylim))) {
238
     # hack -- ylim is infinite in some case. Need to investigate where how this happens
239
     ylim <- c(-1e10, 1e10)
240
     warning("ylim was infinite so skipping this bar")
241
  } else {
242
243
    barplot(plotdata, names.arg="", axes=F, col=cols, ylim=ylim, border=NA)
244
    if(!is.null(axes.range)) {
245
      axes.range <-round( c(min(plotdata,na.rm=T), max(plotdata,na.rm=T)), 0)
246
  #    axis(ifelse((i %% 2) == 1,2,4), axes.range, las=2)
247
      axis(2, axes.range, las=2)
248
    }
249
  }
250
251
  par(mar=orig.mar)
252
}
253
    
254
    op <- par(mar = c(0, 2, 0, 13)) # c(bottom, left, top, right)
255
    axes.range <- NULL
256
#    if(nominalScale) { 
257
      ylim <- NULL 
258
      axes.range = pretty(plotdata, n=1)
259
#    }
260
    feature.barplot(plotdata, axes.range, cols, ylim)
261
262
    if (annTable[featID, "VarType"] %in% c("GSE", "SI")) {
263
      first.line.text <- substr(featID,1,15)
264
      second.line.text <- paste(substr(featID,16,32), "\n", substr(featID,33,49), sep='')
265
    } else {
266
      first.line.text <- substr(featID,1,25)
267
      second.line.text <- annTable[featID, "ChrLoci"]    
268
    }
269
    
270
    feature.label <- paste(first.line.text, "\n", second.line.text, sep='')
271
    mtext(feature.label, side=4, las=1, col=colMap[chrBands[annTable[featID, "ChrLoci"]]], cex=0.9) 
272
    par(op)
273
  }
274
  
275
# plot text on top left corner
276
  op <- par(mar = rep(0, 4))
277
  plot.new()
278
#  text(0.85, 0.5, pos=4, paste(report.summary, collapse="\n"))
279
  par(op)
280
  
281
  # model info in the top left corner
282
  op <- par(mar = rep(0, 4))
283
  plot.new()
284
  
285
  drawBoundedText(paste("CV", model_quality$metric, "=", format(model_quality$value, digits=2)), 0, 0.9, 1, 1)
286
  drawBoundedText(paste(report.summary, collapse="\n"), 0, 0, 1, 0.85)
287
  par(op)
288
  
289
  list(feat.data=feat.data, samp.ord=samp.ord)
290
}
291
292
293
# given a vector of strings containing chromosomal locations, assign
294
# a color to each one so that chromosomal loci that are represented more
295
# than once get colored in the same color.
296
plots.colorChrLoc <- function(chr.loci, def.color="black") {
297
  library(RColorBrewer)
298
  
299
  freqs <- table(chr.loci)
300
  num.unique <- length(freqs)
301
  cols <- rep(def.color, num.unique)
302
  names(cols) <- names(freqs)
303
  
304
  repeats <- names(freqs)[freqs >= 2]
305
  num.repeats <- length(repeats)
306
  cols[repeats] <- brewer.pal(8, "Set1")[1:length(repeats)]
307
  
308
  cols
309
}
310