|
a |
|
b/createBulkAtlantisFlock.R |
|
|
1 |
library(gridExtra) |
|
|
2 |
library(foreach) |
|
|
3 |
library(atlantis) |
|
|
4 |
|
|
|
5 |
createBulkAtlantisFlock <- function(analysis.description, run_mode, predMat.file, targetMat.file, anno.file, featureSetToUse, kMinNumSamples=50, seed=300, batch.size=1, predFeaturesPerTarget=NULL, fitControlSettings="default", config.summary=NULL, predictOnly=NULL, permutation.count=NULL, only.permutations=F, |
|
|
6 |
code.dir=".", |
|
|
7 |
test.fraction=NA, |
|
|
8 |
predFeatureRegExps=NULL) { |
|
|
9 |
# analysis.description: a description which will be saved in the run directory |
|
|
10 |
# run_mode: ignored |
|
|
11 |
# mustHave: a vector of dataset ids which must have a value present to be included in the prediction matrix |
|
|
12 |
# niceToHave: a vector of dataset ids to include in the prediction matrix, but these may have missing values in the prediction matrix |
|
|
13 |
# featureSetToUse: one of "all", "perTarget", "perTargetAndSelf" or "single" determining how to subset the prediction matrix when |
|
|
14 |
# for a given target. |
|
|
15 |
# predFeaturesPerTarget: if featureSetToUse = "perTarget" or "perTargetAndSelf" then this should be a dataframe with two columns: c("target", "partner") which will be used to look up which genes to use as predicitive features based on a given target |
|
|
16 |
|
|
|
17 |
if (featureSetToUse == "perTarget" || featureSetToUse == "perTargetAndSelf") { |
|
|
18 |
stopifnot(!is.null(predFeaturesPerTarget) && is.data.frame(predFeaturesPerTarget) && all(colnames(predFeaturesPerTarget) == c("target", "partner"))) |
|
|
19 |
} else if(featureSetToUse == "perGroup") { |
|
|
20 |
stopifnot(!is.null(predFeaturesPerTarget) && is.data.frame(predFeaturesPerTarget) && all(colnames(predFeaturesPerTarget) == c("group", "target", "group.name"))) |
|
|
21 |
} |
|
|
22 |
|
|
|
23 |
# flock_run_dir is the name of the directory of where the results will be written |
|
|
24 |
# the analysis.ID is the trailing dir name |
|
|
25 |
analysis.ID <- basename(flock_run_dir) |
|
|
26 |
output.dir <- paste(flock_run_dir, '/results/', sep='') |
|
|
27 |
dir.create(output.dir) |
|
|
28 |
|
|
|
29 |
# cache the functional related genes if we'll need them |
|
|
30 |
predFeaturesPerTarget.file <- NULL |
|
|
31 |
if(featureSetToUse == "perTarget" || featureSetToUse == "perTargetAndSelf" || featureSetToUse == "perGroup") { |
|
|
32 |
stopifnot(!is.null(predFeaturesPerTarget)) |
|
|
33 |
|
|
|
34 |
predFeaturesPerTarget.file <- paste(output.dir, '/featuresPerTarget.Rdata', sep="") |
|
|
35 |
save(predFeaturesPerTarget, file=predFeaturesPerTarget.file) |
|
|
36 |
} |
|
|
37 |
|
|
|
38 |
# load the target matrix to determine the targets |
|
|
39 |
load(targetMat.file); |
|
|
40 |
stopifnot(is.matrix(targetMat) || is.data.frame(targetMat)) |
|
|
41 |
targets <- colnames(targetMat); |
|
|
42 |
if(is.null(permutation.count)) { |
|
|
43 |
permutation.count <- length(targets) |
|
|
44 |
} |
|
|
45 |
|
|
|
46 |
if(featureSetToUse == "perGroup") { |
|
|
47 |
# then we want to create a target per member in our predFeaturesPerTarget table |
|
|
48 |
with.valid.targets <- predFeaturesPerTarget[paste("GS_",predFeaturesPerTarget$target,sep='') %in% targets,,drop=F] |
|
|
49 |
targets <- lapply(seq(nrow( with.valid.targets)), function(i) { |
|
|
50 |
x<-with.valid.targets[i,]; |
|
|
51 |
list(ID=paste("GS_",as.character(x$target),sep=''), |
|
|
52 |
gene=as.character(x$target), |
|
|
53 |
group=as.character(x$group), |
|
|
54 |
output.prefix=paste(x$group,'-',x$target,sep=''), |
|
|
55 |
group.name=as.character(x$group.name)) |
|
|
56 |
} ) |
|
|
57 |
stopifnot(! ("GS_NA" %in% sapply(targets, function(t){t$ID}))) |
|
|
58 |
} else { |
|
|
59 |
# filter out targets which have a single output value |
|
|
60 |
print("before filtering out singles") |
|
|
61 |
print(length(targets)) |
|
|
62 |
is.bad.target <- sapply(seq(dim(targetMat)[[2]]), function(i) { t<-targetMat[,i]; t<-t[!is.na(t)]; length(unique(t))==1 } ) |
|
|
63 |
targets <- targets[!is.bad.target] |
|
|
64 |
targets <- lapply(targets, function(t) { list(ID=t, output.prefix=t) }) |
|
|
65 |
print("after filtering out targets with a single target value") |
|
|
66 |
print(length(targets)) |
|
|
67 |
stopifnot(grep("GS_NA", targets)!=0) |
|
|
68 |
} |
|
|
69 |
|
|
|
70 |
common <- list( |
|
|
71 |
fitControlSettings=fitControlSettings, |
|
|
72 |
targetMat.file=targetMat.file, |
|
|
73 |
predMat.file=predMat.file, |
|
|
74 |
anno.file=anno.file, |
|
|
75 |
output.dir=output.dir, |
|
|
76 |
featureSetToUse=featureSetToUse, |
|
|
77 |
kMinNumSamples=kMinNumSamples, |
|
|
78 |
predFeaturesPerTarget.file=predFeaturesPerTarget.file, |
|
|
79 |
seed=seed, |
|
|
80 |
quality.dist.file=paste(output.dir, "null_quality_distribution.txt", sep=""), |
|
|
81 |
config.summary=config.summary, |
|
|
82 |
predictOnly=predictOnly, |
|
|
83 |
permutation.count=permutation.count, |
|
|
84 |
only.permutations=only.permutations, |
|
|
85 |
batch.size=batch.size, |
|
|
86 |
predFeatureRegExps=predFeatureRegExps |
|
|
87 |
) |
|
|
88 |
|
|
|
89 |
sources <- c("createBulkAtlantisFlock.R") |
|
|
90 |
|
|
|
91 |
common$sources <- sources |
|
|
92 |
|
|
|
93 |
# add in runs for each permuted version used for the null |
|
|
94 |
permutations <- sample(targets, permutation.count, replace=T) |
|
|
95 |
for(i in seq_along(permutations)) { |
|
|
96 |
permutations[[i]]$permute.seed <- i + seed |
|
|
97 |
} |
|
|
98 |
|
|
|
99 |
if(common$only.permutations) { |
|
|
100 |
tasks <- permutations |
|
|
101 |
} else { |
|
|
102 |
tasks <- c(targets, permutations) |
|
|
103 |
} |
|
|
104 |
|
|
|
105 |
tasks <- split(tasks, as.integer(seq(length(tasks))/common$batch.size)) |
|
|
106 |
|
|
|
107 |
if(!is.na(test.fraction)) { |
|
|
108 |
task.count <- as.integer(ceiling(test.fraction * length(tasks))) |
|
|
109 |
cat("Only running", task.count, "tasks out of", length(tasks), "because test.fraction was set to", test.fraction, "\n") |
|
|
110 |
tasks <- tasks [ seq(task.count) ] |
|
|
111 |
} |
|
|
112 |
|
|
|
113 |
flock.run(tasks, task_function='per_target', gather_function='gather', sources=common$sources, flock_common_state=common) |
|
|
114 |
} |
|
|
115 |
|
|
|
116 |
prev.proc.time <- NULL |
|
|
117 |
|
|
|
118 |
time.checkpoint <- function(label) { |
|
|
119 |
# now <- proc.time() |
|
|
120 |
# if(!is.null(prev.proc.time)) { |
|
|
121 |
# cat(sprintf("%s: %f\n", label, (now["elapsed"] - prev.proc.time["elapsed"]))) |
|
|
122 |
# } else { |
|
|
123 |
# cat(sprintf("%s: first\n", label)) |
|
|
124 |
# } |
|
|
125 |
# prev.proc.time <<- now |
|
|
126 |
} |
|
|
127 |
|
|
|
128 |
# given a gene symbol, returns a list of functionaly related genes based on dataframe which was persisted |
|
|
129 |
get.related.genes <- function(predFeaturesPerTarget, targetToLookup, target, include.self) { |
|
|
130 |
matches <- as.character(predFeaturesPerTarget$target) == targetToLookup |
|
|
131 |
partners <- as.character(predFeaturesPerTarget[matches, "partner"]) |
|
|
132 |
if(include.self) { |
|
|
133 |
ret <- unique(c(target, partners)) |
|
|
134 |
} else { |
|
|
135 |
ret <- setdiff(unique(partners), target) |
|
|
136 |
} |
|
|
137 |
|
|
|
138 |
return(ret) |
|
|
139 |
# sapply(ret, getFeatureNameFromFeatureID) |
|
|
140 |
} |
|
|
141 |
|
|
|
142 |
get.gene.group <- function(predFeaturesPerTarget, groupToLookup, target, include.self) { |
|
|
143 |
partners <- as.character(predFeaturesPerTarget[predFeaturesPerTarget$group == groupToLookup, "target"]) |
|
|
144 |
if(include.self) { |
|
|
145 |
ret <- unique(c(target, partners)) |
|
|
146 |
} else { |
|
|
147 |
ret <- setdiff(unique(partners), target) |
|
|
148 |
} |
|
|
149 |
return(ret) |
|
|
150 |
# sapply(ret, getFeatureNameFromFeatureID) |
|
|
151 |
} |
|
|
152 |
|
|
|
153 |
determine.feature.genes <- function(target, featureSetToUse, predFeaturesPerTarget.file) { |
|
|
154 |
if(!is.null(predFeaturesPerTarget.file)) { |
|
|
155 |
load(predFeaturesPerTarget.file) |
|
|
156 |
} |
|
|
157 |
|
|
|
158 |
if (featureSetToUse == "all") { |
|
|
159 |
genes <- NULL |
|
|
160 |
} else if (featureSetToUse == "perTarget" || featureSetToUse == "perTargetAndSelf") { |
|
|
161 |
targetName <- getFeatureNameFromFeatureID(target$ID) |
|
|
162 |
stopifnot(!is.null(targetName)) |
|
|
163 |
genes <- get.related.genes(predFeaturesPerTarget, targetName, targetName, featureSetToUse == "perTargetAndSelf") |
|
|
164 |
cat("found", length(genes), "gene names to be used as features for", targetName, "\n") |
|
|
165 |
} else if (featureSetToUse == "perGroup") { |
|
|
166 |
stopifnot(all(colnames(predFeaturesPerTarget) == c("group", "target", "group.name"))) |
|
|
167 |
group <- target$group |
|
|
168 |
stopifnot(! is.null(group)) |
|
|
169 |
stopifnot(! is.null(target$gene)) |
|
|
170 |
genes <- get.gene.group(predFeaturesPerTarget, group, target$gene, FALSE) |
|
|
171 |
} else { |
|
|
172 |
stopifnot(featureSetToUse == "single") |
|
|
173 |
targetName <- getFeatureNameFromFeatureID(target$ID) |
|
|
174 |
genes <- targetName |
|
|
175 |
stopifnot(!is.null(genes)) |
|
|
176 |
} |
|
|
177 |
|
|
|
178 |
return(genes) |
|
|
179 |
} |
|
|
180 |
|
|
|
181 |
per_target <- function(per.task.state, common.state, output.file, run) { |
|
|
182 |
library(foreach) |
|
|
183 |
|
|
|
184 |
flock_run_dir <- run$dir |
|
|
185 |
|
|
|
186 |
analysis.dir <- basename(flock_run_dir); |
|
|
187 |
analysis.ID <- analysis.dir; |
|
|
188 |
output.dir <- flock_common_state$output.dir |
|
|
189 |
predMat.file <- flock_common_state$predMat.file |
|
|
190 |
targetMat.file <- flock_common_state$targetMat.file |
|
|
191 |
anno.file <- flock_common_state$anno |
|
|
192 |
kMinNumSamples <- flock_common_state$kMinNumSamples |
|
|
193 |
predFeaturesPerTarget.file <- flock_common_state$predFeaturesPerTarget.file |
|
|
194 |
config.summary <- flock_common_state$config.summary |
|
|
195 |
predFeatureRegExps <- flock_common_state$predFeatureRegExps |
|
|
196 |
|
|
|
197 |
fitControlSettings = flock_common_state$fitControlSettings |
|
|
198 |
|
|
|
199 |
# the following figures out what are the 100 most similar genes to target gene and uses only them |
|
|
200 |
# for prediction. |
|
|
201 |
#### |
|
|
202 |
targetMat.file <- flock_common_state$targetMat.file |
|
|
203 |
load(targetMat.file) |
|
|
204 |
|
|
|
205 |
featureSetToUse <- flock_common_state$featureSetToUse |
|
|
206 |
|
|
|
207 |
fit.single.target <- function(target) { |
|
|
208 |
# allow target to be specified as either a string or a list of (ID, group) |
|
|
209 |
if(is.character(target)) { |
|
|
210 |
target <- list(ID=target, targetUsedToLookupRelated=target, permute.seed=NULL) |
|
|
211 |
} |
|
|
212 |
|
|
|
213 |
targetID <- target$ID |
|
|
214 |
permute.seed <- target$permute.seed |
|
|
215 |
|
|
|
216 |
print(targetID) |
|
|
217 |
cat(sprintf("Generating model for %s\n", targetID)) |
|
|
218 |
set.seed(flock_common_state$seed) |
|
|
219 |
|
|
|
220 |
if (is.numeric(targetID)) { |
|
|
221 |
targetID <- colnames(targetMat)[targetID] |
|
|
222 |
} |
|
|
223 |
|
|
|
224 |
stopifnot(length(grep("NO_CURRENT", targetID)) == 0) |
|
|
225 |
|
|
|
226 |
permuteRows <- function(seed, targetMat) { |
|
|
227 |
saved <- .Random.seed |
|
|
228 |
set.seed(seed) |
|
|
229 |
permutedTargetMat <- targetMat |
|
|
230 |
for(i in seq(ncol(targetMat))) { |
|
|
231 |
permutedTargetMat[,i] = sample(targetMat[,i]) |
|
|
232 |
} |
|
|
233 |
.Random.seed <- saved |
|
|
234 |
permutedTargetMat |
|
|
235 |
} |
|
|
236 |
|
|
|
237 |
if(featureSetToUse == "all") { |
|
|
238 |
predFeatureRegExps <- ".*" |
|
|
239 |
genes <- NULL |
|
|
240 |
} else { |
|
|
241 |
genes <- determine.feature.genes(target, featureSetToUse, predFeaturesPerTarget.file) |
|
|
242 |
} |
|
|
243 |
|
|
|
244 |
output.prefix <- target$output.prefix |
|
|
245 |
group.name <- target$group.name |
|
|
246 |
if(is.null(group.name)) { |
|
|
247 |
group.name <- NA |
|
|
248 |
} else { |
|
|
249 |
config.summary <- c(config.summary, paste("group:", group.name)) |
|
|
250 |
} |
|
|
251 |
|
|
|
252 |
# if this is for the null distribution |
|
|
253 |
if(!is.null(permute.seed)) { |
|
|
254 |
permutedTargetMat <- permuteRows(permute.seed, targetMat) |
|
|
255 |
|
|
|
256 |
res <- runATLANTIS( |
|
|
257 |
analysis.ID=analysis.ID, |
|
|
258 |
targetID=targetID, |
|
|
259 |
output.prefix=paste(output.prefix, "-NULL-",permute.seed, sep=''), |
|
|
260 |
makePlot=FALSE, |
|
|
261 |
output.dir = paste(dirname(output.dir), '/temp', sep=''), |
|
|
262 |
fitControlSettings = fitControlSettings, |
|
|
263 |
additionalFeatures=NULL, |
|
|
264 |
predFeatureNamesToUse=genes, |
|
|
265 |
predFeatureRegExps=predFeatureRegExps, |
|
|
266 |
predMat.file = predMat.file, |
|
|
267 |
targetMat = permutedTargetMat, |
|
|
268 |
anno.file=anno.file, |
|
|
269 |
kMinNumSamples=kMinNumSamples, |
|
|
270 |
save.params=F, |
|
|
271 |
save.model=F, |
|
|
272 |
save.featureData=F) |
|
|
273 |
} else { |
|
|
274 |
res <- runATLANTIS( |
|
|
275 |
analysis.ID=analysis.ID, |
|
|
276 |
targetID=targetID, |
|
|
277 |
makePlot=TRUE, |
|
|
278 |
output.dir = output.dir, |
|
|
279 |
output.prefix=output.prefix, |
|
|
280 |
fitControlSettings = fitControlSettings, |
|
|
281 |
additionalFeatures=NULL, |
|
|
282 |
predFeatureNamesToUse=genes, |
|
|
283 |
predFeatureRegExps=predFeatureRegExps, |
|
|
284 |
predMat.file = predMat.file, |
|
|
285 |
targetMat = targetMat, |
|
|
286 |
anno.file=anno.file, |
|
|
287 |
kMinNumSamples=kMinNumSamples, |
|
|
288 |
save.params=F, |
|
|
289 |
save.model=F, |
|
|
290 |
report.summary=config.summary, |
|
|
291 |
predictOnly=flock_common_state$predictOnly |
|
|
292 |
) |
|
|
293 |
} |
|
|
294 |
|
|
|
295 |
list(targetID=targetID, |
|
|
296 |
result=res, |
|
|
297 |
permute.seed=permute.seed, |
|
|
298 |
output.prefix=output.prefix, |
|
|
299 |
group.name=group.name) |
|
|
300 |
|
|
|
301 |
} |
|
|
302 |
|
|
|
303 |
task.results <- lapply(flock_per_task_state, fit.single.target) |
|
|
304 |
|
|
|
305 |
save(task.results, file=output.file) |
|
|
306 |
} |
|
|
307 |
|
|
|
308 |
zscore <- function(x) { |
|
|
309 |
(x-mean(x, na.rm=T))/sd(x, na.rm=T) |
|
|
310 |
} |
|
|
311 |
|
|
|
312 |
|
|
|
313 |
gather <- function(per.task.state, common.state, output.file, run) { |
|
|
314 |
task.results <- do.call(c, lapply(flock_per_task_state, function(job.details) { |
|
|
315 |
e <- new.env() |
|
|
316 |
load(job.details$flock_output_file, envir=e) |
|
|
317 |
e$task.results |
|
|
318 |
})) |
|
|
319 |
|
|
|
320 |
is.permuted <- sapply(task.results, function(x) { !is.null(x$permute.seed) } ) |
|
|
321 |
|
|
|
322 |
null.dist.details <- foreach(task.result=task.results[is.permuted], .combine=rbind) %do% { |
|
|
323 |
load(task.result$result$fit.file) |
|
|
324 |
if(!is.null(fit.info$OOB$weighted.cor)) { |
|
|
325 |
data.frame(quality=fit.info$OOB$quality, weighted.cor=fit.info$OOB$weighted.cor, weighted.cor.R2=fit.info$OOB$weighted.cor.R2, with.more.weight=sum(min(fit.info$OOB$weights) != fit.info$OOB$weights), sample.count=length(fit.info$targetVec), failure.reason=NA, targetID=task.result$targetID, nfeatures=fit.info$OOB$nfeatures) |
|
|
326 |
} else { |
|
|
327 |
data.frame(quality=NA, weighted.cor=NA, weighted.cor.R2=NA, with.more.weight=NA, sample.count=NA, failure.reason=fit.info$failure.reason, targetID=task.result$targetID, nfeatures=NA) |
|
|
328 |
} |
|
|
329 |
} |
|
|
330 |
|
|
|
331 |
null.distribution.values <- na.omit(null.dist.details$quality[!is.na(null.dist.details$quality)]) |
|
|
332 |
pval.from.null.distribution <- function(x) { (sum(null.distribution.values >= x)+1)/(length(null.distribution.values)+1) } |
|
|
333 |
|
|
|
334 |
write.table(null.distribution.values, file=flock_common_state$quality.dist.file, col.names=F, row.names=F) |
|
|
335 |
write.csv(null.dist.details, file=paste(flock_common_state$quality.dist.file,"-details.csv",sep=''),row.names=F) |
|
|
336 |
|
|
|
337 |
# summarize the runs into a table |
|
|
338 |
|
|
|
339 |
fits <- lapply(task.results[!is.permuted], function(job) { |
|
|
340 |
if(!is.null(job$result) && !is.null(job$result$fit.file)) { |
|
|
341 |
load(job$result$fit.file) |
|
|
342 |
# reduce memory by dropping unused fields |
|
|
343 |
top.feature <- names(which.max(fit.info$varImp)) |
|
|
344 |
if(!is.null(top.feature)) { |
|
|
345 |
top.feature.values <- fit.info$predData[,top.feature] |
|
|
346 |
wt.cor.top.feature <- atlantis:::weighted.cor(top.feature.values, fit.info$targetVec, fit.info$OOB$weights) |
|
|
347 |
target.top.pred.corr <- cor(fit.info$targetVec, top.feature.values, use='pairwise.complete') |
|
|
348 |
lines.with.more.weight <- which(min(fit.info$OOB$weights) < fit.info$OOB$weights) |
|
|
349 |
sensitive.lines.zmean.top.feature <- mean(zscore(top.feature.values)[lines.with.more.weight], na.rm=T) |
|
|
350 |
} else { |
|
|
351 |
target.top.pred.corr <- NA |
|
|
352 |
wt.cor.top.feature <- NA |
|
|
353 |
sensitive.lines.zmean.top.feature <- NA |
|
|
354 |
} |
|
|
355 |
fit.info$top.feature <- top.feature |
|
|
356 |
fit.info$target.top.pred.corr <- target.top.pred.corr |
|
|
357 |
fit.info$wt.cor.top.feature <- wt.cor.top.feature |
|
|
358 |
fit.info$sensitive.lines.zmean.top.feature <- sensitive.lines.zmean.top.feature |
|
|
359 |
fit.info$predData <- NULL |
|
|
360 |
fit.info$predictors <- NULL |
|
|
361 |
fit.info |
|
|
362 |
} else { |
|
|
363 |
NULL |
|
|
364 |
} |
|
|
365 |
}) |
|
|
366 |
|
|
|
367 |
taskResults <- task.results[!is.permuted] |
|
|
368 |
|
|
|
369 |
varsPerModel <- foreach(taskResult=taskResults, fit.info=fits, .combine=rbind) %do% { |
|
|
370 |
if(length(fit.info$varImp) == 0) { |
|
|
371 |
NULL |
|
|
372 |
} else { |
|
|
373 |
d <- data.frame(variable=names(fit.info$varImp), varImportance=fit.info$varImp, targetID=taskResult$targetID, group.name=taskResult$group.name) |
|
|
374 |
rownames(d) <- NULL |
|
|
375 |
d |
|
|
376 |
} |
|
|
377 |
} |
|
|
378 |
|
|
|
379 |
coerce.to.na <- function(x) { ifelse(is.null(x), NA, x) } |
|
|
380 |
|
|
|
381 |
qualPerModel <- foreach(taskResult=taskResults, fit.info=fits, .combine=rbind) %do% { |
|
|
382 |
targetID <- taskResult$targetID |
|
|
383 |
Rsquared <- coerce.to.na(fit.info$OOB$quality) |
|
|
384 |
if(!is.null(fit.info$failure.reason)) { |
|
|
385 |
Rsquared <- NA |
|
|
386 |
} |
|
|
387 |
target.top.pred.corr <- fit.info$target.top.pred.corr |
|
|
388 |
top.feature <- fit.info$top.feature |
|
|
389 |
wt.Rsquared <- fit.info$OOB$weighted.cor.R2 |
|
|
390 |
Pvalue <- pval.from.null.distribution(Rsquared) |
|
|
391 |
n <- names(fit.info$varImp[top.feature]) |
|
|
392 |
group.name <- taskResult$group.name |
|
|
393 |
if(is.null(group.name)) { |
|
|
394 |
group.name <- NA |
|
|
395 |
} |
|
|
396 |
output.prefix <- taskResult$output.prefix |
|
|
397 |
target.min <- min(fit.info$targetVec, na.rm=T) |
|
|
398 |
sample.count <- length(fit.info$targetVec) |
|
|
399 |
with.more.weight <- sum(min(fit.info$OOB$weights) != fit.info$OOB$weights) |
|
|
400 |
nfeatures=fit.info$OOB$nfeatures |
|
|
401 |
if(is.null(nfeatures)) { |
|
|
402 |
nfeatures <- NA |
|
|
403 |
} |
|
|
404 |
print("nfeatures") |
|
|
405 |
print(nfeatures) |
|
|
406 |
|
|
|
407 |
#stopifnot(!is.null(Rsquared)) |
|
|
408 |
#stopifnot(!is.null(targetID)) |
|
|
409 |
|
|
|
410 |
data.frame(Rsquared=Rsquared, |
|
|
411 |
targetID=targetID, |
|
|
412 |
topFeature=coerce.to.na(n), |
|
|
413 |
Pvalue=coerce.to.na(Pvalue), |
|
|
414 |
topFeatureCor=target.top.pred.corr, |
|
|
415 |
targetMin=target.min, |
|
|
416 |
group.name=group.name, |
|
|
417 |
output.prefix=output.prefix, |
|
|
418 |
wt.Rsquared=coerce.to.na(wt.Rsquared), |
|
|
419 |
sample.count=sample.count, |
|
|
420 |
with.more.weight=with.more.weight, |
|
|
421 |
failure.reason=coerce.to.na(fit.info$failure.reason), |
|
|
422 |
nfeatures=nfeatures, |
|
|
423 |
sensitive.lines.zmean.top.feature=fit.info$sensitive.lines.zmean.top.feature, |
|
|
424 |
wt.cor.top.feature=fit.info$wt.cor.top.feature) |
|
|
425 |
} |
|
|
426 |
|
|
|
427 |
qualPerModel$percentile <- rank(qualPerModel$Rsquared,na.last=F)/nrow(qualPerModel) |
|
|
428 |
qualPerModel$fdr <- p.adjust(qualPerModel$Pvalue, method="fdr") |
|
|
429 |
|
|
|
430 |
cat("writing summary\n") |
|
|
431 |
write.csv(varsPerModel, file=paste(flock_run_dir,'/results/varsPerModel.csv',sep=''), row.names=F) |
|
|
432 |
write.csv(qualPerModel, file=paste(flock_run_dir,'/results/qualPerModel.csv',sep=''), row.names=F) |
|
|
433 |
|
|
|
434 |
annTable.file <- flock_common_state$anno.file |
|
|
435 |
|
|
|
436 |
# make report summary plot with: |
|
|
437 |
# 1. BRAF, KRAS, CTNNB1, ESR1 |
|
|
438 |
# 2. random 4 out of the top 20 models of that bulk run in terms of R^2 |
|
|
439 |
# 3. random 4 models with p-value > 0.1 |
|
|
440 |
|
|
|
441 |
if(length(qualPerModel) > 4) { |
|
|
442 |
|
|
|
443 |
sample.at.most <- function(v, n) { |
|
|
444 |
sample(v, min(length(v), n)) |
|
|
445 |
} |
|
|
446 |
|
|
|
447 |
randTop4 <- sample.at.most(qualPerModel$targetID[rank(-qualPerModel$Rsquared) <= 20], 4) |
|
|
448 |
randBad4 <- sample.at.most(qualPerModel$targetID[qualPerModel$Pvalue > 0.1], 4) |
|
|
449 |
|
|
|
450 |
add.atlantis.plots <- function(targetIds) { |
|
|
451 |
for(targetID in targetIds) { |
|
|
452 |
e <- new.env() |
|
|
453 |
fi <- sprintf("%s/results/%s_fit.info_cforest.Rdata", flock_run_dir, targetID) |
|
|
454 |
sum.file <- sprintf("%s/results/%s_ATLANTIS_Summary.Rdata", flock_run_dir, targetID) |
|
|
455 |
if(file.exists(fi) && file.exists(sum.file)) { |
|
|
456 |
load(fi, envir=e) |
|
|
457 |
PlotATLANTISresults(e$fit.info, sum.file, |
|
|
458 |
NA, NULL, annTable.file) |
|
|
459 |
} else { |
|
|
460 |
# write something to file to indicate target not present |
|
|
461 |
} |
|
|
462 |
} |
|
|
463 |
} |
|
|
464 |
|
|
|
465 |
pdf(file.path(flock_run_dir, 'results/summary.pdf'), w=11*1.5, h=8.5*1.5) |
|
|
466 |
pardefault <- par() |
|
|
467 |
grid.table(head(qualPerModel[order(qualPerModel$Rsquared, decreasing=T),],20)) |
|
|
468 |
par(pardefault) |
|
|
469 |
add.atlantis.plots(c("GS_BRAF", "GS_KRAS", "GS_CTNNB1", "GS_ESR1")) |
|
|
470 |
par(pardefault) |
|
|
471 |
add.atlantis.plots(randTop4) |
|
|
472 |
par(pardefault) |
|
|
473 |
add.atlantis.plots(randBad4) |
|
|
474 |
par(pardefault) |
|
|
475 |
if(length(null.distribution.values) > 0) { |
|
|
476 |
hist(null.distribution.values) |
|
|
477 |
} |
|
|
478 |
par(pardefault) |
|
|
479 |
dev.off() |
|
|
480 |
} |
|
|
481 |
} |
|
|
482 |
|
|
|
483 |
# given a feature ID (a compound string that includes a feature type and a feature name), |
|
|
484 |
# returns the feature name |
|
|
485 |
getFeatureNameFromFeatureID <- function(ID) { |
|
|
486 |
res <- strsplit(ID, "[_:]")[[1]] |
|
|
487 |
if (length(res) < 2) |
|
|
488 |
NULL |
|
|
489 |
else |
|
|
490 |
res[[2]] |
|
|
491 |
} |