|
a |
|
b/R/internal-functions-samr-adapted.R |
|
|
1 |
# This script contains customized versions of functions found in the samr |
|
|
2 |
# package. This is necessary because samr seems to have been abandoned, so an |
|
|
3 |
# upstream collaboration doesn't seem possible at the time of writing. |
|
|
4 |
# ATTENTION: The source code in this file is licensed under LGPL-3. |
|
|
5 |
|
|
|
6 |
# ============================================================================== |
|
|
7 |
# Constants |
|
|
8 |
# ============================================================================== |
|
|
9 |
samr.const.twoclass.unpaired.response <- "Two class unpaired" |
|
|
10 |
samr.const.twoclass.paired.response <- "Two class paired" |
|
|
11 |
samr.const.oneclass.response <- "One class" |
|
|
12 |
samr.const.quantitative.response <- "Quantitative" |
|
|
13 |
samr.const.multiclass.response <- "Multiclass" |
|
|
14 |
samr.const.twoclass.unpaired.timecourse.response <- |
|
|
15 |
"Two class unpaired timecourse" |
|
|
16 |
samr.const.twoclass.paired.timecourse.response <- "Two class paired timecourse" |
|
|
17 |
samr.const.oneclass.timecourse.response <- "One class timecourse" |
|
|
18 |
samr.const.survival.response <- "Survival" |
|
|
19 |
samr.const.patterndiscovery.response <- "Pattern discovery" |
|
|
20 |
|
|
|
21 |
# ============================================================================== |
|
|
22 |
# Functions |
|
|
23 |
# ============================================================================== |
|
|
24 |
|
|
|
25 |
#' @title Significance analysis of microarrays |
|
|
26 |
#' @description This function is an adaptation of `samr::samr` |
|
|
27 |
#' @param data Data object with components x- p by n matrix of features, one |
|
|
28 |
#' observation per column (missing values allowed); y- n-vector of outcome |
|
|
29 |
#' measurements; censoring.status- n-vector of censoring censoring.status |
|
|
30 |
#' (1= died or event occurred, 0=survived, or event was censored), needed for a |
|
|
31 |
#' censored survival outcome |
|
|
32 |
#' @param resp.type Problem type: "Quantitative" for a continuous parameter |
|
|
33 |
#' (Available for both array and sequencing data); "Two class unpaired" (for |
|
|
34 |
#' both array and sequencing data); "Survival" for censored survival outcome |
|
|
35 |
#' (for both array and sequencingdata); "Multiclass": more than 2 groups (for |
|
|
36 |
#' both array and sequencing data); "One class" for a single group (only for |
|
|
37 |
#' array data); "Two class paired" for two classes with paired observations |
|
|
38 |
#' (for both array and sequencing data); "Two class unpaired timecourse" (only |
|
|
39 |
#' for array data), "One class time course" (only for array data), |
|
|
40 |
#' "Two class.paired timecourse" (only for array data), or "Pattern discovery" |
|
|
41 |
#' (only for array data) |
|
|
42 |
#' @param assay.type Assay type: "array" for microarray data, "seq" for counts |
|
|
43 |
#' from sequencing |
|
|
44 |
#' @param s0 Exchangeability factor for denominator of test statistic; Default |
|
|
45 |
#' is automatic choice. Only used for array data. |
|
|
46 |
#' @param s0.perc Percentile of standard deviation values to use for s0; default |
|
|
47 |
#' is automatic choice; -1 means s0=0 (different from s0.perc=0, meaning |
|
|
48 |
#' s0=zeroeth percentile of standard deviation values= min of sd values. |
|
|
49 |
#' Only used for array data. |
|
|
50 |
#' @param nperms Number of permutations used to estimate false discovery rates |
|
|
51 |
#' @param center.arrays Should the data for each sample (array) be median |
|
|
52 |
#' centered at the outset? Default =FALSE. Only used for array data. |
|
|
53 |
#' @param testStatistic Test statistic to use in two class unpaired case.Either |
|
|
54 |
#' "standard" (t-statistic) or ,"wilcoxon" (Two-sample wilcoxon or Mann-Whitney |
|
|
55 |
#' test). Only used for array data. |
|
|
56 |
#' @param time.summary.type Summary measure for each time course: "slope", or |
|
|
57 |
#' "signed.area"). Only used for array data. |
|
|
58 |
#' @param regression.method Regression method for quantitative case: "standard", |
|
|
59 |
#' (linear least squares) or "ranks" (linear least squares on ranked data). |
|
|
60 |
#' Only used for array data. |
|
|
61 |
#' @param return.x Should the matrix of feature values be returned? Only useful |
|
|
62 |
#' for time course data, where x contains summaries of the features over time. |
|
|
63 |
#' Otherwise x is the same as the input data |
|
|
64 |
#' @param knn.neighbors Number of nearest neighbors to use for imputation of |
|
|
65 |
#' missing features values. Only used for array data. |
|
|
66 |
#' @param random.seed Optional initial seed for random number generator |
|
|
67 |
#' (integer) |
|
|
68 |
#' @param nresamp For assay.type="seq", number of resamples used to construct |
|
|
69 |
#' test statistic. Default 20. Only used for sequencing data. |
|
|
70 |
#' @param nresamp.perm For assay.type="seq", number of resamples used to |
|
|
71 |
#' construct test statistic for permutations. Default is equal to nresamp and it |
|
|
72 |
#' must be at most nresamp. Only used for sequencing data. |
|
|
73 |
#' @param xl.mode Used by Excel interface |
|
|
74 |
#' @param xl.time Used by Excel interface |
|
|
75 |
#' @param xl.prevfit Used by Excel interface |
|
|
76 |
#' @importFrom impute impute.knn |
|
|
77 |
sammy <- function( |
|
|
78 |
data, resp.type = c( |
|
|
79 |
"Quantitative", "Two class unpaired", |
|
|
80 |
"Survival", "Multiclass", "One class", "Two class paired", |
|
|
81 |
"Two class unpaired timecourse", "One class timecourse", |
|
|
82 |
"Two class paired timecourse", "Pattern discovery" |
|
|
83 |
), assay.type = c( |
|
|
84 |
"array", |
|
|
85 |
"seq" |
|
|
86 |
), s0 = NULL, s0.perc = NULL, nperms = 100, center.arrays = FALSE, |
|
|
87 |
testStatistic = c("standard", "wilcoxon"), time.summary.type = c( |
|
|
88 |
"slope", |
|
|
89 |
"signed.area" |
|
|
90 |
), regression.method = c("standard", "ranks"), |
|
|
91 |
return.x = FALSE, knn.neighbors = 10, random.seed = NULL, |
|
|
92 |
nresamp = 20, nresamp.perm = NULL, xl.mode = c( |
|
|
93 |
"regular", |
|
|
94 |
"firsttime", "next20", "lasttime" |
|
|
95 |
), xl.time = NULL, xl.prevfit = NULL) { |
|
|
96 |
this.call <- match.call() |
|
|
97 |
resp.type.arg <- match.arg(resp.type) |
|
|
98 |
assay.type <- match.arg(assay.type) |
|
|
99 |
xl.mode <- match.arg(xl.mode) |
|
|
100 |
set.seed(random.seed) |
|
|
101 |
if (is.null(nresamp.perm)) nresamp.perm <- nresamp |
|
|
102 |
nresamp.perm <- min(nresamp, nresamp.perm) |
|
|
103 |
if (xl.mode == "regular" | xl.mode == "firsttime") { |
|
|
104 |
x <- NULL |
|
|
105 |
xresamp <- NULL |
|
|
106 |
ttstar0 <- NULL |
|
|
107 |
evo <- NULL |
|
|
108 |
ystar <- NULL |
|
|
109 |
sdstar.keep <- NULL |
|
|
110 |
censoring.status <- NULL |
|
|
111 |
pi0 <- NULL |
|
|
112 |
stand.contrasts <- NULL |
|
|
113 |
stand.contrasts.star <- NULL |
|
|
114 |
stand.contrasts.95 <- NULL |
|
|
115 |
foldchange <- NULL |
|
|
116 |
foldchange.star <- NULL |
|
|
117 |
perms <- NULL |
|
|
118 |
permsy <- NULL |
|
|
119 |
eigengene <- NULL |
|
|
120 |
eigengene.number <- NULL |
|
|
121 |
testStatistic <- match.arg(testStatistic) |
|
|
122 |
time.summary.type <- match.arg(time.summary.type) |
|
|
123 |
regression.method <- match.arg(regression.method) |
|
|
124 |
x <- data$x |
|
|
125 |
y <- data$y |
|
|
126 |
argy <- y |
|
|
127 |
if (!is.null(data$eigengene.number)) { |
|
|
128 |
eigengene.number <- data$eigengene.number |
|
|
129 |
} |
|
|
130 |
if (sum(is.na(x)) > 0) { |
|
|
131 |
x <- impute.knn(x, k = knn.neighbors) |
|
|
132 |
if (!is.matrix(x)) { |
|
|
133 |
x <- x$data |
|
|
134 |
} |
|
|
135 |
} |
|
|
136 |
are.blocks.specified <- FALSE |
|
|
137 |
cond <- resp.type %in% c( |
|
|
138 |
"One class", "Two class unpaired timecourse", |
|
|
139 |
"One class unpaired timecourse", "Two class paired timecourse", |
|
|
140 |
"Pattern discovery" |
|
|
141 |
) |
|
|
142 |
if (assay.type == "seq" & cond) { |
|
|
143 |
stop(paste("Resp.type=", resp.type, " not allowed when assay.type='seq'")) |
|
|
144 |
} |
|
|
145 |
if (assay.type == "seq" & min(x) < 0) { |
|
|
146 |
stop(paste("Negative values not allowed when assay.type='seq'")) |
|
|
147 |
} |
|
|
148 |
if (assay.type == "seq" & (sum(x %% 1 != 0) != 0)) { |
|
|
149 |
stop("Non-integer values not alled when assay.type='seq'") |
|
|
150 |
} |
|
|
151 |
if (assay.type == "seq" & center.arrays) { |
|
|
152 |
stop(paste("Centering not allowed when assay.type='seq'")) |
|
|
153 |
} |
|
|
154 |
if (assay.type == "seq" & regression.method == "ranks") { |
|
|
155 |
stop(paste("regression.method==ranks not allowed when assay.type='seq'")) |
|
|
156 |
} |
|
|
157 |
if (center.arrays) { |
|
|
158 |
x <- scale(x, center = apply(x, 2, median), scale = FALSE) |
|
|
159 |
} |
|
|
160 |
depth <- rep(NA, ncol(x)) |
|
|
161 |
if (assay.type == "seq") { |
|
|
162 |
message("Estimating sequencing depths...") |
|
|
163 |
depth <- samr.estimate.depth(x) |
|
|
164 |
message("Resampling to get new data matrices...") |
|
|
165 |
xresamp <- resa(x, depth, nresamp = nresamp) |
|
|
166 |
} |
|
|
167 |
if (resp.type == samr.const.twoclass.unpaired.response) { |
|
|
168 |
if (substring(y[1], 2, 6) == "Block" | substring( |
|
|
169 |
y[1], |
|
|
170 |
2, 6 |
|
|
171 |
) == "block") { |
|
|
172 |
junk <- parse.block.labels.for.2classes(y) |
|
|
173 |
y <- junk$y |
|
|
174 |
blocky <- junk$blocky |
|
|
175 |
are.blocks.specified <- TRUE |
|
|
176 |
} |
|
|
177 |
} |
|
|
178 |
if (resp.type == samr.const.twoclass.unpaired.response | |
|
|
179 |
resp.type == samr.const.twoclass.paired.response | |
|
|
180 |
resp.type == samr.const.oneclass.response | |
|
|
181 |
resp.type == samr.const.quantitative.response | |
|
|
182 |
resp.type == samr.const.multiclass.response |
|
|
183 |
) { |
|
|
184 |
y <- as.numeric(y) |
|
|
185 |
} |
|
|
186 |
sd.internal <- NULL |
|
|
187 |
if (resp.type == samr.const.twoclass.unpaired.timecourse.response | |
|
|
188 |
resp.type == samr.const.twoclass.paired.timecourse.response | |
|
|
189 |
resp.type == samr.const.oneclass.timecourse.response) { |
|
|
190 |
junk <- parse.time.labels.and.summarize.data( |
|
|
191 |
x, y, |
|
|
192 |
resp.type, time.summary.type |
|
|
193 |
) |
|
|
194 |
y <- junk$y |
|
|
195 |
x <- junk$x |
|
|
196 |
sd.internal <- sqrt(rowMeans(junk$sd^2)) |
|
|
197 |
if (min(table(y)) == 1) { |
|
|
198 |
warning( |
|
|
199 |
"Only one timecourse in one or more classes;\n", |
|
|
200 |
"SAM plot and FDRs will be unreliable;", |
|
|
201 |
"only gene scores are informative" |
|
|
202 |
) |
|
|
203 |
} |
|
|
204 |
} |
|
|
205 |
if (resp.type == samr.const.twoclass.unpaired.timecourse.response) { |
|
|
206 |
resp.type <- samr.const.twoclass.unpaired.response |
|
|
207 |
} |
|
|
208 |
if (resp.type == samr.const.twoclass.paired.timecourse.response) { |
|
|
209 |
resp.type <- samr.const.twoclass.paired.response |
|
|
210 |
} |
|
|
211 |
if (resp.type == samr.const.oneclass.timecourse.response) { |
|
|
212 |
resp.type <- samr.const.oneclass.response |
|
|
213 |
} |
|
|
214 |
stand.contrasts <- NULL |
|
|
215 |
stand.contrasts.95 <- NULL |
|
|
216 |
if (resp.type == samr.const.survival.response) { |
|
|
217 |
censoring.status <- data$censoring.status |
|
|
218 |
} |
|
|
219 |
check.format( |
|
|
220 |
y, |
|
|
221 |
resp.type = resp.type, censoring.status = censoring.status |
|
|
222 |
) |
|
|
223 |
if (resp.type == samr.const.quantitative.response & regression.method == |
|
|
224 |
"ranks") { |
|
|
225 |
y <- rank(y) |
|
|
226 |
x <- t(apply(x, 1, rank)) |
|
|
227 |
} |
|
|
228 |
n <- nrow(x) |
|
|
229 |
sd <- NULL |
|
|
230 |
numer <- NULL |
|
|
231 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
232 |
testStatistic == "standard" & assay.type == "array") { |
|
|
233 |
init.fit <- ttest.func(x, y, sd = sd.internal) |
|
|
234 |
numer <- init.fit$numer |
|
|
235 |
sd <- init.fit$sd |
|
|
236 |
} |
|
|
237 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
238 |
testStatistic == "wilcoxon" & assay.type == "array") { |
|
|
239 |
init.fit <- wilcoxon.func(x, y) |
|
|
240 |
numer <- init.fit$numer |
|
|
241 |
sd <- init.fit$sd |
|
|
242 |
} |
|
|
243 |
if (resp.type == samr.const.oneclass.response & assay.type == |
|
|
244 |
"array") { |
|
|
245 |
init.fit <- onesample.ttest.func(x, y, sd = sd.internal) |
|
|
246 |
numer <- init.fit$numer |
|
|
247 |
sd <- init.fit$sd |
|
|
248 |
} |
|
|
249 |
if (resp.type == samr.const.twoclass.paired.response & |
|
|
250 |
assay.type == "array") { |
|
|
251 |
init.fit <- paired.ttest.func(x, y, sd = sd.internal) |
|
|
252 |
numer <- init.fit$numer |
|
|
253 |
sd <- init.fit$sd |
|
|
254 |
} |
|
|
255 |
if (resp.type == samr.const.survival.response & assay.type == |
|
|
256 |
"array") { |
|
|
257 |
init.fit <- cox.func(x, y, censoring.status) |
|
|
258 |
numer <- init.fit$numer |
|
|
259 |
sd <- init.fit$sd |
|
|
260 |
} |
|
|
261 |
if (resp.type == samr.const.multiclass.response & assay.type == |
|
|
262 |
"array") { |
|
|
263 |
init.fit <- multiclass.func(x, y) |
|
|
264 |
numer <- init.fit$numer |
|
|
265 |
sd <- init.fit$sd |
|
|
266 |
} |
|
|
267 |
if (resp.type == samr.const.quantitative.response & assay.type == |
|
|
268 |
"array") { |
|
|
269 |
init.fit <- quantitative.func(x, y) |
|
|
270 |
numer <- init.fit$numer |
|
|
271 |
sd <- init.fit$sd |
|
|
272 |
} |
|
|
273 |
if (resp.type == samr.const.patterndiscovery.response & |
|
|
274 |
assay.type == "array") { |
|
|
275 |
init.fit <- patterndiscovery.func(x) |
|
|
276 |
numer <- init.fit$numer |
|
|
277 |
sd <- init.fit$sd |
|
|
278 |
} |
|
|
279 |
if ((resp.type == samr.const.quantitative.response & |
|
|
280 |
(testStatistic == "wilcoxon" | regression.method == |
|
|
281 |
"ranks" & assay.type == "array") | resp.type == |
|
|
282 |
samr.const.patterndiscovery.response) | resp.type == |
|
|
283 |
samr.const.twoclass.unpaired.response & assay.type == |
|
|
284 |
"array" & testStatistic == "wilcoxon" | (nrow(x) < |
|
|
285 |
500) & is.null(s0) & is.null(s0.perc)) { |
|
|
286 |
s0 <- quantile(sd, 0.05) |
|
|
287 |
s0.perc <- 0.05 |
|
|
288 |
} |
|
|
289 |
if (is.null(s0) & assay.type == "array") { |
|
|
290 |
if (!is.null(s0.perc)) { |
|
|
291 |
if ((s0.perc != -1 & s0.perc < 0) | s0.perc > |
|
|
292 |
100) { |
|
|
293 |
stop( |
|
|
294 |
"Illegal value for s0.perc: must be between 0 and 100, ", |
|
|
295 |
"or equal\nto (-1) (meaning that s0 should be set to zero)" |
|
|
296 |
) |
|
|
297 |
} |
|
|
298 |
if (s0.perc == -1) { |
|
|
299 |
s0 <- 0 |
|
|
300 |
} |
|
|
301 |
if (s0.perc >= 0) { |
|
|
302 |
s0 <- quantile(init.fit$sd, s0.perc / 100) |
|
|
303 |
} |
|
|
304 |
} |
|
|
305 |
if (is.null(s0.perc)) { |
|
|
306 |
s0 <- est.s0(init.fit$tt, init.fit$sd)$s0.hat |
|
|
307 |
s0.perc <- 100 * sum(init.fit$sd < s0) / length(init.fit$sd) |
|
|
308 |
} |
|
|
309 |
} |
|
|
310 |
if (assay.type == "seq") { |
|
|
311 |
s0 <- 0 |
|
|
312 |
s0.perc <- 0 |
|
|
313 |
} |
|
|
314 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
315 |
testStatistic == "standard" & assay.type == "array") { |
|
|
316 |
tt <- ttest.func(x, y, s0 = s0, sd = sd.internal)$tt |
|
|
317 |
} |
|
|
318 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
319 |
testStatistic == "wilcoxon" & assay.type == "array") { |
|
|
320 |
tt <- wilcoxon.func(x, y, s0 = s0)$tt |
|
|
321 |
} |
|
|
322 |
if (resp.type == samr.const.oneclass.response & assay.type == |
|
|
323 |
"array") { |
|
|
324 |
tt <- onesample.ttest.func(x, y, s0 = s0, sd = sd.internal)$tt |
|
|
325 |
} |
|
|
326 |
if (resp.type == samr.const.twoclass.paired.response & |
|
|
327 |
assay.type == "array") { |
|
|
328 |
tt <- paired.ttest.func(x, y, s0 = s0, sd = sd.internal)$tt |
|
|
329 |
} |
|
|
330 |
if (resp.type == samr.const.survival.response & assay.type == |
|
|
331 |
"array") { |
|
|
332 |
tt <- cox.func(x, y, censoring.status, s0 = s0)$tt |
|
|
333 |
} |
|
|
334 |
if (resp.type == samr.const.multiclass.response & assay.type == |
|
|
335 |
"array") { |
|
|
336 |
junk2 <- multiclass.func(x, y, s0 = s0) |
|
|
337 |
tt <- junk2$tt |
|
|
338 |
stand.contrasts <- junk2$stand.contrasts |
|
|
339 |
} |
|
|
340 |
if (resp.type == samr.const.quantitative.response & assay.type == |
|
|
341 |
"array") { |
|
|
342 |
tt <- quantitative.func(x, y, s0 = s0)$tt |
|
|
343 |
} |
|
|
344 |
if (resp.type == samr.const.patterndiscovery.response & |
|
|
345 |
assay.type == "array") { |
|
|
346 |
junk <- patterndiscovery.func( |
|
|
347 |
x, s0 = s0, eigengene.number = eigengene.number |
|
|
348 |
) |
|
|
349 |
tt <- junk$tt |
|
|
350 |
eigengene <- junk$eigengene |
|
|
351 |
} |
|
|
352 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
353 |
assay.type == "seq") { |
|
|
354 |
junk <- wilcoxon.unpaired.seq.func(xresamp, y) |
|
|
355 |
tt <- junk$tt |
|
|
356 |
numer <- junk$numer |
|
|
357 |
sd <- junk$sd |
|
|
358 |
} |
|
|
359 |
if (resp.type == samr.const.twoclass.paired.response & |
|
|
360 |
assay.type == "seq") { |
|
|
361 |
junk <- wilcoxon.paired.seq.func(xresamp, y) |
|
|
362 |
tt <- junk$tt |
|
|
363 |
numer <- junk$numer |
|
|
364 |
sd <- junk$sd |
|
|
365 |
} |
|
|
366 |
if (resp.type == samr.const.quantitative.response & assay.type == |
|
|
367 |
"seq") { |
|
|
368 |
junk <- quantitative.seq.func(xresamp, y) |
|
|
369 |
tt <- junk$tt |
|
|
370 |
numer <- junk$numer |
|
|
371 |
sd <- junk$sd |
|
|
372 |
} |
|
|
373 |
if (resp.type == samr.const.survival.response & assay.type == |
|
|
374 |
"seq") { |
|
|
375 |
junk <- cox.seq.func(xresamp, y, censoring.status) |
|
|
376 |
tt <- junk$tt |
|
|
377 |
numer <- junk$numer |
|
|
378 |
sd <- junk$sd |
|
|
379 |
} |
|
|
380 |
if (resp.type == samr.const.multiclass.response & assay.type == |
|
|
381 |
"seq") { |
|
|
382 |
junk2 <- multiclass.seq.func(xresamp, y) |
|
|
383 |
tt <- junk2$tt |
|
|
384 |
numer <- junk2$numer |
|
|
385 |
sd <- junk2$sd |
|
|
386 |
stand.contrasts <- junk2$stand.contrasts |
|
|
387 |
} |
|
|
388 |
if ( |
|
|
389 |
resp.type == samr.const.quantitative.response | |
|
|
390 |
resp.type == samr.const.multiclass.response | |
|
|
391 |
resp.type == samr.const.survival.response |
|
|
392 |
) { |
|
|
393 |
junk <- getperms(y, nperms) |
|
|
394 |
perms <- junk$perms |
|
|
395 |
all.perms.flag <- junk$all.perms.flag |
|
|
396 |
nperms.act <- junk$nperms.act |
|
|
397 |
} |
|
|
398 |
if (resp.type == samr.const.twoclass.unpaired.response) { |
|
|
399 |
if (are.blocks.specified) { |
|
|
400 |
junk <- compute.block.perms(y, blocky, nperms) |
|
|
401 |
permsy <- matrix(junk$permsy, ncol = length(y)) |
|
|
402 |
all.perms.flag <- junk$all.perms.flag |
|
|
403 |
nperms.act <- junk$nperms.act |
|
|
404 |
} else { |
|
|
405 |
junk <- getperms(y, nperms) |
|
|
406 |
permsy <- matrix(y[junk$perms], ncol = length(y)) |
|
|
407 |
all.perms.flag <- junk$all.perms.flag |
|
|
408 |
nperms.act <- junk$nperms.act |
|
|
409 |
} |
|
|
410 |
} |
|
|
411 |
if (resp.type == samr.const.oneclass.response) { |
|
|
412 |
if ((length(y) * log(2)) < log(nperms)) { |
|
|
413 |
allii <- 0:((2^length(y)) - 1) |
|
|
414 |
nperms.act <- 2^length(y) |
|
|
415 |
all.perms.flag <- 1 |
|
|
416 |
} else { |
|
|
417 |
nperms.act <- nperms |
|
|
418 |
all.perms.flag <- 0 |
|
|
419 |
} |
|
|
420 |
permsy <- matrix(NA, nrow = nperms.act, ncol = length(y)) |
|
|
421 |
if (all.perms.flag == 1) { |
|
|
422 |
k <- 0 |
|
|
423 |
for (i in allii) { |
|
|
424 |
junk <- integer.base.b(i, b = 2) |
|
|
425 |
if (length(junk) < length(y)) { |
|
|
426 |
junk <- c( |
|
|
427 |
rep(0, length(y) - length(junk)), |
|
|
428 |
junk |
|
|
429 |
) |
|
|
430 |
} |
|
|
431 |
k <- k + 1 |
|
|
432 |
permsy[k, ] <- y * (2 * junk - 1) |
|
|
433 |
} |
|
|
434 |
} else { |
|
|
435 |
for (i in 1:nperms.act) { |
|
|
436 |
permsy[i, ] <- sample(c(-1, 1), |
|
|
437 |
size = length(y), |
|
|
438 |
replace = TRUE |
|
|
439 |
) |
|
|
440 |
} |
|
|
441 |
} |
|
|
442 |
} |
|
|
443 |
if (resp.type == samr.const.twoclass.paired.response) { |
|
|
444 |
junk <- compute.block.perms(y, abs(y), nperms) |
|
|
445 |
permsy <- junk$permsy |
|
|
446 |
all.perms.flag <- junk$all.perms.flag |
|
|
447 |
nperms.act <- junk$nperms.act |
|
|
448 |
} |
|
|
449 |
if (resp.type == samr.const.patterndiscovery.response) { |
|
|
450 |
nperms.act <- nperms |
|
|
451 |
perms <- NULL |
|
|
452 |
permsy <- NULL |
|
|
453 |
all.perms.flag <- FALSE |
|
|
454 |
} |
|
|
455 |
sdstar.keep <- NULL |
|
|
456 |
if (assay.type != "seq") { |
|
|
457 |
sdstar.keep <- matrix(0, ncol = nperms.act, nrow = nrow(x)) |
|
|
458 |
} |
|
|
459 |
ttstar <- matrix(0, nrow = nrow(x), ncol = nperms.act) |
|
|
460 |
foldchange.star <- NULL |
|
|
461 |
if (resp.type == samr.const.twoclass.unpaired.response | |
|
|
462 |
resp.type == samr.const.twoclass.paired.response) { |
|
|
463 |
foldchange.star <- matrix(0, nrow = nrow(x), ncol = nperms.act) |
|
|
464 |
} |
|
|
465 |
if (resp.type == samr.const.multiclass.response) { |
|
|
466 |
stand.contrasts.star <- array(NA, c( |
|
|
467 |
nrow(x), length(table(y)), |
|
|
468 |
nperms.act |
|
|
469 |
)) |
|
|
470 |
} |
|
|
471 |
} |
|
|
472 |
if (xl.mode == "next20" | xl.mode == "lasttime") { |
|
|
473 |
evo <- xl.prevfit$evo |
|
|
474 |
tt <- xl.prevfit$tt |
|
|
475 |
numer <- xl.prevfit$numer |
|
|
476 |
eigengene <- xl.prevfit$eigengene |
|
|
477 |
eigengene.number <- xl.prevfit$eigengene.number |
|
|
478 |
sd <- xl.prevfit$sd - xl.prevfit$s0 |
|
|
479 |
sd.internal <- xl.prevfit$sd.internal |
|
|
480 |
ttstar <- xl.prevfit$ttstar |
|
|
481 |
ttstar0 <- xl.prevfit$ttstar0 |
|
|
482 |
n <- xl.prevfit$n |
|
|
483 |
pi0 <- xl.prevfit$pi0 |
|
|
484 |
foldchange <- xl.prevfit$foldchange |
|
|
485 |
y <- xl.prevfit$y |
|
|
486 |
x <- xl.prevfit$x |
|
|
487 |
xresamp <- xl.prevfit$xresamp |
|
|
488 |
censoring.status <- xl.prevfit$censoring.status |
|
|
489 |
argy <- xl.prevfit$argy |
|
|
490 |
testStatistic <- xl.prevfit$testStatistic |
|
|
491 |
foldchange.star <- xl.prevfit$foldchange.star |
|
|
492 |
s0 <- xl.prevfit$s0 |
|
|
493 |
s0.perc <- xl.prevfit$s0.perc |
|
|
494 |
resp.type <- xl.prevfit$resp.type |
|
|
495 |
resp.type.arg <- xl.prevfit$resp.type.arg |
|
|
496 |
assay.type <- xl.prevfit$assay.type |
|
|
497 |
sdstar.keep <- xl.prevfit$sdstar.keep |
|
|
498 |
resp.type <- xl.prevfit$resp.type |
|
|
499 |
stand.contrasts <- xl.prevfit$stand.contrasts |
|
|
500 |
stand.contrasts.star <- xl.prevfit$stand.contrasts.star |
|
|
501 |
stand.contrasts.95 <- xl.prevfit$stand.contrasts.95 |
|
|
502 |
perms <- xl.prevfit$perms |
|
|
503 |
permsy <- xl.prevfit$permsy |
|
|
504 |
nperms <- xl.prevfit$nperms |
|
|
505 |
nperms.act <- xl.prevfit$nperms.act |
|
|
506 |
all.perms.flag <- xl.prevfit$all.perms.flag |
|
|
507 |
depth <- xl.prevfit$depth |
|
|
508 |
nresamp <- xl.prevfit$nresamp |
|
|
509 |
nresamp.perm <- xl.prevfit$nresamp.perm |
|
|
510 |
} |
|
|
511 |
if (xl.mode == "regular") { |
|
|
512 |
first <- 1 |
|
|
513 |
last <- nperms.act |
|
|
514 |
} |
|
|
515 |
if (xl.mode == "firsttime") { |
|
|
516 |
first <- 1 |
|
|
517 |
last <- 1 |
|
|
518 |
} |
|
|
519 |
if (xl.mode == "next20") { |
|
|
520 |
first <- xl.time |
|
|
521 |
last <- min(xl.time + 19, nperms.act - 1) |
|
|
522 |
} |
|
|
523 |
if (xl.mode == "lasttime") { |
|
|
524 |
first <- nperms.act |
|
|
525 |
last <- nperms.act |
|
|
526 |
} |
|
|
527 |
for (b in first:last) { |
|
|
528 |
message(c("perm = ", b)) |
|
|
529 |
if (assay.type == "array") { |
|
|
530 |
xstar <- x |
|
|
531 |
} |
|
|
532 |
if (assay.type == "seq") { |
|
|
533 |
xstar <- xresamp[, , 1:nresamp.perm] |
|
|
534 |
} |
|
|
535 |
if (resp.type == samr.const.oneclass.response) { |
|
|
536 |
ystar <- permsy[b, ] |
|
|
537 |
if (testStatistic == "standard") { |
|
|
538 |
ttstar[, b] <- onesample.ttest.func(xstar, ystar, |
|
|
539 |
s0 = s0, sd = sd.internal |
|
|
540 |
)$tt |
|
|
541 |
} |
|
|
542 |
} |
|
|
543 |
if (resp.type == samr.const.twoclass.paired.response) { |
|
|
544 |
ystar <- permsy[b, ] |
|
|
545 |
if (assay.type == "array") { |
|
|
546 |
ttstar[, b] <- paired.ttest.func(xstar, ystar, |
|
|
547 |
s0 = s0, sd = sd.internal |
|
|
548 |
)$tt |
|
|
549 |
foldchange.star[, b] <- foldchange.paired( |
|
|
550 |
xstar, |
|
|
551 |
ystar, data$logged2 |
|
|
552 |
) |
|
|
553 |
} |
|
|
554 |
if (assay.type == "seq") { |
|
|
555 |
ttstar[, b] <- wilcoxon.paired.seq.func( |
|
|
556 |
xstar, |
|
|
557 |
ystar |
|
|
558 |
)$tt |
|
|
559 |
foldchange.star[, b] <- foldchange.seq.twoclass.paired( |
|
|
560 |
x, |
|
|
561 |
ystar, depth |
|
|
562 |
) |
|
|
563 |
} |
|
|
564 |
} |
|
|
565 |
if (resp.type == samr.const.twoclass.unpaired.response) { |
|
|
566 |
ystar <- permsy[b, ] |
|
|
567 |
if (assay.type == "array") { |
|
|
568 |
if (testStatistic == "standard") { |
|
|
569 |
junk <- ttest.func(xstar, ystar, s0 = s0, sd = sd.internal) |
|
|
570 |
} |
|
|
571 |
if (testStatistic == "wilcoxon") { |
|
|
572 |
junk <- wilcoxon.func(xstar, ystar, s0 = s0) |
|
|
573 |
} |
|
|
574 |
ttstar[, b] <- junk$tt |
|
|
575 |
sdstar.keep[, b] <- junk$sd |
|
|
576 |
foldchange.star[, b] <- foldchange.twoclass( |
|
|
577 |
xstar, |
|
|
578 |
ystar, data$logged2 |
|
|
579 |
) |
|
|
580 |
} |
|
|
581 |
if (assay.type == "seq") { |
|
|
582 |
ttstar[, b] <- wilcoxon.unpaired.seq.func( |
|
|
583 |
xstar, |
|
|
584 |
ystar |
|
|
585 |
)$tt |
|
|
586 |
foldchange.star[, b] <- foldchange.seq.twoclass.unpaired( |
|
|
587 |
x, |
|
|
588 |
ystar, depth |
|
|
589 |
) |
|
|
590 |
} |
|
|
591 |
} |
|
|
592 |
if (resp.type == samr.const.survival.response) { |
|
|
593 |
o <- perms[b, ] |
|
|
594 |
if (assay.type == "array") { |
|
|
595 |
ttstar[, b] <- cox.func( |
|
|
596 |
xstar, y[o], |
|
|
597 |
censoring.status = censoring.status[o], s0 = s0 |
|
|
598 |
)$tt |
|
|
599 |
} |
|
|
600 |
if (assay.type == "seq") { |
|
|
601 |
ttstar[, b] <- cox.seq.func( |
|
|
602 |
xstar, y[o], |
|
|
603 |
censoring.status = censoring.status[o] |
|
|
604 |
)$tt |
|
|
605 |
} |
|
|
606 |
} |
|
|
607 |
if (resp.type == samr.const.multiclass.response) { |
|
|
608 |
ystar <- y[perms[b, ]] |
|
|
609 |
if (assay.type == "array") { |
|
|
610 |
junk <- multiclass.func(xstar, ystar, s0 = s0) |
|
|
611 |
ttstar[, b] <- junk$tt |
|
|
612 |
sdstar.keep[, b] <- junk$sd |
|
|
613 |
stand.contrasts.star[, , b] <- junk$stand.contrasts |
|
|
614 |
} |
|
|
615 |
if (assay.type == "seq") { |
|
|
616 |
junk <- multiclass.seq.func(xstar, ystar) |
|
|
617 |
ttstar[, b] <- junk$tt |
|
|
618 |
stand.contrasts.star[, , b] <- junk$stand.contrasts |
|
|
619 |
} |
|
|
620 |
} |
|
|
621 |
if (resp.type == samr.const.quantitative.response) { |
|
|
622 |
ystar <- y[perms[b, ]] |
|
|
623 |
if (assay.type == "array") { |
|
|
624 |
junk <- quantitative.func(xstar, ystar, s0 = s0) |
|
|
625 |
ttstar[, b] <- junk$tt |
|
|
626 |
sdstar.keep[, b] <- junk$sd |
|
|
627 |
} |
|
|
628 |
if (assay.type == "seq") { |
|
|
629 |
junk <- quantitative.seq.func(xstar, ystar) |
|
|
630 |
ttstar[, b] <- junk$tt |
|
|
631 |
} |
|
|
632 |
} |
|
|
633 |
if (resp.type == samr.const.patterndiscovery.response) { |
|
|
634 |
xstar <- permute.rows(x) |
|
|
635 |
junk <- patterndiscovery.func( |
|
|
636 |
xstar, |
|
|
637 |
s0 = s0, eigengene.number = eigengene.number |
|
|
638 |
) |
|
|
639 |
ttstar[, b] <- junk$tt |
|
|
640 |
sdstar.keep[, b] <- junk$sd |
|
|
641 |
} |
|
|
642 |
} |
|
|
643 |
if (xl.mode == "regular" | xl.mode == "lasttime") { |
|
|
644 |
ttstar0 <- ttstar |
|
|
645 |
for (j in seq_len(ncol(ttstar))) { |
|
|
646 |
ttstar[, j] <- -1 * sort(-1 * ttstar[, j]) |
|
|
647 |
} |
|
|
648 |
for (i in seq_len(nrow(ttstar))) { |
|
|
649 |
ttstar[i, ] <- sort(ttstar[i, ]) |
|
|
650 |
} |
|
|
651 |
evo <- apply(ttstar, 1, mean) |
|
|
652 |
evo <- evo[seq(length(evo), 1)] |
|
|
653 |
pi0 <- 1 |
|
|
654 |
if (resp.type != samr.const.multiclass.response) { |
|
|
655 |
qq <- quantile(ttstar, c(0.25, 0.75)) |
|
|
656 |
} |
|
|
657 |
if (resp.type == samr.const.multiclass.response) { |
|
|
658 |
qq <- quantile(ttstar, c(0, 0.5)) |
|
|
659 |
} |
|
|
660 |
pi0 <- sum(tt > qq[1] & tt < qq[2]) / (0.5 * length(tt)) |
|
|
661 |
foldchange <- NULL |
|
|
662 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
663 |
assay.type == "array") { |
|
|
664 |
foldchange <- foldchange.twoclass(x, y, data$logged2) |
|
|
665 |
} |
|
|
666 |
if (resp.type == samr.const.twoclass.paired.response & |
|
|
667 |
assay.type == "array") { |
|
|
668 |
foldchange <- foldchange.paired(x, y, data$logged2) |
|
|
669 |
} |
|
|
670 |
if (resp.type == samr.const.oneclass.response & assay.type == |
|
|
671 |
"array") { |
|
|
672 |
} |
|
|
673 |
stand.contrasts.95 <- NULL |
|
|
674 |
if (resp.type == samr.const.multiclass.response) { |
|
|
675 |
stand.contrasts.95 <- quantile( |
|
|
676 |
stand.contrasts.star, |
|
|
677 |
c(0.025, 0.975) |
|
|
678 |
) |
|
|
679 |
} |
|
|
680 |
if (resp.type == samr.const.twoclass.unpaired.response & |
|
|
681 |
assay.type == "seq") { |
|
|
682 |
foldchange <- foldchange.seq.twoclass.unpaired( |
|
|
683 |
x, |
|
|
684 |
y, depth |
|
|
685 |
) |
|
|
686 |
} |
|
|
687 |
if (resp.type == samr.const.twoclass.paired.response & |
|
|
688 |
assay.type == "seq") { |
|
|
689 |
foldchange <- foldchange.seq.twoclass.paired( |
|
|
690 |
x, y, |
|
|
691 |
depth |
|
|
692 |
) |
|
|
693 |
} |
|
|
694 |
if (return.x == FALSE) { |
|
|
695 |
x <- NULL |
|
|
696 |
} |
|
|
697 |
} |
|
|
698 |
return( |
|
|
699 |
list( |
|
|
700 |
n = n, x = x, xresamp = xresamp, y = y, argy = argy, |
|
|
701 |
censoring.status = censoring.status, testStatistic = testStatistic, |
|
|
702 |
nperms = nperms, nperms.act = nperms.act, tt = tt, numer = numer, |
|
|
703 |
sd = sd + s0, sd.internal = sd.internal, s0 = s0, s0.perc = s0.perc, |
|
|
704 |
evo = evo, perms = perms, permsy = permsy, nresamp = nresamp, |
|
|
705 |
nresamp.perm = nresamp.perm, all.perms.flag = all.perms.flag, |
|
|
706 |
ttstar = ttstar, ttstar0 = ttstar0, eigengene = eigengene, |
|
|
707 |
eigengene.number = eigengene.number, pi0 = pi0, foldchange = foldchange, |
|
|
708 |
foldchange.star = foldchange.star, sdstar.keep = sdstar.keep, |
|
|
709 |
resp.type = resp.type, resp.type.arg = resp.type.arg, |
|
|
710 |
assay.type = assay.type, stand.contrasts = stand.contrasts, |
|
|
711 |
stand.contrasts.star = stand.contrasts.star, |
|
|
712 |
stand.contrasts.95 = stand.contrasts.95, |
|
|
713 |
depth = depth, call = this.call |
|
|
714 |
) |
|
|
715 |
) |
|
|
716 |
} |
|
|
717 |
|
|
|
718 |
#' @title Estimate sequencing depths |
|
|
719 |
#' @param x data matrix. nrow=#gene, ncol=#sample |
|
|
720 |
#' @return depth: estimated sequencing depth. a vector with len sample. |
|
|
721 |
samr.estimate.depth <- function(x) { |
|
|
722 |
iter <- 5 |
|
|
723 |
cmeans <- colSums(x) / sum(x) |
|
|
724 |
for (i in 1:iter) { |
|
|
725 |
n0 <- rowSums(x) %*% t(cmeans) |
|
|
726 |
prop <- rowSums((x - n0)^2 / (n0 + 1e-08)) |
|
|
727 |
qs <- quantile(prop, c(0.25, 0.75)) |
|
|
728 |
keep <- (prop >= qs[1]) & (prop <= qs[2]) |
|
|
729 |
cmeans <- colMeans(x[keep, ]) |
|
|
730 |
cmeans <- cmeans / sum(cmeans) |
|
|
731 |
} |
|
|
732 |
depth <- cmeans / mean(cmeans) |
|
|
733 |
return(depth) |
|
|
734 |
} |
|
|
735 |
|
|
|
736 |
#' @title Resampling |
|
|
737 |
#' @param x data matrix. nrow=#gene, ncol=#sample |
|
|
738 |
#' @param d estimated sequencing depth |
|
|
739 |
#' @param nresamp number of resamplings |
|
|
740 |
#' @return xresamp: an rank array with dim #gene*#sample*nresamp |
|
|
741 |
#' @description Corresponds to `samr::resample` |
|
|
742 |
#' @importFrom stats rpois runif |
|
|
743 |
resa <- function(x, d, nresamp = 20) { |
|
|
744 |
ng <- nrow(x) |
|
|
745 |
ns <- ncol(x) |
|
|
746 |
dbar <- exp(mean(log(d))) |
|
|
747 |
xresamp <- array(0, dim = c(ng, ns, nresamp)) |
|
|
748 |
for (k in 1:nresamp) { |
|
|
749 |
for (j in 1:ns) { |
|
|
750 |
xresamp[, j, k] <- rpois(n = ng, lambda = (dbar / d[j]) * |
|
|
751 |
x[, j]) + runif(ng) * 0.1 |
|
|
752 |
} |
|
|
753 |
} |
|
|
754 |
for (k in 1:nresamp) { |
|
|
755 |
xresamp[, , k] <- t(rankcols(t(xresamp[, , k]))) |
|
|
756 |
} |
|
|
757 |
return(xresamp) |
|
|
758 |
} |
|
|
759 |
|
|
|
760 |
#' @title Rank columns |
|
|
761 |
#' @description Ranks the elements within each col of the matrix x and returns |
|
|
762 |
#' these ranks in a matrix |
|
|
763 |
#' @note this function is equivalent to `samr::rankcol`, but uses `apply` to |
|
|
764 |
#' rank the colums instead of a compiled Fortran function which was causing our |
|
|
765 |
#' DEGanalysis functions to freeze in large datasets. |
|
|
766 |
#' @param x x |
|
|
767 |
rankcols <- function(x) { |
|
|
768 |
# ranks the elements within each col of the matrix x |
|
|
769 |
# and returns these ranks in a matrix |
|
|
770 |
n <- nrow(x) |
|
|
771 |
p <- ncol(x) |
|
|
772 |
mode(n) <- "integer" |
|
|
773 |
mode(p) <- "integer" |
|
|
774 |
mode(x) <- "double" |
|
|
775 |
xr <- apply(x, 2, rank) |
|
|
776 |
return(xr) |
|
|
777 |
} |
|
|
778 |
|
|
|
779 |
#' @title Check format |
|
|
780 |
#' @param y y |
|
|
781 |
#' @param resp.type resp type |
|
|
782 |
#' @param censoring.status censoring status |
|
|
783 |
check.format <- function(y, resp.type, censoring.status = NULL) { |
|
|
784 |
# here i do some format checks for the input data$y |
|
|
785 |
# note that checks for time course data are done in the |
|
|
786 |
# parse function for time course; |
|
|
787 |
# we then check the output from the parser in this function |
|
|
788 |
if (resp.type == samr.const.twoclass.unpaired.response | |
|
|
789 |
resp.type == samr.const.twoclass.unpaired.timecourse.response) { |
|
|
790 |
if (sum(y == 1) + sum(y == 2) != length(y)) { |
|
|
791 |
stop(paste( |
|
|
792 |
"Error in input response data: response type ", |
|
|
793 |
resp.type, " specified; values must be 1 or 2" |
|
|
794 |
)) |
|
|
795 |
} |
|
|
796 |
} |
|
|
797 |
if (resp.type == samr.const.twoclass.paired.response | resp.type == |
|
|
798 |
samr.const.twoclass.paired.timecourse.response) { |
|
|
799 |
if (sum(y) != 0) { |
|
|
800 |
stop(paste( |
|
|
801 |
"Error in input response data: response type ", |
|
|
802 |
resp.type, " specified; values must be -1, 1, -2, 2, etc" |
|
|
803 |
)) |
|
|
804 |
} |
|
|
805 |
if (sum(table(y[y > 0]) != abs(table(y[y < 0])))) { |
|
|
806 |
stop(paste( |
|
|
807 |
"Error in input response data: response type ", |
|
|
808 |
resp.type, " specified; values must be -1, 1, -2, 2, etc" |
|
|
809 |
)) |
|
|
810 |
} |
|
|
811 |
} |
|
|
812 |
if (resp.type == samr.const.oneclass.response | resp.type == |
|
|
813 |
samr.const.oneclass.timecourse.response) { |
|
|
814 |
if (sum(y == 1) != length(y)) { |
|
|
815 |
stop(paste( |
|
|
816 |
"Error in input response data: response type ", |
|
|
817 |
resp.type, " specified; values must all be 1" |
|
|
818 |
)) |
|
|
819 |
} |
|
|
820 |
} |
|
|
821 |
if (resp.type == samr.const.multiclass.response) { |
|
|
822 |
tt <- table(y) |
|
|
823 |
nc <- length(tt) |
|
|
824 |
if (sum(y <= nc & y > 0) < length(y)) { |
|
|
825 |
stop( |
|
|
826 |
"Error in input response data: response type ", resp.type, |
|
|
827 |
" specified; values must be 1,2, ... number of classes" |
|
|
828 |
) |
|
|
829 |
} |
|
|
830 |
for (k in 1:nc) { |
|
|
831 |
if (sum(y == k) < 2) { |
|
|
832 |
stop(paste( |
|
|
833 |
"Error in input response data: response type ", |
|
|
834 |
resp.type, " specified; there must be >1 sample per class" |
|
|
835 |
)) |
|
|
836 |
} |
|
|
837 |
} |
|
|
838 |
} |
|
|
839 |
if (resp.type == samr.const.quantitative.response) { |
|
|
840 |
if (!is.numeric(y)) { |
|
|
841 |
stop(paste( |
|
|
842 |
"Error in input response data: response type", |
|
|
843 |
resp.type, " specified; values must be numeric" |
|
|
844 |
)) |
|
|
845 |
} |
|
|
846 |
} |
|
|
847 |
if (resp.type == samr.const.survival.response) { |
|
|
848 |
if (is.null(censoring.status)) { |
|
|
849 |
stop(paste( |
|
|
850 |
"Error in input response data: response type ", |
|
|
851 |
resp.type, " specified; error in censoring indicator" |
|
|
852 |
)) |
|
|
853 |
} |
|
|
854 |
if (!is.numeric(y) | sum(y < 0) > 0) { |
|
|
855 |
stop( |
|
|
856 |
"Error in input response data: response type ", resp.type, |
|
|
857 |
" specified; survival times must be numeric and nonnegative" |
|
|
858 |
) |
|
|
859 |
if (sum(censoring.status == 0) + sum(censoring.status == |
|
|
860 |
1) != length(censoring.status)) { |
|
|
861 |
stop( |
|
|
862 |
"Error in input response data: response type ", resp.type, |
|
|
863 |
" specified; censoring indicators must be ", |
|
|
864 |
"0 (censored) or 1 (failed)" |
|
|
865 |
) |
|
|
866 |
} |
|
|
867 |
} |
|
|
868 |
if (sum(censoring.status == 1) < 1) { |
|
|
869 |
stop(paste( |
|
|
870 |
"Error in input response data: response type ", |
|
|
871 |
resp.type, " specified; there are no uncensored observations" |
|
|
872 |
)) |
|
|
873 |
} |
|
|
874 |
} |
|
|
875 |
return() |
|
|
876 |
} |
|
|
877 |
|
|
|
878 |
#' @title Twoclass Wilcoxon statistics |
|
|
879 |
#' @param xresamp an rank array with dim #gene*#sample*nresamp |
|
|
880 |
#' @param y outcome vector of values 1 and 2 |
|
|
881 |
#' @return the statistic. |
|
|
882 |
wilcoxon.unpaired.seq.func <- function(xresamp, y) { |
|
|
883 |
tt <- rep(0, dim(xresamp)[1]) |
|
|
884 |
for (i in seq_len(dim(xresamp)[3])) { |
|
|
885 |
tt <- tt + rowSums(xresamp[, y == 2, i]) - sum(y == 2) * |
|
|
886 |
(length(y) + 1) / 2 |
|
|
887 |
} |
|
|
888 |
tt <- tt / dim(xresamp)[3] |
|
|
889 |
return(list(tt = tt, numer = tt, sd = rep(1, length(tt)))) |
|
|
890 |
} |
|
|
891 |
wilcoxon.paired.seq.func <- function(xresamp, y) { |
|
|
892 |
tt <- rep(0, dim(xresamp)[1]) |
|
|
893 |
for (i in seq_len(dim(xresamp)[3])) { |
|
|
894 |
tt <- tt + rowSums(xresamp[, y > 0, i]) - sum(y > 0) * |
|
|
895 |
(length(y) + 1) / 2 |
|
|
896 |
} |
|
|
897 |
tt <- tt / dim(xresamp)[3] |
|
|
898 |
return(list(tt = tt, numer = tt, sd = rep(1, length(tt)))) |
|
|
899 |
} |
|
|
900 |
getperms <- function(y, nperms) { |
|
|
901 |
total.perms <- factorial(length(y)) |
|
|
902 |
if (total.perms <= nperms) { |
|
|
903 |
perms <- permute(seq_len(length(y))) |
|
|
904 |
all.perms.flag <- 1 |
|
|
905 |
nperms.act <- total.perms |
|
|
906 |
} |
|
|
907 |
if (total.perms > nperms) { |
|
|
908 |
perms <- matrix(NA, nrow = nperms, ncol = length(y)) |
|
|
909 |
for (i in 1:nperms) { |
|
|
910 |
perms[i, ] <- sample(seq_len(length(y)), size = length(y)) |
|
|
911 |
} |
|
|
912 |
all.perms.flag <- 0 |
|
|
913 |
nperms.act <- nperms |
|
|
914 |
} |
|
|
915 |
return(list( |
|
|
916 |
perms = perms, all.perms.flag = all.perms.flag, |
|
|
917 |
nperms.act = nperms.act |
|
|
918 |
)) |
|
|
919 |
} |
|
|
920 |
foldchange.twoclass <- function(x, y, logged2) { |
|
|
921 |
m1 <- rowMeans(x[, y == 1, drop = FALSE]) |
|
|
922 |
m2 <- rowMeans(x[, y == 2, drop = FALSE]) |
|
|
923 |
if (!logged2) { |
|
|
924 |
fc <- m2 / m1 |
|
|
925 |
} |
|
|
926 |
if (logged2) { |
|
|
927 |
fc <- 2^{ |
|
|
928 |
m2 - m1 |
|
|
929 |
} |
|
|
930 |
} |
|
|
931 |
return(fc) |
|
|
932 |
} |
|
|
933 |
#' @title Foldchange of twoclass unpaired sequencing data |
|
|
934 |
#' @param x x |
|
|
935 |
#' @param y y |
|
|
936 |
#' @param depth depth |
|
|
937 |
foldchange.seq.twoclass.unpaired <- function(x, y, depth) { |
|
|
938 |
x.norm <- scale(x, center = FALSE, scale = depth) + 1e-08 |
|
|
939 |
fc <- apply(x.norm[, y == 2], 1, median) / |
|
|
940 |
apply(x.norm[, y == |
|
|
941 |
1], 1, median) |
|
|
942 |
return(fc) |
|
|
943 |
} |
|
|
944 |
foldchange.seq.twoclass.paired <- function(x, y, depth) { |
|
|
945 |
nc <- ncol(x) / 2 |
|
|
946 |
o1 <- o2 <- rep(0, nc) |
|
|
947 |
for (j in 1:nc) { |
|
|
948 |
o1[j] <- which(y == -j) |
|
|
949 |
o2[j] <- which(y == j) |
|
|
950 |
} |
|
|
951 |
x.norm <- scale(x, center = FALSE, scale = depth) + 1e-08 |
|
|
952 |
d <- x.norm[, o2, drop = FALSE] / x.norm[, o1, drop = FALSE] |
|
|
953 |
fc <- lapply(d, 1, function(x) median(x, na.rm = TRUE)) |
|
|
954 |
return(fc) |
|
|
955 |
} |
|
|
956 |
permute <- function(elem) { |
|
|
957 |
# generates all perms of the vector elem |
|
|
958 |
if (!missing(elem)) { |
|
|
959 |
if (length(elem) == 2) { |
|
|
960 |
return(matrix(c(elem, elem[2], elem[1]), nrow = 2)) |
|
|
961 |
} |
|
|
962 |
last.matrix <- permute(elem[-1]) |
|
|
963 |
dim.last <- dim(last.matrix) |
|
|
964 |
new.matrix <- matrix(0, nrow = dim.last[1] * (dim.last[2] + |
|
|
965 |
1), ncol = dim.last[2] + 1) |
|
|
966 |
for (row in 1:(dim.last[1])) { |
|
|
967 |
for (col in 1:(dim.last[2] + 1)) { |
|
|
968 |
new.matrix[row + (col - 1) * dim.last[1], ] <- |
|
|
969 |
insert.value(last.matrix[row, ], elem[1], col) |
|
|
970 |
} |
|
|
971 |
} |
|
|
972 |
return(new.matrix) |
|
|
973 |
} else { |
|
|
974 |
message("Usage: permute(elem)\n\twhere elem is a vector") |
|
|
975 |
} |
|
|
976 |
} |
|
|
977 |
insert.value <- function(vec, newval, pos) { |
|
|
978 |
if (pos == 1) { |
|
|
979 |
return(c(newval, vec)) |
|
|
980 |
} |
|
|
981 |
lvec <- length(vec) |
|
|
982 |
if (pos > lvec) { |
|
|
983 |
return(c(vec, newval)) |
|
|
984 |
} |
|
|
985 |
return(c(vec[1:pos - 1], newval, vec[pos:lvec])) |
|
|
986 |
} |
|
|
987 |
parse.block.labels.for.2classes <- function(y) { |
|
|
988 |
# this only works for 2 class case- having form jBlockn, |
|
|
989 |
# where j=1 or 2 |
|
|
990 |
n <- length(y) |
|
|
991 |
y.act <- rep(NA, n) |
|
|
992 |
blocky <- rep(NA, n) |
|
|
993 |
for (i in 1:n) { |
|
|
994 |
blocky[i] <- as.numeric(substring(y[i], 7, nchar(y[i]))) |
|
|
995 |
y.act[i] <- as.numeric(substring(y[i], 1, 1)) |
|
|
996 |
} |
|
|
997 |
return(list(y.act = y.act, blocky = blocky)) |
|
|
998 |
} |
|
|
999 |
parse.time.labels.and.summarize.data <- function( |
|
|
1000 |
x, |
|
|
1001 |
y, resp.type, time.summary.type) { |
|
|
1002 |
# parse time labels, and summarize time data for each |
|
|
1003 |
# person, via a slope or area |
|
|
1004 |
# does some error checking too |
|
|
1005 |
n <- length(y) |
|
|
1006 |
last5char <- rep(NA, n) |
|
|
1007 |
last3char <- rep(NA, n) |
|
|
1008 |
for (i in 1:n) { |
|
|
1009 |
last3char[i] <- substring(y[i], nchar(y[i]) - 2, nchar(y[i])) |
|
|
1010 |
last5char[i] <- substring(y[i], nchar(y[i]) - 4, nchar(y[i])) |
|
|
1011 |
} |
|
|
1012 |
if (sum(last3char == "End") != sum(last5char == "Start")) { |
|
|
1013 |
stop("Error in format of time course data: a Start or End tag is missing") |
|
|
1014 |
} |
|
|
1015 |
y.act <- rep(NA, n) |
|
|
1016 |
timey <- rep(NA, n) |
|
|
1017 |
person.id <- rep(NA, n) |
|
|
1018 |
k <- 1 |
|
|
1019 |
end.flag <- FALSE |
|
|
1020 |
person.id[1] <- 1 |
|
|
1021 |
if (substring(y[1], nchar(y[1]) - 4, nchar(y[1])) != "Start") { |
|
|
1022 |
stop( |
|
|
1023 |
"Error in format of time course data: ", |
|
|
1024 |
"first cell should have a Start tag" |
|
|
1025 |
) |
|
|
1026 |
} |
|
|
1027 |
for (i in 1:n) { |
|
|
1028 |
message(i) |
|
|
1029 |
j <- 1 |
|
|
1030 |
while (substring(y[i], j, j) != "T") { |
|
|
1031 |
j <- j + 1 |
|
|
1032 |
} |
|
|
1033 |
end.of.y <- j - 1 |
|
|
1034 |
y.act[i] <- as.numeric(substring(y[i], 1, end.of.y)) |
|
|
1035 |
timey[i] <- substring(y[i], end.of.y + 5, nchar(y[i])) |
|
|
1036 |
if (nchar(timey[i]) > 3 & substring(timey[i], nchar(timey[i]) - |
|
|
1037 |
2, nchar(timey[i])) == "End") { |
|
|
1038 |
end.flag <- TRUE |
|
|
1039 |
timey[i] <- substring(timey[i], 1, nchar(timey[i]) - |
|
|
1040 |
3) |
|
|
1041 |
} |
|
|
1042 |
if (nchar(timey[i]) > 3 & substring(timey[i], nchar(timey[i]) - |
|
|
1043 |
4, nchar(timey[i])) == "Start") { |
|
|
1044 |
timey[i] <- substring(timey[i], 1, nchar(timey[i]) - |
|
|
1045 |
5) |
|
|
1046 |
} |
|
|
1047 |
if (i < n & !end.flag) { |
|
|
1048 |
person.id[i + 1] <- k |
|
|
1049 |
} |
|
|
1050 |
if (i < n & end.flag) { |
|
|
1051 |
k <- k + 1 |
|
|
1052 |
person.id[i + 1] <- k |
|
|
1053 |
} |
|
|
1054 |
end.flag <- FALSE |
|
|
1055 |
} |
|
|
1056 |
timey <- as.numeric(timey) |
|
|
1057 |
# do a check that the format was correct |
|
|
1058 |
tt <- table(person.id, y.act) |
|
|
1059 |
junk <- function(x) { |
|
|
1060 |
sum(x != 0) |
|
|
1061 |
} |
|
|
1062 |
if (sum(apply(tt, 1, junk) != 1) > 0) { |
|
|
1063 |
num <- seq_len(nrow(tt))[apply(tt, 1, junk) > 1] |
|
|
1064 |
stop(paste( |
|
|
1065 |
"Error in format of time course data, timecourse #", |
|
|
1066 |
as.character(num) |
|
|
1067 |
)) |
|
|
1068 |
} |
|
|
1069 |
npeople <- length(unique(person.id)) |
|
|
1070 |
newx <- matrix(NA, nrow = nrow(x), ncol = npeople) |
|
|
1071 |
sd <- matrix(NA, nrow = nrow(x), ncol = npeople) |
|
|
1072 |
for (j in 1:npeople) { |
|
|
1073 |
jj <- person.id == j |
|
|
1074 |
tim <- timey[jj] |
|
|
1075 |
xc <- t(scale(t(x[, jj, drop = FALSE]), center = TRUE, scale = FALSE)) |
|
|
1076 |
if (time.summary.type == "slope") { |
|
|
1077 |
junk <- quantitative.func(xc, tim - mean(tim)) |
|
|
1078 |
newx[, j] <- junk$numer |
|
|
1079 |
sd[, j] <- junk$sd |
|
|
1080 |
} |
|
|
1081 |
if (time.summary.type == "signed.area") { |
|
|
1082 |
junk <- timearea.func(x[, jj, drop = FALSE], tim) |
|
|
1083 |
newx[, j] <- junk$numer |
|
|
1084 |
sd[, j] <- junk$sd |
|
|
1085 |
} |
|
|
1086 |
} |
|
|
1087 |
y.unique <- y.act[!duplicated(person.id)] |
|
|
1088 |
return(list(y = y.unique, x = newx, sd = sd)) |
|
|
1089 |
} |
|
|
1090 |
ttest.func <- function(x, y, s0 = 0, sd = NULL) { |
|
|
1091 |
n1 <- sum(y == 1) |
|
|
1092 |
n2 <- sum(y == 2) |
|
|
1093 |
m1 <- rowMeans(x[, y == 1, drop = FALSE]) |
|
|
1094 |
m2 <- rowMeans(x[, y == 2, drop = FALSE]) |
|
|
1095 |
if (is.null(sd)) { |
|
|
1096 |
sd <- sqrt(((n2 - 1) * varr(x[, y == 2], meanx = m2) + |
|
|
1097 |
(n1 - 1) * varr(x[, y == 1], meanx = m1)) * (1 / n1 + |
|
|
1098 |
1 / n2) / (n1 + n2 - 2)) |
|
|
1099 |
} |
|
|
1100 |
numer <- m2 - m1 |
|
|
1101 |
dif.obs <- (numer) / (sd + s0) |
|
|
1102 |
return(list(tt = dif.obs, numer = numer, sd = sd)) |
|
|
1103 |
} |
|
|
1104 |
|
|
|
1105 |
wilcoxon.func <- function(x, y, s0 = 0) { |
|
|
1106 |
n1 <- sum(y == 1) |
|
|
1107 |
n2 <- sum(y == 2) |
|
|
1108 |
p <- nrow(x) |
|
|
1109 |
r2 <- rowSums(t(apply(x, 1, rank))[, y == 2, drop = FALSE]) |
|
|
1110 |
numer <- r2 - (n2 / 2) * (n2 + 1) - (n1 * n2) / 2 |
|
|
1111 |
sd <- sqrt(n1 * n2 * (n1 + n2 + 1) / 12) |
|
|
1112 |
tt <- (numer) / (sd + s0) |
|
|
1113 |
return(list(tt = tt, numer = numer, sd = rep(sd, p))) |
|
|
1114 |
} |
|
|
1115 |
|
|
|
1116 |
onesample.ttest.func <- function(x, y, s0 = 0, sd = NULL) { |
|
|
1117 |
n <- length(y) |
|
|
1118 |
x <- x * matrix(y, nrow = nrow(x), ncol = ncol(x), byrow = TRUE) |
|
|
1119 |
m <- rowMeans(x) |
|
|
1120 |
if (is.null(sd)) { |
|
|
1121 |
sd <- sqrt(varr(x, meanx = m) / n) |
|
|
1122 |
} |
|
|
1123 |
dif.obs <- m / (sd + s0) |
|
|
1124 |
return(list(tt = dif.obs, numer = m, sd = sd)) |
|
|
1125 |
} |
|
|
1126 |
|
|
|
1127 |
patterndiscovery.func <- function(x, s0 = 0, eigengene.number = 1) { |
|
|
1128 |
a <- mysvd(x, n.components = eigengene.number) |
|
|
1129 |
v <- a$v[, eigengene.number] |
|
|
1130 |
# here we try to guess the most interpretable orientation |
|
|
1131 |
# for the eigengene |
|
|
1132 |
om <- abs(a$u[, eigengene.number]) > quantile( |
|
|
1133 |
abs(a$u[, eigengene.number]), |
|
|
1134 |
0.95 |
|
|
1135 |
) |
|
|
1136 |
if (median(a$u[om, eigengene.number]) < 0) { |
|
|
1137 |
v <- -1 * v |
|
|
1138 |
} |
|
|
1139 |
aa <- quantitative.func(x, v, s0 = s0) |
|
|
1140 |
eigengene <- cbind(seq_len(nrow(a$v)), v) |
|
|
1141 |
dimnames(eigengene) <- list(NULL, c("sample number", "value")) |
|
|
1142 |
return( |
|
|
1143 |
list(tt = aa$tt, numer = aa$numer, sd = aa$sd, eigengene = eigengene) |
|
|
1144 |
) |
|
|
1145 |
} |
|
|
1146 |
|
|
|
1147 |
paired.ttest.func <- function(x, y, s0 = 0, sd = NULL) { |
|
|
1148 |
nc <- ncol(x) / 2 |
|
|
1149 |
o <- 1:nc |
|
|
1150 |
o1 <- rep(0, ncol(x) / 2) |
|
|
1151 |
o2 <- o1 |
|
|
1152 |
for (j in 1:nc) { |
|
|
1153 |
o1[j] <- seq_len(ncol(x))[y == -o[j]] |
|
|
1154 |
} |
|
|
1155 |
for (j in 1:nc) { |
|
|
1156 |
o2[j] <- seq_len(ncol(x))[y == o[j]] |
|
|
1157 |
} |
|
|
1158 |
d <- x[, o2, drop = FALSE] - x[, o1, drop = FALSE] |
|
|
1159 |
if (is.matrix(d)) { |
|
|
1160 |
m <- rowMeans(d) |
|
|
1161 |
} |
|
|
1162 |
if (!is.matrix(d)) { |
|
|
1163 |
m <- mean(d) |
|
|
1164 |
} |
|
|
1165 |
if (is.null(sd)) { |
|
|
1166 |
if (is.matrix(d)) { |
|
|
1167 |
sd <- sqrt(varr(d, meanx = m) / nc) |
|
|
1168 |
} |
|
|
1169 |
if (!is.matrix(d)) { |
|
|
1170 |
sd <- sqrt(var(d) / nc) |
|
|
1171 |
} |
|
|
1172 |
} |
|
|
1173 |
dif.obs <- m / (sd + s0) |
|
|
1174 |
return(list(tt = dif.obs, numer = m, sd = sd)) |
|
|
1175 |
} |
|
|
1176 |
|
|
|
1177 |
cox.func <- function(x, y, censoring.status, s0 = 0) { |
|
|
1178 |
# find the index matrix |
|
|
1179 |
Dn <- sum(censoring.status == 1) |
|
|
1180 |
Dset <- seq_len(ncol(x))[censoring.status == 1] # the set of observed |
|
|
1181 |
ind <- matrix(0, ncol(x), Dn) |
|
|
1182 |
# get the matrix |
|
|
1183 |
for (i in 1:Dn) { |
|
|
1184 |
ind[y > y[Dset[i]] - 1e-08, i] <- 1 / sum(y > y[Dset[i]] - |
|
|
1185 |
1e-08) |
|
|
1186 |
} |
|
|
1187 |
ind.sums <- rowSums(ind) |
|
|
1188 |
x.ind <- x %*% ind |
|
|
1189 |
# get the derivatives |
|
|
1190 |
numer <- x %*% (censoring.status - ind.sums) |
|
|
1191 |
sd <- sqrt((x * x) %*% ind.sums - rowSums(x.ind * x.ind)) |
|
|
1192 |
tt <- numer / (sd + s0) |
|
|
1193 |
return(list(tt = tt, numer = numer, sd = sd)) |
|
|
1194 |
} |
|
|
1195 |
|
|
|
1196 |
multiclass.func <- function(x, y, s0 = 0) { |
|
|
1197 |
## assumes y is coded 1,2... |
|
|
1198 |
nn <- table(y) |
|
|
1199 |
m <- matrix(0, nrow = nrow(x), ncol = length(nn)) |
|
|
1200 |
v <- m |
|
|
1201 |
for (j in seq_len(length(nn))) { |
|
|
1202 |
m[, j] <- rowMeans(x[, y == j]) |
|
|
1203 |
v[, j] <- (nn[j] - 1) * varr(x[, y == j], meanx = m[ |
|
|
1204 |
, |
|
|
1205 |
j |
|
|
1206 |
]) |
|
|
1207 |
} |
|
|
1208 |
mbar <- rowMeans(x) |
|
|
1209 |
mm <- m - matrix(mbar, nrow = length(mbar), ncol = length(nn)) |
|
|
1210 |
fac <- (sum(nn) / prod(nn)) |
|
|
1211 |
scor <- sqrt(fac * (apply(matrix(nn, |
|
|
1212 |
nrow = nrow(m), ncol = ncol(m), |
|
|
1213 |
byrow = TRUE |
|
|
1214 |
) * mm * mm, 1, sum))) |
|
|
1215 |
sd <- sqrt(rowSums(v) * (1 / sum(nn - 1)) * sum(1 / nn)) |
|
|
1216 |
tt <- scor / (sd + s0) |
|
|
1217 |
mm.stand <- t(scale(t(mm), center = FALSE, scale = sd)) |
|
|
1218 |
return(list(tt = tt, numer = scor, sd = sd, stand.contrasts = mm.stand)) |
|
|
1219 |
} |
|
|
1220 |
|
|
|
1221 |
est.s0 <- function(tt, sd, s0.perc = seq(0, 1, by = 0.05)) { |
|
|
1222 |
## estimate s0 (exchangeability) factor for denominator. |
|
|
1223 |
## returns the actual estimate s0 (not a percentile) |
|
|
1224 |
br <- unique(quantile(sd, seq(0, 1, len = 101))) |
|
|
1225 |
nbr <- length(br) |
|
|
1226 |
a <- cut(sd, br, labels = FALSE) |
|
|
1227 |
a[is.na(a)] <- 1 |
|
|
1228 |
cv.sd <- rep(0, length(s0.perc)) |
|
|
1229 |
for (j in seq_len(length(s0.perc))) { |
|
|
1230 |
w <- quantile(sd, s0.perc[j]) |
|
|
1231 |
w[j == 1] <- 0 |
|
|
1232 |
tt2 <- tt * sd / (sd + w) |
|
|
1233 |
tt2[tt2 == Inf] <- NA |
|
|
1234 |
sds <- rep(0, nbr - 1) |
|
|
1235 |
for (i in 1:(nbr - 1)) { |
|
|
1236 |
sds[i] <- stats::mad(tt2[a == i], na.rm = TRUE) |
|
|
1237 |
} |
|
|
1238 |
cv.sd[j] <- sqrt(var(sds)) / mean(sds) |
|
|
1239 |
} |
|
|
1240 |
o <- seq_len(length(s0.perc))[cv.sd == min(cv.sd)] |
|
|
1241 |
# we don;t allow taking s0.hat to be 0th percentile when |
|
|
1242 |
# min sd is 0 |
|
|
1243 |
s0.hat <- quantile(sd[sd != 0], s0.perc[o]) |
|
|
1244 |
return(list(s0.perc = s0.perc, cv.sd = cv.sd, s0.hat = s0.hat)) |
|
|
1245 |
} |
|
|
1246 |
|
|
|
1247 |
permute.rows <- function(x) { |
|
|
1248 |
dd <- dim(x) |
|
|
1249 |
n <- dd[1] |
|
|
1250 |
p <- dd[2] |
|
|
1251 |
mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n)) |
|
|
1252 |
matrix(t(x)[order(mm)], n, p, byrow = TRUE) |
|
|
1253 |
} |
|
|
1254 |
|
|
|
1255 |
foldchange.paired <- function(x, y, logged2) { |
|
|
1256 |
nc <- ncol(x) / 2 |
|
|
1257 |
o <- 1:nc |
|
|
1258 |
o1 <- rep(0, ncol(x) / 2) |
|
|
1259 |
o2 <- o1 |
|
|
1260 |
for (j in 1:nc) { |
|
|
1261 |
o1[j] <- seq_len(ncol(x))[y == -o[j]] |
|
|
1262 |
} |
|
|
1263 |
for (j in 1:nc) { |
|
|
1264 |
o2[j] <- seq_len(ncol(x))[y == o[j]] |
|
|
1265 |
} |
|
|
1266 |
if (!logged2) { |
|
|
1267 |
d <- x[, o2, drop = FALSE] / x[, o1, drop = FALSE] |
|
|
1268 |
} |
|
|
1269 |
if (logged2) { |
|
|
1270 |
d <- x[, o2, drop = FALSE] - x[, o1, drop = FALSE] |
|
|
1271 |
} |
|
|
1272 |
if (!logged2) { |
|
|
1273 |
fc <- rowMeans(d) |
|
|
1274 |
} |
|
|
1275 |
if (logged2) { |
|
|
1276 |
fc <- 2^rowMeans(d) |
|
|
1277 |
} |
|
|
1278 |
return(fc) |
|
|
1279 |
} |
|
|
1280 |
foldchange.seq.twoclass.unpaired <- function(x, y, depth) { |
|
|
1281 |
x.norm <- scale(x, center = FALSE, scale = depth) + 1e-08 |
|
|
1282 |
fc <- apply(x.norm[, y == 2], 1, median) / |
|
|
1283 |
apply(x.norm[, y == 1], 1, median) |
|
|
1284 |
return(fc) |
|
|
1285 |
} |
|
|
1286 |
integer.base.b <- function(x, b = 2) { |
|
|
1287 |
xi <- as.integer(x) |
|
|
1288 |
if (xi == 0) { |
|
|
1289 |
return(0) |
|
|
1290 |
} |
|
|
1291 |
if (any(is.na(xi) | ((x - xi) != 0))) { |
|
|
1292 |
print(list(ERROR = "x not integer", x = x)) |
|
|
1293 |
} |
|
|
1294 |
N <- length(x) |
|
|
1295 |
xMax <- max(x) |
|
|
1296 |
ndigits <- (floor(logb(xMax, base = 2)) + 1) |
|
|
1297 |
Base.b <- array(NA, dim = c(N, ndigits)) |
|
|
1298 |
for (i in 1:ndigits) { |
|
|
1299 |
Base.b[, ndigits - i + 1] <- (x %% b) |
|
|
1300 |
x <- (x %/% b) |
|
|
1301 |
} |
|
|
1302 |
if (N == 1) { |
|
|
1303 |
Base.b[1, ] |
|
|
1304 |
} else { |
|
|
1305 |
Base.b |
|
|
1306 |
} |
|
|
1307 |
} |
|
|
1308 |
varr <- function(x, meanx = NULL) { |
|
|
1309 |
n <- ncol(x) |
|
|
1310 |
p <- nrow(x) |
|
|
1311 |
Y <- matrix(1, nrow = n, ncol = 1) |
|
|
1312 |
if (is.null(meanx)) { |
|
|
1313 |
meanx <- rowMeans(x) |
|
|
1314 |
} |
|
|
1315 |
ans <- rep(1, p) |
|
|
1316 |
xdif <- x - meanx %*% t(Y) |
|
|
1317 |
ans <- (xdif^2) %*% rep(1 / (n - 1), n) |
|
|
1318 |
ans <- drop(ans) |
|
|
1319 |
return(ans) |
|
|
1320 |
} |
|
|
1321 |
quantitative.func <- function(x, y, s0 = 0) { |
|
|
1322 |
# regression of x on y |
|
|
1323 |
my <- mean(y) |
|
|
1324 |
yy <- y - my |
|
|
1325 |
temp <- x %*% yy |
|
|
1326 |
mx <- rowMeans(x) |
|
|
1327 |
syy <- sum(yy^2) |
|
|
1328 |
scor <- temp / syy |
|
|
1329 |
b0hat <- mx - scor * my |
|
|
1330 |
ym <- matrix(y, nrow = nrow(x), ncol = ncol(x), byrow = TRUE) |
|
|
1331 |
xhat <- matrix(b0hat, nrow = nrow(x), ncol = ncol(x)) + ym * |
|
|
1332 |
matrix(scor, nrow = nrow(x), ncol = ncol(x)) |
|
|
1333 |
sigma <- sqrt(rowSums((x - xhat)^2) / (ncol(xhat) - 2)) |
|
|
1334 |
sd <- sigma / sqrt(syy) |
|
|
1335 |
tt <- scor / (sd + s0) |
|
|
1336 |
return(list(tt = tt, numer = scor, sd = sd)) |
|
|
1337 |
} |
|
|
1338 |
timearea.func <- function(x, y, s0 = 0) { |
|
|
1339 |
n <- ncol(x) |
|
|
1340 |
xx <- 0.5 * (x[, 2:n] + x[, 1:(n - 1)]) * matrix(diff(y), |
|
|
1341 |
nrow = nrow(x), ncol = n - 1, byrow = TRUE |
|
|
1342 |
) |
|
|
1343 |
numer <- rowMeans(xx) |
|
|
1344 |
sd <- sqrt(varr(xx, meanx = numer) / n) |
|
|
1345 |
tt <- numer / sqrt(sd + s0) |
|
|
1346 |
return(list(tt = tt, numer = numer, sd = sd)) |
|
|
1347 |
} |
|
|
1348 |
cox.seq.func <- function(xresamp, y, censoring.status) { |
|
|
1349 |
# get the dimensions |
|
|
1350 |
ns <- ncol(xresamp) |
|
|
1351 |
# prepare for the calculation |
|
|
1352 |
# find the index matrix |
|
|
1353 |
Dn <- sum(censoring.status == 1) |
|
|
1354 |
Dset <- seq_len(ns)[censoring.status == 1] # the set of died |
|
|
1355 |
ind <- matrix(0, ns, Dn) |
|
|
1356 |
# get the matrix |
|
|
1357 |
for (i in 1:Dn) { |
|
|
1358 |
ind[y >= y[Dset[i]] - 1e-08, i] <- 1 / sum(y >= y[Dset[i]] - |
|
|
1359 |
1e-08) |
|
|
1360 |
} |
|
|
1361 |
ind.sums <- rowSums(ind) |
|
|
1362 |
# calculate the score statistic |
|
|
1363 |
tt <- apply(xresamp, 3, function(x, cen.ind, ind.para, ind.sums.para) { |
|
|
1364 |
dev1 <- x %*% cen.ind |
|
|
1365 |
x.ind <- x %*% ind.para |
|
|
1366 |
dev2 <- (x * x) %*% ind.sums.para - rowSums(x.ind * x.ind) |
|
|
1367 |
dev1 / (sqrt(dev2) + 1e-08) |
|
|
1368 |
}, (censoring.status - ind.sums), ind, ind.sums) |
|
|
1369 |
tt <- rowMeans(tt) |
|
|
1370 |
return(list(tt = tt, numer = tt, sd = rep(1, length(tt)))) |
|
|
1371 |
} |
|
|
1372 |
compute.block.perms <- function(y, blocky, nperms) { |
|
|
1373 |
# y are the data (eg class label 1 vs 2; or -1,1, -2,2 for |
|
|
1374 |
# paired data) |
|
|
1375 |
# blocky are the block labels (abs(y) for paired daatr) |
|
|
1376 |
ny <- length(y) |
|
|
1377 |
nblocks <- length(unique(blocky)) |
|
|
1378 |
tab <- table(blocky) |
|
|
1379 |
total.nperms <- prod(factorial(tab)) |
|
|
1380 |
# block.perms is a list of all possible permutations |
|
|
1381 |
block.perms <- vector("list", nblocks) |
|
|
1382 |
# first enumerate all perms, when possible |
|
|
1383 |
if (total.nperms <= nperms) { |
|
|
1384 |
all.perms.flag <- 1 |
|
|
1385 |
nperms.act <- total.nperms |
|
|
1386 |
for (i in 1:nblocks) { |
|
|
1387 |
block.perms[[i]] <- permute(y[blocky == i]) |
|
|
1388 |
} |
|
|
1389 |
kk <- 0:(factorial(max(tab))^nblocks - 1) |
|
|
1390 |
# the rows of the matrix outerm runs through the 'outer |
|
|
1391 |
# product' |
|
|
1392 |
# first we assume that all blocks have max(tab) members; |
|
|
1393 |
# then we remove rows of outerm that |
|
|
1394 |
# are illegal (ie when a block has fewer members) |
|
|
1395 |
outerm <- matrix(0, nrow = length(kk), ncol = nblocks) |
|
|
1396 |
for (i in seq_len(length(kk))) { |
|
|
1397 |
kkkk <- integer.base.b(kk[i], b = factorial(max(tab))) |
|
|
1398 |
if (length(kkkk) > nblocks) { |
|
|
1399 |
kkkk <- kkkk[(length(kkkk) - nblocks + 1):length(kkkk)] |
|
|
1400 |
} |
|
|
1401 |
outerm[i, (nblocks - length(kkkk) + 1):nblocks] <- kkkk |
|
|
1402 |
} |
|
|
1403 |
outerm <- outerm + 1 |
|
|
1404 |
# now remove rows that are illegal perms |
|
|
1405 |
ind <- rep(TRUE, nrow(outerm)) |
|
|
1406 |
for (j in seq_len(ncol(outerm))) { |
|
|
1407 |
ind <- ind & outerm[, j] <= factorial(tab[j]) |
|
|
1408 |
} |
|
|
1409 |
outerm <- outerm[ind, , drop = FALSE] |
|
|
1410 |
# finally, construct permutation matrix from outer product |
|
|
1411 |
permsy <- matrix(NA, nrow = total.nperms, ncol = ny) |
|
|
1412 |
for (i in 1:total.nperms) { |
|
|
1413 |
junk <- NULL |
|
|
1414 |
for (j in 1:nblocks) { |
|
|
1415 |
junk <- c(junk, block.perms[[j]][outerm[i, j], ]) |
|
|
1416 |
} |
|
|
1417 |
permsy[i, ] <- junk |
|
|
1418 |
} |
|
|
1419 |
} |
|
|
1420 |
# next handle case when there are too many perms to enumerate |
|
|
1421 |
if (total.nperms > nperms) { |
|
|
1422 |
all.perms.flag <- 0 |
|
|
1423 |
nperms.act <- nperms |
|
|
1424 |
permsy <- NULL |
|
|
1425 |
block.perms <- vector("list", nblocks) |
|
|
1426 |
for (j in 1:nblocks) { |
|
|
1427 |
block.perms[[j]] <- sample.perms(y[blocky == j], nperms = nperms) |
|
|
1428 |
} |
|
|
1429 |
for (j in 1:nblocks) { |
|
|
1430 |
permsy <- cbind(permsy, block.perms[[j]]) |
|
|
1431 |
} |
|
|
1432 |
} |
|
|
1433 |
return(list( |
|
|
1434 |
permsy = permsy, all.perms.flag = all.perms.flag, |
|
|
1435 |
nperms.act = nperms.act |
|
|
1436 |
)) |
|
|
1437 |
} |
|
|
1438 |
sample.perms <- function(elem, nperms) { |
|
|
1439 |
# randomly generates nperms of the vector elem |
|
|
1440 |
res <- permute.rows(matrix(elem, |
|
|
1441 |
nrow = nperms, ncol = length(elem), |
|
|
1442 |
byrow = TRUE |
|
|
1443 |
)) |
|
|
1444 |
return(res) |
|
|
1445 |
} |
|
|
1446 |
mysvd <- function(x, n.components = NULL) { |
|
|
1447 |
# finds PCs of matrix x |
|
|
1448 |
p <- nrow(x) |
|
|
1449 |
n <- ncol(x) |
|
|
1450 |
# center the observations (rows) |
|
|
1451 |
feature.means <- rowMeans(x) |
|
|
1452 |
x <- t(scale(t(x), center = feature.means, scale = FALSE)) |
|
|
1453 |
if (is.null(n.components)) { |
|
|
1454 |
n.components <- min(n, p) |
|
|
1455 |
} |
|
|
1456 |
if (p > n) { |
|
|
1457 |
a <- eigen(t(x) %*% x) |
|
|
1458 |
v <- a$vec[, 1:n.components, drop = FALSE] |
|
|
1459 |
d <- sqrt(a$val[1:n.components, drop = FALSE]) |
|
|
1460 |
u <- scale(x %*% v, center = FALSE, scale = d) |
|
|
1461 |
return(list(u = u, d = d, v = v)) |
|
|
1462 |
} else { |
|
|
1463 |
junk <- svd(x, LINPACK = TRUE) |
|
|
1464 |
nc <- min(ncol(junk$u), n.components) |
|
|
1465 |
return(list(u = junk$u[, 1:nc], d = junk$d[1:nc], v = junk$v[ |
|
|
1466 |
, |
|
|
1467 |
1:nc |
|
|
1468 |
])) |
|
|
1469 |
} |
|
|
1470 |
} |
|
|
1471 |
quantitative.seq.func <- function(xresamp, y) { |
|
|
1472 |
tt <- rep(0, dim(xresamp)[1]) |
|
|
1473 |
for (i in seq_len(dim(xresamp)[3])) { |
|
|
1474 |
y.ranked <- rank(y, ties.method = "random") - (dim(xresamp)[2] + |
|
|
1475 |
1) / 2 |
|
|
1476 |
tt <- tt + (xresamp[, , i] - (dim(xresamp)[2] + 1) / 2) %*% |
|
|
1477 |
y.ranked |
|
|
1478 |
} |
|
|
1479 |
ns <- dim(xresamp)[2] |
|
|
1480 |
tt <- tt / (dim(xresamp)[3] * (ns^3 - ns) / 12) |
|
|
1481 |
return(list(tt = as.vector(tt), numer = as.vector(tt), sd = rep( |
|
|
1482 |
1, |
|
|
1483 |
length(tt) |
|
|
1484 |
))) |
|
|
1485 |
} |
|
|
1486 |
multiclass.seq.func <- function(xresamp, y) { |
|
|
1487 |
# number of classes and number of samples in each class |
|
|
1488 |
K <- max(y) |
|
|
1489 |
n.each <- rep(0, K) |
|
|
1490 |
for (k in 1:K) { |
|
|
1491 |
n.each[k] <- sum(y == k) |
|
|
1492 |
} |
|
|
1493 |
# the statistic |
|
|
1494 |
tt <- temp <- rep(0, dim(xresamp)[1]) |
|
|
1495 |
stand.contrasts <- matrix(0, dim(xresamp)[1], K) |
|
|
1496 |
|
|
|
1497 |
for (i in seq_len(dim(xresamp)[3])) { |
|
|
1498 |
for (k in 1:K) { |
|
|
1499 |
temp <- rowSums(xresamp[, y == k, i]) |
|
|
1500 |
tt <- tt + temp^2 / n.each[k] |
|
|
1501 |
stand.contrasts[, k] <- stand.contrasts[, k] + temp |
|
|
1502 |
} |
|
|
1503 |
} |
|
|
1504 |
# finalize |
|
|
1505 |
nresamp <- dim(xresamp)[3] |
|
|
1506 |
ns <- dim(xresamp)[2] |
|
|
1507 |
tt <- tt / nresamp * 12 / ns / (ns + 1) - 3 * (ns + 1) |
|
|
1508 |
stand.contrasts <- stand.contrasts / nresamp |
|
|
1509 |
stand.contrasts <- scale(stand.contrasts, |
|
|
1510 |
center = n.each * (ns + 1) / 2, |
|
|
1511 |
scale = sqrt(n.each * (ns - n.each) * (ns + 1) / 12) |
|
|
1512 |
) |
|
|
1513 |
return(list( |
|
|
1514 |
tt = tt, numer = tt, sd = rep(1, length(tt)), |
|
|
1515 |
stand.contrasts = stand.contrasts |
|
|
1516 |
)) |
|
|
1517 |
} |
|
|
1518 |
|
|
|
1519 |
# ============================================================================== |
|
|
1520 |
# samr.compute.delta.table |
|
|
1521 |
# ============================================================================== |
|
|
1522 |
## Jun added starts |
|
|
1523 |
samr.compute.delta.table <- function( |
|
|
1524 |
samr.obj, min.foldchange = 0, |
|
|
1525 |
dels = NULL, nvals = 50) { |
|
|
1526 |
res <- NULL |
|
|
1527 |
if (samr.obj$assay.type == "array") { |
|
|
1528 |
res <- samr.compute.delta.table.array( |
|
|
1529 |
samr.obj, min.foldchange, |
|
|
1530 |
dels, nvals |
|
|
1531 |
) |
|
|
1532 |
} else if (samr.obj$assay.type == "seq") { |
|
|
1533 |
res <- samr.compute.delta.table.seq( |
|
|
1534 |
samr.obj, min.foldchange, |
|
|
1535 |
dels |
|
|
1536 |
) |
|
|
1537 |
} |
|
|
1538 |
return(res) |
|
|
1539 |
} |
|
|
1540 |
## Jun added ends |
|
|
1541 |
|
|
|
1542 |
## Jun added the first row below, and commented the row |
|
|
1543 |
# after it |
|
|
1544 |
samr.compute.delta.table.array <- function( |
|
|
1545 |
samr.obj, |
|
|
1546 |
min.foldchange = 0, dels = NULL, nvals = 50) { |
|
|
1547 |
# samr.compute.delta.table <- function(samr.obj, |
|
|
1548 |
# min.foldchange=0, dels=NULL, nvals=50) { |
|
|
1549 |
# computes delta table, starting with samr object 'a', for |
|
|
1550 |
# nvals values of delta |
|
|
1551 |
lmax <- sqrt(max(abs(sort(samr.obj$tt) - samr.obj$evo))) |
|
|
1552 |
if (is.null(dels)) { |
|
|
1553 |
dels <- (seq(0, lmax, length = nvals)^2) |
|
|
1554 |
} |
|
|
1555 |
res1 <- NULL |
|
|
1556 |
foldchange.cond.up <- matrix(TRUE, |
|
|
1557 |
nrow = nrow(samr.obj$ttstar), |
|
|
1558 |
ncol = ncol(samr.obj$ttstar) |
|
|
1559 |
) |
|
|
1560 |
foldchange.cond.lo <- matrix(TRUE, |
|
|
1561 |
nrow = nrow(samr.obj$ttstar), |
|
|
1562 |
ncol = ncol(samr.obj$ttstar) |
|
|
1563 |
) |
|
|
1564 |
if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > |
|
|
1565 |
0)) { |
|
|
1566 |
foldchange.cond.up <- samr.obj$foldchange.star >= min.foldchange |
|
|
1567 |
foldchange.cond.lo <- samr.obj$foldchange.star <= 1 / min.foldchange |
|
|
1568 |
} |
|
|
1569 |
cutup <- rep(NA, length(dels)) |
|
|
1570 |
cutlow <- rep(NA, length(dels)) |
|
|
1571 |
g2 <- rep(NA, length(dels)) |
|
|
1572 |
errup <- matrix(NA, ncol = length(dels), nrow = ncol(samr.obj$ttstar0)) |
|
|
1573 |
errlow <- matrix(NA, ncol = length(dels), nrow = ncol(samr.obj$ttstar0)) |
|
|
1574 |
cat("", fill = TRUE) |
|
|
1575 |
cat("Computing delta table", fill = TRUE) |
|
|
1576 |
for (ii in seq_len(length(dels))) { |
|
|
1577 |
cat(ii, fill = TRUE) |
|
|
1578 |
ttt <- detec.slab(samr.obj, dels[ii], min.foldchange) |
|
|
1579 |
cutup[ii] <- 1e+10 |
|
|
1580 |
if (length(ttt$pup > 0)) { |
|
|
1581 |
cutup[ii] <- min(samr.obj$tt[ttt$pup]) |
|
|
1582 |
} |
|
|
1583 |
cutlow[ii] <- -1e+10 |
|
|
1584 |
if (length(ttt$plow) > 0) { |
|
|
1585 |
cutlow[ii] <- max(samr.obj$tt[ttt$plow]) |
|
|
1586 |
} |
|
|
1587 |
g2[ii] <- sumlengths(ttt) |
|
|
1588 |
errup[, ii] <- colSums(samr.obj$ttstar0 > cutup[ii] & |
|
|
1589 |
foldchange.cond.up) |
|
|
1590 |
errlow[, ii] <- colSums(samr.obj$ttstar0 < cutlow[ii] & |
|
|
1591 |
foldchange.cond.lo) |
|
|
1592 |
} |
|
|
1593 |
gmed <- apply(errup + errlow, 2, median) |
|
|
1594 |
g90 <- apply(errup + errlow, 2, quantile, 0.9) |
|
|
1595 |
res1 <- cbind( |
|
|
1596 |
samr.obj$pi0 * gmed, samr.obj$pi0 * g90, g2, |
|
|
1597 |
samr.obj$pi0 * gmed / g2, samr.obj$pi0 * g90 / g2, cutlow, |
|
|
1598 |
cutup |
|
|
1599 |
) |
|
|
1600 |
res1 <- cbind(dels, res1) |
|
|
1601 |
dimnames(res1) <- list(NULL, c( |
|
|
1602 |
"delta", "# med false pos", |
|
|
1603 |
"90th perc false pos", "# called", "median FDR", "90th perc FDR", |
|
|
1604 |
"cutlo", "cuthi" |
|
|
1605 |
)) |
|
|
1606 |
return(res1) |
|
|
1607 |
} |
|
|
1608 |
|
|
|
1609 |
####################################################################### |
|
|
1610 |
# \tcompute the delta table for sequencing data |
|
|
1611 |
####################################################################### |
|
|
1612 |
samr.compute.delta.table.seq <- function( |
|
|
1613 |
samr.obj, |
|
|
1614 |
min.foldchange = 0, dels = NULL) { |
|
|
1615 |
res1 <- NULL |
|
|
1616 |
flag <- TRUE |
|
|
1617 |
## check whether any gene satisfies the foldchange |
|
|
1618 |
# restrictions |
|
|
1619 |
if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | |
|
|
1620 |
samr.obj$resp.type == samr.const.twoclass.paired.response) & |
|
|
1621 |
(min.foldchange > 0)) { |
|
|
1622 |
sat.up <- (samr.obj$foldchange >= min.foldchange) & (samr.obj$evo > |
|
|
1623 |
0) |
|
|
1624 |
sat.dn <- (samr.obj$foldchange <= 1 / min.foldchange) & |
|
|
1625 |
(samr.obj$evo < 0) |
|
|
1626 |
if (sum(sat.up) + sum(sat.dn) == 0) { |
|
|
1627 |
flag <- FALSE |
|
|
1628 |
} |
|
|
1629 |
} |
|
|
1630 |
if (flag) { |
|
|
1631 |
if (is.null(dels)) { |
|
|
1632 |
dels <- generate.dels(samr.obj, min.foldchange = min.foldchange) |
|
|
1633 |
} |
|
|
1634 |
cat("Number of thresholds chosen (all possible thresholds) =", |
|
|
1635 |
length(dels), |
|
|
1636 |
fill = TRUE |
|
|
1637 |
) |
|
|
1638 |
if (length(dels) > 0) { |
|
|
1639 |
## sort delta to make the fast calculation right |
|
|
1640 |
dels <- sort(dels) |
|
|
1641 |
## get the upper and lower cutoffs |
|
|
1642 |
cat("Getting all the cutoffs for the thresholds...\n") |
|
|
1643 |
slabs <- samr.seq.detec.slabs(samr.obj, dels, min.foldchange) |
|
|
1644 |
cutup <- slabs$cutup |
|
|
1645 |
cutlow <- slabs$cutlow |
|
|
1646 |
g2 <- slabs$g2 |
|
|
1647 |
## get the number of errors under the null hypothesis |
|
|
1648 |
cat("Getting number of false positives in the permutation...\n") |
|
|
1649 |
errnum <- samr.seq.null.err( |
|
|
1650 |
samr.obj, min.foldchange, |
|
|
1651 |
cutup, cutlow |
|
|
1652 |
) |
|
|
1653 |
res1 <- NULL |
|
|
1654 |
gmed <- apply(errnum, 2, median) |
|
|
1655 |
g90 <- apply(errnum, 2, quantile, 0.9) |
|
|
1656 |
res1 <- cbind(samr.obj$pi0 * gmed, samr.obj$pi0 * |
|
|
1657 |
g90, g2, samr.obj$pi0 * gmed / g2, samr.obj$pi0 * |
|
|
1658 |
g90 / g2, cutlow, cutup) |
|
|
1659 |
res1 <- cbind(dels, res1) |
|
|
1660 |
dimnames(res1) <- list(NULL, c( |
|
|
1661 |
"delta", "# med false pos", |
|
|
1662 |
"90th perc false pos", "# called", "median FDR", |
|
|
1663 |
"90th perc FDR", "cutlo", "cuthi" |
|
|
1664 |
)) |
|
|
1665 |
} |
|
|
1666 |
} |
|
|
1667 |
return(res1) |
|
|
1668 |
} |
|
|
1669 |
|
|
|
1670 |
# ============================================================================== |
|
|
1671 |
# samr.plot |
|
|
1672 |
# ============================================================================== |
|
|
1673 |
samr.plot <- function(samr.obj, del = NULL, min.foldchange = 0) { |
|
|
1674 |
## make observed-expected plot |
|
|
1675 |
## takes foldchange into account too |
|
|
1676 |
if (is.null(del)) { |
|
|
1677 |
del <- sqrt(max(abs(sort(samr.obj$tt) - samr.obj$evo))) |
|
|
1678 |
} |
|
|
1679 |
b <- detec.slab(samr.obj, del, min.foldchange) |
|
|
1680 |
foldchange.cond.up <- rep(TRUE, length(samr.obj$evo)) |
|
|
1681 |
foldchange.cond.lo <- rep(TRUE, length(samr.obj$evo)) |
|
|
1682 |
if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > |
|
|
1683 |
0)) { |
|
|
1684 |
foldchange.cond.up <- samr.obj$foldchange >= min.foldchange |
|
|
1685 |
foldchange.cond.lo <- samr.obj$foldchange <= 1 / min.foldchange |
|
|
1686 |
} |
|
|
1687 |
col <- rep(1, length(samr.obj$evo)) |
|
|
1688 |
col[b$plow] <- 3 |
|
|
1689 |
col[b$pup] <- 2 |
|
|
1690 |
if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > |
|
|
1691 |
0)) { |
|
|
1692 |
col[!foldchange.cond.lo & !foldchange.cond.up] <- 1 |
|
|
1693 |
} |
|
|
1694 |
col.ordered <- col[order(samr.obj$tt)] |
|
|
1695 |
ylims <- range(samr.obj$tt) |
|
|
1696 |
xlims <- range(samr.obj$evo) |
|
|
1697 |
plot(samr.obj$evo, sort(samr.obj$tt), |
|
|
1698 |
xlab = "expected score", |
|
|
1699 |
ylab = "observed score", ylim = ylims, xlim = xlims, |
|
|
1700 |
type = "n" |
|
|
1701 |
) |
|
|
1702 |
points(samr.obj$evo, sort(samr.obj$tt), col = col.ordered) |
|
|
1703 |
abline(0, 1) |
|
|
1704 |
abline(del, 1, lty = 2) |
|
|
1705 |
abline(-del, 1, lty = 2) |
|
|
1706 |
} |
|
|
1707 |
|
|
|
1708 |
# ============================================================================== |
|
|
1709 |
# samr.compute.siggenes.table |
|
|
1710 |
# ============================================================================== |
|
|
1711 |
samr.compute.siggenes.table <- function( |
|
|
1712 |
samr.obj, del, |
|
|
1713 |
data, delta.table, min.foldchange = 0, all.genes = FALSE, |
|
|
1714 |
compute.localfdr = FALSE) { |
|
|
1715 |
## computes significant genes table, starting with samr |
|
|
1716 |
# object 'a' and 'delta.table' |
|
|
1717 |
## for a **single** value del |
|
|
1718 |
## if all.genes is true, all genes are printed (and value |
|
|
1719 |
# of del is ignored) |
|
|
1720 |
if (is.null(data$geneid)) { |
|
|
1721 |
data$geneid <- paste("g", seq_len(nrow(data$x)), sep = "") |
|
|
1722 |
} |
|
|
1723 |
if (is.null(data$genenames)) { |
|
|
1724 |
data$genenames <- paste("g", seq_len(nrow(data$x)), sep = "") |
|
|
1725 |
} |
|
|
1726 |
if (!all.genes) { |
|
|
1727 |
sig <- detec.slab(samr.obj, del, min.foldchange) |
|
|
1728 |
} |
|
|
1729 |
if (all.genes) { |
|
|
1730 |
p <- length(samr.obj$tt) |
|
|
1731 |
pup <- (1:p)[samr.obj$tt >= 0] |
|
|
1732 |
plo <- (1:p)[samr.obj$tt < 0] |
|
|
1733 |
sig <- list(pup = pup, plo = plo) |
|
|
1734 |
} |
|
|
1735 |
if (compute.localfdr) { |
|
|
1736 |
aa <- localfdr(samr.obj, min.foldchange) |
|
|
1737 |
if (length(sig$pup) > 0) { |
|
|
1738 |
fdr.up <- predictlocalfdr(aa$smooth.object, samr.obj$tt[sig$pup]) |
|
|
1739 |
} |
|
|
1740 |
if (length(sig$plo) > 0) { |
|
|
1741 |
fdr.lo <- predictlocalfdr(aa$smooth.object, samr.obj$tt[sig$plo]) |
|
|
1742 |
} |
|
|
1743 |
} |
|
|
1744 |
qvalues <- NULL |
|
|
1745 |
if (length(sig$pup) > 0 | length(sig$plo) > 0) { |
|
|
1746 |
qvalues <- qvalue.func(samr.obj, sig, delta.table) |
|
|
1747 |
} |
|
|
1748 |
res.up <- NULL |
|
|
1749 |
res.lo <- NULL |
|
|
1750 |
done <- FALSE |
|
|
1751 |
|
|
|
1752 |
# two class unpaired or paired (foldchange is reported) |
|
|
1753 |
if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | |
|
|
1754 |
samr.obj$resp.type == samr.const.twoclass.paired.response)) { |
|
|
1755 |
if (!is.null(sig$pup)) { |
|
|
1756 |
res.up <- cbind( |
|
|
1757 |
sig$pup + 1, data$genenames[sig$pup], |
|
|
1758 |
data$geneid[sig$pup], samr.obj$tt[sig$pup], samr.obj$numer[sig$pup], |
|
|
1759 |
samr.obj$sd[sig$pup], samr.obj$foldchange[sig$pup], |
|
|
1760 |
qvalues$qvalue.up |
|
|
1761 |
) |
|
|
1762 |
if (compute.localfdr) { |
|
|
1763 |
res.up <- cbind(res.up, fdr.up) |
|
|
1764 |
} |
|
|
1765 |
temp.names <- list(NULL, c( |
|
|
1766 |
"Row", "Gene ID", "Gene Name", |
|
|
1767 |
"Score(d)", "Numerator(r)", "Denominator(s+s0)", |
|
|
1768 |
"Fold Change", "q-value(%)" |
|
|
1769 |
)) |
|
|
1770 |
if (compute.localfdr) { |
|
|
1771 |
temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") |
|
|
1772 |
} |
|
|
1773 |
dimnames(res.up) <- temp.names |
|
|
1774 |
} |
|
|
1775 |
if (!is.null(sig$plo)) { |
|
|
1776 |
res.lo <- cbind( |
|
|
1777 |
sig$plo + 1, data$genenames[sig$plo], |
|
|
1778 |
data$geneid[sig$plo], samr.obj$tt[sig$plo], samr.obj$numer[sig$plo], |
|
|
1779 |
samr.obj$sd[sig$plo], samr.obj$foldchange[sig$plo], |
|
|
1780 |
qvalues$qvalue.lo |
|
|
1781 |
) |
|
|
1782 |
if (compute.localfdr) { |
|
|
1783 |
res.lo <- cbind(res.lo, fdr.lo) |
|
|
1784 |
} |
|
|
1785 |
temp.names <- list(NULL, c( |
|
|
1786 |
"Row", "Gene ID", "Gene Name", |
|
|
1787 |
"Score(d)", "Numerator(r)", "Denominator(s+s0)", |
|
|
1788 |
"Fold Change", "q-value(%)" |
|
|
1789 |
)) |
|
|
1790 |
if (compute.localfdr) { |
|
|
1791 |
temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") |
|
|
1792 |
} |
|
|
1793 |
dimnames(res.lo) <- temp.names |
|
|
1794 |
} |
|
|
1795 |
done <- TRUE |
|
|
1796 |
} |
|
|
1797 |
|
|
|
1798 |
# multiclass |
|
|
1799 |
if (samr.obj$resp.type == samr.const.multiclass.response) { |
|
|
1800 |
if (!is.null(sig$pup)) { |
|
|
1801 |
res.up <- cbind( |
|
|
1802 |
sig$pup + 1, data$genenames[sig$pup], |
|
|
1803 |
data$geneid[sig$pup], samr.obj$tt[sig$pup], samr.obj$numer[sig$pup], |
|
|
1804 |
samr.obj$sd[sig$pup], samr.obj$stand.contrasts[sig$pup, ], |
|
|
1805 |
qvalues$qvalue.up |
|
|
1806 |
) |
|
|
1807 |
|
|
|
1808 |
if (compute.localfdr) { |
|
|
1809 |
res.up <- cbind(res.up, fdr.up) |
|
|
1810 |
} |
|
|
1811 |
|
|
|
1812 |
collabs.contrast <- paste( |
|
|
1813 |
"contrast-", as.character(seq_len(ncol(samr.obj$stand.contrasts))), |
|
|
1814 |
sep = "" |
|
|
1815 |
) |
|
|
1816 |
temp.names <- list(NULL, c( |
|
|
1817 |
"Row", "Gene ID", "Gene Name", |
|
|
1818 |
"Score(d)", "Numerator(r)", "Denominator(s+s0)", |
|
|
1819 |
collabs.contrast, "q-value(%)" |
|
|
1820 |
)) |
|
|
1821 |
|
|
|
1822 |
if (compute.localfdr) { |
|
|
1823 |
temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") |
|
|
1824 |
} |
|
|
1825 |
dimnames(res.up) <- temp.names |
|
|
1826 |
} |
|
|
1827 |
res.lo <- NULL |
|
|
1828 |
done <- TRUE |
|
|
1829 |
} |
|
|
1830 |
|
|
|
1831 |
# all other cases |
|
|
1832 |
if (!done) { |
|
|
1833 |
if (!is.null(sig$pup)) { |
|
|
1834 |
res.up <- cbind( |
|
|
1835 |
sig$pup + 1, data$genenames[sig$pup], |
|
|
1836 |
data$geneid[sig$pup], samr.obj$tt[sig$pup], samr.obj$numer[sig$pup], |
|
|
1837 |
samr.obj$sd[sig$pup], samr.obj$foldchange[sig$pup], |
|
|
1838 |
qvalues$qvalue.up |
|
|
1839 |
) |
|
|
1840 |
if (compute.localfdr) { |
|
|
1841 |
res.up <- cbind(res.up, fdr.up) |
|
|
1842 |
} |
|
|
1843 |
temp.names <- list(NULL, c( |
|
|
1844 |
"Row", "Gene ID", "Gene Name", |
|
|
1845 |
"Score(d)", "Numerator(r)", "Denominator(s+s0)", |
|
|
1846 |
"q-value(%)" |
|
|
1847 |
)) |
|
|
1848 |
if (compute.localfdr) { |
|
|
1849 |
temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") |
|
|
1850 |
} |
|
|
1851 |
dimnames(res.up) <- temp.names |
|
|
1852 |
} |
|
|
1853 |
if (!is.null(sig$plo)) { |
|
|
1854 |
res.lo <- cbind( |
|
|
1855 |
sig$plo + 1, data$genenames[sig$plo], |
|
|
1856 |
data$geneid[sig$plo], samr.obj$tt[sig$plo], samr.obj$numer[sig$plo], |
|
|
1857 |
samr.obj$sd[sig$plo], samr.obj$foldchange[sig$plo], |
|
|
1858 |
qvalues$qvalue.lo |
|
|
1859 |
) |
|
|
1860 |
if (compute.localfdr) { |
|
|
1861 |
res.lo <- cbind(res.lo, fdr.lo) |
|
|
1862 |
} |
|
|
1863 |
temp.names <- list(NULL, c( |
|
|
1864 |
"Row", "Gene ID", "Gene Name", |
|
|
1865 |
"Score(d)", "Numerator(r)", "Denominator(s+s0)", |
|
|
1866 |
"q-value(%)" |
|
|
1867 |
)) |
|
|
1868 |
if (compute.localfdr) { |
|
|
1869 |
temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") |
|
|
1870 |
} |
|
|
1871 |
dimnames(res.lo) <- temp.names |
|
|
1872 |
} |
|
|
1873 |
done <- TRUE |
|
|
1874 |
} |
|
|
1875 |
if (!is.null(res.up)) { |
|
|
1876 |
o1 <- order(-samr.obj$tt[sig$pup]) |
|
|
1877 |
res.up <- res.up[o1, , drop = FALSE] |
|
|
1878 |
} |
|
|
1879 |
if (!is.null(res.lo)) { |
|
|
1880 |
o2 <- order(samr.obj$tt[sig$plo]) |
|
|
1881 |
res.lo <- res.lo[o2, , drop = FALSE] |
|
|
1882 |
} |
|
|
1883 |
color.ind.for.multi <- NULL |
|
|
1884 |
if ( |
|
|
1885 |
samr.obj$resp.type == samr.const.multiclass.response & !is.null(sig$pup) |
|
|
1886 |
) { |
|
|
1887 |
condition_1 <- |
|
|
1888 |
samr.obj$stand.contrasts[sig$pup, ] > samr.obj$stand.contrasts.95[2] |
|
|
1889 |
condition_2 <- |
|
|
1890 |
samr.obj$stand.contrasts[sig$pup, ] < samr.obj$stand.contrasts.95[1] |
|
|
1891 |
color.ind.for.multi <- 1 * condition_1 + (-1) * condition_2 |
|
|
1892 |
} |
|
|
1893 |
ngenes.up <- nrow(res.up) |
|
|
1894 |
if (is.null(ngenes.up)) { |
|
|
1895 |
ngenes.up <- 0 |
|
|
1896 |
} |
|
|
1897 |
ngenes.lo <- nrow(res.lo) |
|
|
1898 |
if (is.null(ngenes.lo)) { |
|
|
1899 |
ngenes.lo <- 0 |
|
|
1900 |
} |
|
|
1901 |
return( |
|
|
1902 |
list( |
|
|
1903 |
genes.up = res.up, genes.lo = res.lo, |
|
|
1904 |
color.ind.for.multi = color.ind.for.multi, ngenes.up = ngenes.up, |
|
|
1905 |
ngenes.lo = ngenes.lo |
|
|
1906 |
) |
|
|
1907 |
) |
|
|
1908 |
} |
|
|
1909 |
generate.dels <- function(samr.obj, min.foldchange = 0) { |
|
|
1910 |
dels <- NULL |
|
|
1911 |
## initialize calculation |
|
|
1912 |
tag <- order(samr.obj$tt) |
|
|
1913 |
if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | |
|
|
1914 |
samr.obj$resp.type == samr.const.twoclass.paired.response) & |
|
|
1915 |
(min.foldchange > 0)) { |
|
|
1916 |
res.mat <- data.frame( |
|
|
1917 |
tt = samr.obj$tt[tag], fc = samr.obj$foldchange[tag], |
|
|
1918 |
evo = samr.obj$evo, dif = samr.obj$tt[tag] - samr.obj$evo |
|
|
1919 |
) |
|
|
1920 |
res.up <- res.mat[res.mat$evo > 0, ] |
|
|
1921 |
res.lo <- res.mat[res.mat$evo < 0, ] |
|
|
1922 |
res.up <- res.up[res.up$fc >= min.foldchange, ] |
|
|
1923 |
res.lo <- res.lo[res.lo$fc <= 1 / min.foldchange, ] |
|
|
1924 |
} else { |
|
|
1925 |
res.mat <- data.frame( |
|
|
1926 |
tt = samr.obj$tt[tag], evo = samr.obj$evo, |
|
|
1927 |
dif = samr.obj$tt[tag] - samr.obj$evo |
|
|
1928 |
) |
|
|
1929 |
res.up <- res.mat[res.mat$evo > 0, ] |
|
|
1930 |
res.lo <- res.mat[res.mat$evo < 0, ] |
|
|
1931 |
} |
|
|
1932 |
## for the upper part |
|
|
1933 |
up.vec <- rep(NA, nrow(res.up)) |
|
|
1934 |
if (nrow(res.up) > 0) { |
|
|
1935 |
st <- 1e-08 |
|
|
1936 |
i.cur <- 1 |
|
|
1937 |
for (i in seq_len(nrow(res.up))) { |
|
|
1938 |
if (res.up$dif[i] > st) { |
|
|
1939 |
st <- res.up$dif[i] |
|
|
1940 |
up.vec[i.cur] <- st |
|
|
1941 |
i.cur <- i.cur + 1 |
|
|
1942 |
} |
|
|
1943 |
} |
|
|
1944 |
} |
|
|
1945 |
## for the lower part |
|
|
1946 |
lo.vec <- rep(NA, nrow(res.lo)) |
|
|
1947 |
if (nrow(res.lo) > 0) { |
|
|
1948 |
st <- -1e-08 |
|
|
1949 |
i.cur <- 1 |
|
|
1950 |
for (i in seq(nrow(res.lo), 1)) { |
|
|
1951 |
if (res.lo$dif[i] < st) { |
|
|
1952 |
st <- res.lo$dif[i] |
|
|
1953 |
lo.vec[i.cur] <- st |
|
|
1954 |
i.cur <- i.cur + 1 |
|
|
1955 |
} |
|
|
1956 |
} |
|
|
1957 |
} |
|
|
1958 |
## combine them |
|
|
1959 |
vec <- c(up.vec, -lo.vec) |
|
|
1960 |
vec <- vec[!is.na(vec)] |
|
|
1961 |
vec <- vec - 1e-08 |
|
|
1962 |
dels <- sort(unique(vec)) |
|
|
1963 |
return(dels) |
|
|
1964 |
} |
|
|
1965 |
samr.seq.detec.slabs <- function(samr.obj, dels, min.foldchange) { |
|
|
1966 |
## initialize calculation |
|
|
1967 |
tag <- order(samr.obj$tt) |
|
|
1968 |
if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | |
|
|
1969 |
samr.obj$resp.type == samr.const.twoclass.paired.response) & |
|
|
1970 |
(min.foldchange > 0)) { |
|
|
1971 |
res.mat <- data.frame( |
|
|
1972 |
tt = samr.obj$tt[tag], fc = samr.obj$foldchange[tag], |
|
|
1973 |
evo = samr.obj$evo, dif = samr.obj$tt[tag] - samr.obj$evo |
|
|
1974 |
) |
|
|
1975 |
res.up <- res.mat[res.mat$evo > 0, ] |
|
|
1976 |
res.lo <- res.mat[res.mat$evo < 0, ] |
|
|
1977 |
res.up <- res.up[res.up$fc >= min.foldchange, ] |
|
|
1978 |
res.lo <- res.lo[res.lo$fc <= 1 / min.foldchange, ] |
|
|
1979 |
} else { |
|
|
1980 |
res.mat <- data.frame( |
|
|
1981 |
tt = samr.obj$tt[tag], evo = samr.obj$evo, |
|
|
1982 |
dif = samr.obj$tt[tag] - samr.obj$evo |
|
|
1983 |
) |
|
|
1984 |
res.up <- res.mat[res.mat$evo > 0, ] |
|
|
1985 |
res.lo <- res.mat[res.mat$evo < 0, ] |
|
|
1986 |
} |
|
|
1987 |
## begin calculating |
|
|
1988 |
cutup <- rep(1e+10, length(dels)) |
|
|
1989 |
cutlow <- rep(-1e+10, length(dels)) |
|
|
1990 |
g2.up <- g2.lo <- rep(0, length(dels)) |
|
|
1991 |
if (nrow(res.up) > 0) { |
|
|
1992 |
res.up <- data.frame( |
|
|
1993 |
dif = res.up$dif, tt = res.up$tt, |
|
|
1994 |
num = seq(nrow(res.up), 1) |
|
|
1995 |
) |
|
|
1996 |
## get the upper part |
|
|
1997 |
j <- 1 |
|
|
1998 |
ii <- 1 |
|
|
1999 |
while (j <= nrow(res.up) & ii <= length(dels)) { |
|
|
2000 |
if (res.up$dif[j] > dels[ii]) { |
|
|
2001 |
cutup[ii] <- res.up$tt[j] |
|
|
2002 |
g2.up[ii] <- res.up$num[j] |
|
|
2003 |
ii <- ii + 1 |
|
|
2004 |
} else { |
|
|
2005 |
j <- j + 1 |
|
|
2006 |
} |
|
|
2007 |
} |
|
|
2008 |
} |
|
|
2009 |
if (nrow(res.lo) > 0) { |
|
|
2010 |
res.lo <- data.frame( |
|
|
2011 |
dif = res.lo$dif, tt = res.lo$tt, |
|
|
2012 |
num = seq_len(nrow(res.lo)) |
|
|
2013 |
) |
|
|
2014 |
## get the lower part |
|
|
2015 |
j <- nrow(res.lo) |
|
|
2016 |
ii <- 1 |
|
|
2017 |
while (j >= 1 & ii <= length(dels)) { |
|
|
2018 |
if (res.lo$dif[j] < -dels[ii]) { |
|
|
2019 |
cutlow[ii] <- res.lo$tt[j] |
|
|
2020 |
g2.lo[ii] <- res.lo$num[j] |
|
|
2021 |
ii <- ii + 1 |
|
|
2022 |
} else { |
|
|
2023 |
j <- j - 1 |
|
|
2024 |
} |
|
|
2025 |
} |
|
|
2026 |
} |
|
|
2027 |
g2 <- g2.up + g2.lo |
|
|
2028 |
return(list(cutup = cutup, cutlow = cutlow, g2 = g2)) |
|
|
2029 |
} |
|
|
2030 |
sumlengths <- function(aa) { |
|
|
2031 |
length(aa$pl) + length(aa$pu) |
|
|
2032 |
} |
|
|
2033 |
|
|
|
2034 |
samr.seq.null.err <- function( |
|
|
2035 |
samr.obj, min.foldchange, |
|
|
2036 |
cutup, cutlow) { |
|
|
2037 |
errup <- matrix(NA, ncol = length(cutup), nrow = ncol(samr.obj$ttstar0)) |
|
|
2038 |
errlow <- matrix(NA, ncol = length(cutlow), nrow = ncol(samr.obj$ttstar0)) |
|
|
2039 |
cutup.rank <- rank(cutup, ties.method = "min") |
|
|
2040 |
cutlow.rank <- rank(-cutlow, ties.method = "min") |
|
|
2041 |
for (jj in seq_len(ncol(samr.obj$ttstar0))) { |
|
|
2042 |
keep.up <- keep.dn <- samr.obj$ttstar0[, jj] |
|
|
2043 |
if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | |
|
|
2044 |
samr.obj$resp.type == samr.const.twoclass.paired.response) & |
|
|
2045 |
(min.foldchange > 0)) { |
|
|
2046 |
keep.up <- keep.up[samr.obj$foldchange.star[, jj] >= |
|
|
2047 |
min.foldchange] |
|
|
2048 |
keep.dn <- keep.dn[samr.obj$foldchange.star[, jj] <= |
|
|
2049 |
1 / min.foldchange] |
|
|
2050 |
} |
|
|
2051 |
errup[jj, ] <- length(keep.up) - (rank(c(cutup, keep.up), |
|
|
2052 |
ties.method = "min" |
|
|
2053 |
)[seq_len(length(cutup))] - cutup.rank) |
|
|
2054 |
errlow[jj, ] <- length(keep.dn) - (rank(c(-cutlow, -keep.dn), |
|
|
2055 |
ties.method = "min" |
|
|
2056 |
)[seq_len(length(cutlow))] - cutlow.rank) |
|
|
2057 |
} |
|
|
2058 |
errnum <- errup + errlow |
|
|
2059 |
return(errnum) |
|
|
2060 |
} |
|
|
2061 |
detec.slab <- function(samr.obj, del, min.foldchange) { |
|
|
2062 |
## find genes above and below the slab of half-width del |
|
|
2063 |
# this calculation is tricky- for consistency, the slab |
|
|
2064 |
# condition picks |
|
|
2065 |
# all genes that are beyond the first departure from the |
|
|
2066 |
# slab |
|
|
2067 |
# then the fold change condition is applied (if applicable) |
|
|
2068 |
n <- length(samr.obj$tt) |
|
|
2069 |
tt <- samr.obj$tt |
|
|
2070 |
evo <- samr.obj$evo |
|
|
2071 |
tag <- order(tt) |
|
|
2072 |
pup <- NULL |
|
|
2073 |
foldchange.cond.up <- rep(TRUE, length(evo)) |
|
|
2074 |
foldchange.cond.lo <- rep(TRUE, length(evo)) |
|
|
2075 |
if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > |
|
|
2076 |
0)) { |
|
|
2077 |
foldchange.cond.up <- samr.obj$foldchange >= min.foldchange |
|
|
2078 |
foldchange.cond.lo <- samr.obj$foldchange <= 1 / min.foldchange |
|
|
2079 |
} |
|
|
2080 |
o1 <- (1:n)[(tt[tag] - evo > del) & evo > 0] |
|
|
2081 |
if (length(o1) > 0) { |
|
|
2082 |
o1 <- o1[1] |
|
|
2083 |
o11 <- o1:n |
|
|
2084 |
o111 <- rep(FALSE, n) |
|
|
2085 |
o111[tag][o11] <- TRUE |
|
|
2086 |
pup <- (1:n)[o111 & foldchange.cond.up] |
|
|
2087 |
} |
|
|
2088 |
plow <- NULL |
|
|
2089 |
o2 <- (1:n)[(evo - tt[tag] > del) & evo < 0] |
|
|
2090 |
if (length(o2) > 0) { |
|
|
2091 |
o2 <- o2[length(o2)] |
|
|
2092 |
o22 <- 1:o2 |
|
|
2093 |
o222 <- rep(FALSE, n) |
|
|
2094 |
o222[tag][o22] <- TRUE |
|
|
2095 |
plow <- (1:n)[o222 & foldchange.cond.lo] |
|
|
2096 |
} |
|
|
2097 |
return(list(plow = plow, pup = pup)) |
|
|
2098 |
} |
|
|
2099 |
|
|
|
2100 |
#' @importFrom stats smooth.spline |
|
|
2101 |
localfdr <- function( |
|
|
2102 |
samr.obj, min.foldchange, perc = 0.01, |
|
|
2103 |
df = 10) { |
|
|
2104 |
## estimates compute.localfdr at score 'd', using SAM |
|
|
2105 |
# object 'samr.obj' |
|
|
2106 |
## 'd' can be a vector of d scores |
|
|
2107 |
## returns estimate of symmetric fdr as a percentage |
|
|
2108 |
# this version uses a 1% symmetric window, and does not |
|
|
2109 |
# estimate fdr in |
|
|
2110 |
# windows having fewer than 100 genes |
|
|
2111 |
## to use: first run samr and then pass the resulting fit |
|
|
2112 |
# object to |
|
|
2113 |
## localfdr |
|
|
2114 |
## NOTE: at most 20 of the perms are used to estimate the |
|
|
2115 |
# fdr (for speed sake) |
|
|
2116 |
# I try two window shapes: symmetric and an assymetric one |
|
|
2117 |
# currently I use the symmetric window to estimate the |
|
|
2118 |
# compute.localfdr |
|
|
2119 |
ngenes <- length(samr.obj$tt) |
|
|
2120 |
mingenes <- 50 |
|
|
2121 |
# perc is increased, in order to get at least mingenes in a |
|
|
2122 |
# window |
|
|
2123 |
perc <- max(perc, mingenes / length(samr.obj$tt)) |
|
|
2124 |
nperms.to.use <- min(20, ncol(samr.obj$ttstar)) |
|
|
2125 |
nperms <- ncol(samr.obj$ttstar) |
|
|
2126 |
d <- seq(sort(samr.obj$tt)[1], sort(samr.obj$tt)[ngenes], |
|
|
2127 |
length = 100 |
|
|
2128 |
) |
|
|
2129 |
ndscore <- length(d) |
|
|
2130 |
dvector <- rep(NA, ndscore) |
|
|
2131 |
ind.foldchange <- rep(TRUE, length(samr.obj$tt)) |
|
|
2132 |
if (!is.null(samr.obj$foldchange[1]) & min.foldchange > 0) { |
|
|
2133 |
ind.foldchange <- (samr.obj$foldchange >= min.foldchange) | |
|
|
2134 |
(samr.obj$foldchange <= min.foldchange) |
|
|
2135 |
} |
|
|
2136 |
fdr.temp <- function(temp, dlow, dup, pi0, ind.foldchange) { |
|
|
2137 |
return(sum(pi0 * (temp >= dlow & temp <= dup & ind.foldchange))) |
|
|
2138 |
} |
|
|
2139 |
for (i in 1:ndscore) { |
|
|
2140 |
pi0 <- samr.obj$pi0 |
|
|
2141 |
r <- sum(samr.obj$tt < d[i]) |
|
|
2142 |
r22 <- round(max(r - length(samr.obj$tt) * perc / 2, 1)) |
|
|
2143 |
dlow.sym <- sort(samr.obj$tt)[r22] |
|
|
2144 |
r22 <- min(r + length(samr.obj$tt) * perc / 2, length(samr.obj$tt)) |
|
|
2145 |
dup.sym <- sort(samr.obj$tt)[r22] |
|
|
2146 |
oo <- samr.obj$tt >= dlow.sym & samr.obj$tt <= dup.sym & |
|
|
2147 |
ind.foldchange |
|
|
2148 |
if (!is.null(samr.obj$foldchange[1]) & min.foldchange > |
|
|
2149 |
0) { |
|
|
2150 |
temp <- as.vector(samr.obj$foldchange.star[, 1:nperms.to.use]) |
|
|
2151 |
ind.foldchange <- (temp >= min.foldchange) | (temp <= |
|
|
2152 |
min.foldchange) |
|
|
2153 |
} |
|
|
2154 |
temp <- samr.obj$ttstar0[, sample(1:nperms, size = nperms.to.use)] |
|
|
2155 |
fdr.sym <- median(apply( |
|
|
2156 |
temp, 2, fdr.temp, dlow.sym, |
|
|
2157 |
dup.sym, pi0, ind.foldchange |
|
|
2158 |
)) |
|
|
2159 |
fdr.sym <- 100 * fdr.sym / sum(oo) |
|
|
2160 |
dlow.sym <- dlow.sym |
|
|
2161 |
dup.sym <- dup.sym |
|
|
2162 |
dvector[i] <- fdr.sym |
|
|
2163 |
} |
|
|
2164 |
om <- !is.na(dvector) & (dvector != Inf) |
|
|
2165 |
aa <- smooth.spline(d[om], dvector[om], df = df) |
|
|
2166 |
return(list(smooth.object = aa, perc = perc, df = df)) |
|
|
2167 |
} |
|
|
2168 |
|
|
|
2169 |
predictlocalfdr <- function(smooth.object, d) { |
|
|
2170 |
yhat <- predict(smooth.object, d)$y |
|
|
2171 |
yhat <- pmin(yhat, 100) |
|
|
2172 |
yhat <- pmax(yhat, 0) |
|
|
2173 |
return(yhat) |
|
|
2174 |
} |
|
|
2175 |
|
|
|
2176 |
qvalue.func <- function(samr.obj, sig, delta.table) { |
|
|
2177 |
# returns q-value as a percentage (out of 100) |
|
|
2178 |
LARGE <- 1e+10 |
|
|
2179 |
qvalue.up <- rep(NA, length(sig$pup)) |
|
|
2180 |
o1 <- sig$pup |
|
|
2181 |
cutup <- delta.table[, 8] |
|
|
2182 |
FDR <- delta.table[, 5] |
|
|
2183 |
ii <- 0 |
|
|
2184 |
for (i in o1) { |
|
|
2185 |
o <- abs(cutup - samr.obj$tt[i]) |
|
|
2186 |
o[is.na(o)] <- LARGE |
|
|
2187 |
oo <- seq_len(length(o))[o == min(o)] |
|
|
2188 |
oo <- oo[length(oo)] |
|
|
2189 |
ii <- ii + 1 |
|
|
2190 |
qvalue.up[ii] <- FDR[oo] |
|
|
2191 |
} |
|
|
2192 |
qvalue.lo <- rep(NA, length(sig$plo)) |
|
|
2193 |
o2 <- sig$plo |
|
|
2194 |
cutlo <- delta.table[, 7] |
|
|
2195 |
ii <- 0 |
|
|
2196 |
for (i in o2) { |
|
|
2197 |
o <- abs(cutlo - samr.obj$tt[i]) |
|
|
2198 |
o[is.na(o)] <- LARGE |
|
|
2199 |
oo <- seq_len(length(o))[o == min(o)] |
|
|
2200 |
oo <- oo[length(oo)] |
|
|
2201 |
ii <- ii + 1 |
|
|
2202 |
qvalue.lo[ii] <- FDR[oo] |
|
|
2203 |
} |
|
|
2204 |
# any qvalues that are missing, are set to 1 (the highest |
|
|
2205 |
# value) |
|
|
2206 |
qvalue.lo[is.na(qvalue.lo)] <- 1 |
|
|
2207 |
qvalue.up[is.na(qvalue.up)] <- 1 |
|
|
2208 |
# ensure that each qvalue vector is monotone non-increasing |
|
|
2209 |
o1 <- order(samr.obj$tt[sig$plo]) |
|
|
2210 |
qv1 <- qvalue.lo[o1] |
|
|
2211 |
qv11 <- qv1 |
|
|
2212 |
if (length(qv1) > 1) { |
|
|
2213 |
for (i in 2:length(qv1)) { |
|
|
2214 |
if (qv11[i] < qv11[i - 1]) { |
|
|
2215 |
qv11[i] <- qv11[i - 1] |
|
|
2216 |
} |
|
|
2217 |
} |
|
|
2218 |
qv111 <- qv11 |
|
|
2219 |
qv111[o1] <- qv11 |
|
|
2220 |
} else { |
|
|
2221 |
qv111 <- qv1 |
|
|
2222 |
} |
|
|
2223 |
o2 <- order(samr.obj$tt[sig$pup]) |
|
|
2224 |
qv2 <- qvalue.up[o2] |
|
|
2225 |
qv22 <- qv2 |
|
|
2226 |
if (length(qv2) > 1) { |
|
|
2227 |
for (i in 2:length(qv2)) { |
|
|
2228 |
if (qv22[i] > qv22[i - 1]) { |
|
|
2229 |
qv22[i] <- qv22[i - 1] |
|
|
2230 |
} |
|
|
2231 |
} |
|
|
2232 |
qv222 <- qv22 |
|
|
2233 |
qv222[o2] <- qv22 |
|
|
2234 |
} else { |
|
|
2235 |
qv222 <- qv2 |
|
|
2236 |
} |
|
|
2237 |
return(list(qvalue.lo = 100 * qv111, qvalue.up = 100 * qv222)) |
|
|
2238 |
} |