--- a +++ b/R/internal-functions-samr-adapted.R @@ -0,0 +1,2238 @@ +# This script contains customized versions of functions found in the samr +# package. This is necessary because samr seems to have been abandoned, so an +# upstream collaboration doesn't seem possible at the time of writing. +# ATTENTION: The source code in this file is licensed under LGPL-3. + +# ============================================================================== +# Constants +# ============================================================================== +samr.const.twoclass.unpaired.response <- "Two class unpaired" +samr.const.twoclass.paired.response <- "Two class paired" +samr.const.oneclass.response <- "One class" +samr.const.quantitative.response <- "Quantitative" +samr.const.multiclass.response <- "Multiclass" +samr.const.twoclass.unpaired.timecourse.response <- + "Two class unpaired timecourse" +samr.const.twoclass.paired.timecourse.response <- "Two class paired timecourse" +samr.const.oneclass.timecourse.response <- "One class timecourse" +samr.const.survival.response <- "Survival" +samr.const.patterndiscovery.response <- "Pattern discovery" + +# ============================================================================== +# Functions +# ============================================================================== + +#' @title Significance analysis of microarrays +#' @description This function is an adaptation of `samr::samr` +#' @param data Data object with components x- p by n matrix of features, one +#' observation per column (missing values allowed); y- n-vector of outcome +#' measurements; censoring.status- n-vector of censoring censoring.status +#' (1= died or event occurred, 0=survived, or event was censored), needed for a +#' censored survival outcome +#' @param resp.type Problem type: "Quantitative" for a continuous parameter +#' (Available for both array and sequencing data); "Two class unpaired" (for +#' both array and sequencing data); "Survival" for censored survival outcome +#' (for both array and sequencingdata); "Multiclass": more than 2 groups (for +#' both array and sequencing data); "One class" for a single group (only for +#' array data); "Two class paired" for two classes with paired observations +#' (for both array and sequencing data); "Two class unpaired timecourse" (only +#' for array data), "One class time course" (only for array data), +#' "Two class.paired timecourse" (only for array data), or "Pattern discovery" +#' (only for array data) +#' @param assay.type Assay type: "array" for microarray data, "seq" for counts +#' from sequencing +#' @param s0 Exchangeability factor for denominator of test statistic; Default +#' is automatic choice. Only used for array data. +#' @param s0.perc Percentile of standard deviation values to use for s0; default +#' is automatic choice; -1 means s0=0 (different from s0.perc=0, meaning +#' s0=zeroeth percentile of standard deviation values= min of sd values. +#' Only used for array data. +#' @param nperms Number of permutations used to estimate false discovery rates +#' @param center.arrays Should the data for each sample (array) be median +#' centered at the outset? Default =FALSE. Only used for array data. +#' @param testStatistic Test statistic to use in two class unpaired case.Either +#' "standard" (t-statistic) or ,"wilcoxon" (Two-sample wilcoxon or Mann-Whitney +#' test). Only used for array data. +#' @param time.summary.type Summary measure for each time course: "slope", or +#' "signed.area"). Only used for array data. +#' @param regression.method Regression method for quantitative case: "standard", +#' (linear least squares) or "ranks" (linear least squares on ranked data). +#' Only used for array data. +#' @param return.x Should the matrix of feature values be returned? Only useful +#' for time course data, where x contains summaries of the features over time. +#' Otherwise x is the same as the input data +#' @param knn.neighbors Number of nearest neighbors to use for imputation of +#' missing features values. Only used for array data. +#' @param random.seed Optional initial seed for random number generator +#' (integer) +#' @param nresamp For assay.type="seq", number of resamples used to construct +#' test statistic. Default 20. Only used for sequencing data. +#' @param nresamp.perm For assay.type="seq", number of resamples used to +#' construct test statistic for permutations. Default is equal to nresamp and it +#' must be at most nresamp. Only used for sequencing data. +#' @param xl.mode Used by Excel interface +#' @param xl.time Used by Excel interface +#' @param xl.prevfit Used by Excel interface +#' @importFrom impute impute.knn +sammy <- function( + data, resp.type = c( + "Quantitative", "Two class unpaired", + "Survival", "Multiclass", "One class", "Two class paired", + "Two class unpaired timecourse", "One class timecourse", + "Two class paired timecourse", "Pattern discovery" + ), assay.type = c( + "array", + "seq" + ), s0 = NULL, s0.perc = NULL, nperms = 100, center.arrays = FALSE, + testStatistic = c("standard", "wilcoxon"), time.summary.type = c( + "slope", + "signed.area" + ), regression.method = c("standard", "ranks"), + return.x = FALSE, knn.neighbors = 10, random.seed = NULL, + nresamp = 20, nresamp.perm = NULL, xl.mode = c( + "regular", + "firsttime", "next20", "lasttime" + ), xl.time = NULL, xl.prevfit = NULL) { + this.call <- match.call() + resp.type.arg <- match.arg(resp.type) + assay.type <- match.arg(assay.type) + xl.mode <- match.arg(xl.mode) + set.seed(random.seed) + if (is.null(nresamp.perm)) nresamp.perm <- nresamp + nresamp.perm <- min(nresamp, nresamp.perm) + if (xl.mode == "regular" | xl.mode == "firsttime") { + x <- NULL + xresamp <- NULL + ttstar0 <- NULL + evo <- NULL + ystar <- NULL + sdstar.keep <- NULL + censoring.status <- NULL + pi0 <- NULL + stand.contrasts <- NULL + stand.contrasts.star <- NULL + stand.contrasts.95 <- NULL + foldchange <- NULL + foldchange.star <- NULL + perms <- NULL + permsy <- NULL + eigengene <- NULL + eigengene.number <- NULL + testStatistic <- match.arg(testStatistic) + time.summary.type <- match.arg(time.summary.type) + regression.method <- match.arg(regression.method) + x <- data$x + y <- data$y + argy <- y + if (!is.null(data$eigengene.number)) { + eigengene.number <- data$eigengene.number + } + if (sum(is.na(x)) > 0) { + x <- impute.knn(x, k = knn.neighbors) + if (!is.matrix(x)) { + x <- x$data + } + } + are.blocks.specified <- FALSE + cond <- resp.type %in% c( + "One class", "Two class unpaired timecourse", + "One class unpaired timecourse", "Two class paired timecourse", + "Pattern discovery" + ) + if (assay.type == "seq" & cond) { + stop(paste("Resp.type=", resp.type, " not allowed when assay.type='seq'")) + } + if (assay.type == "seq" & min(x) < 0) { + stop(paste("Negative values not allowed when assay.type='seq'")) + } + if (assay.type == "seq" & (sum(x %% 1 != 0) != 0)) { + stop("Non-integer values not alled when assay.type='seq'") + } + if (assay.type == "seq" & center.arrays) { + stop(paste("Centering not allowed when assay.type='seq'")) + } + if (assay.type == "seq" & regression.method == "ranks") { + stop(paste("regression.method==ranks not allowed when assay.type='seq'")) + } + if (center.arrays) { + x <- scale(x, center = apply(x, 2, median), scale = FALSE) + } + depth <- rep(NA, ncol(x)) + if (assay.type == "seq") { + message("Estimating sequencing depths...") + depth <- samr.estimate.depth(x) + message("Resampling to get new data matrices...") + xresamp <- resa(x, depth, nresamp = nresamp) + } + if (resp.type == samr.const.twoclass.unpaired.response) { + if (substring(y[1], 2, 6) == "Block" | substring( + y[1], + 2, 6 + ) == "block") { + junk <- parse.block.labels.for.2classes(y) + y <- junk$y + blocky <- junk$blocky + are.blocks.specified <- TRUE + } + } + if (resp.type == samr.const.twoclass.unpaired.response | + resp.type == samr.const.twoclass.paired.response | + resp.type == samr.const.oneclass.response | + resp.type == samr.const.quantitative.response | + resp.type == samr.const.multiclass.response + ) { + y <- as.numeric(y) + } + sd.internal <- NULL + if (resp.type == samr.const.twoclass.unpaired.timecourse.response | + resp.type == samr.const.twoclass.paired.timecourse.response | + resp.type == samr.const.oneclass.timecourse.response) { + junk <- parse.time.labels.and.summarize.data( + x, y, + resp.type, time.summary.type + ) + y <- junk$y + x <- junk$x + sd.internal <- sqrt(rowMeans(junk$sd^2)) + if (min(table(y)) == 1) { + warning( + "Only one timecourse in one or more classes;\n", + "SAM plot and FDRs will be unreliable;", + "only gene scores are informative" + ) + } + } + if (resp.type == samr.const.twoclass.unpaired.timecourse.response) { + resp.type <- samr.const.twoclass.unpaired.response + } + if (resp.type == samr.const.twoclass.paired.timecourse.response) { + resp.type <- samr.const.twoclass.paired.response + } + if (resp.type == samr.const.oneclass.timecourse.response) { + resp.type <- samr.const.oneclass.response + } + stand.contrasts <- NULL + stand.contrasts.95 <- NULL + if (resp.type == samr.const.survival.response) { + censoring.status <- data$censoring.status + } + check.format( + y, + resp.type = resp.type, censoring.status = censoring.status + ) + if (resp.type == samr.const.quantitative.response & regression.method == + "ranks") { + y <- rank(y) + x <- t(apply(x, 1, rank)) + } + n <- nrow(x) + sd <- NULL + numer <- NULL + if (resp.type == samr.const.twoclass.unpaired.response & + testStatistic == "standard" & assay.type == "array") { + init.fit <- ttest.func(x, y, sd = sd.internal) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.twoclass.unpaired.response & + testStatistic == "wilcoxon" & assay.type == "array") { + init.fit <- wilcoxon.func(x, y) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.oneclass.response & assay.type == + "array") { + init.fit <- onesample.ttest.func(x, y, sd = sd.internal) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.twoclass.paired.response & + assay.type == "array") { + init.fit <- paired.ttest.func(x, y, sd = sd.internal) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.survival.response & assay.type == + "array") { + init.fit <- cox.func(x, y, censoring.status) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.multiclass.response & assay.type == + "array") { + init.fit <- multiclass.func(x, y) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.quantitative.response & assay.type == + "array") { + init.fit <- quantitative.func(x, y) + numer <- init.fit$numer + sd <- init.fit$sd + } + if (resp.type == samr.const.patterndiscovery.response & + assay.type == "array") { + init.fit <- patterndiscovery.func(x) + numer <- init.fit$numer + sd <- init.fit$sd + } + if ((resp.type == samr.const.quantitative.response & + (testStatistic == "wilcoxon" | regression.method == + "ranks" & assay.type == "array") | resp.type == + samr.const.patterndiscovery.response) | resp.type == + samr.const.twoclass.unpaired.response & assay.type == + "array" & testStatistic == "wilcoxon" | (nrow(x) < + 500) & is.null(s0) & is.null(s0.perc)) { + s0 <- quantile(sd, 0.05) + s0.perc <- 0.05 + } + if (is.null(s0) & assay.type == "array") { + if (!is.null(s0.perc)) { + if ((s0.perc != -1 & s0.perc < 0) | s0.perc > + 100) { + stop( + "Illegal value for s0.perc: must be between 0 and 100, ", + "or equal\nto (-1) (meaning that s0 should be set to zero)" + ) + } + if (s0.perc == -1) { + s0 <- 0 + } + if (s0.perc >= 0) { + s0 <- quantile(init.fit$sd, s0.perc / 100) + } + } + if (is.null(s0.perc)) { + s0 <- est.s0(init.fit$tt, init.fit$sd)$s0.hat + s0.perc <- 100 * sum(init.fit$sd < s0) / length(init.fit$sd) + } + } + if (assay.type == "seq") { + s0 <- 0 + s0.perc <- 0 + } + if (resp.type == samr.const.twoclass.unpaired.response & + testStatistic == "standard" & assay.type == "array") { + tt <- ttest.func(x, y, s0 = s0, sd = sd.internal)$tt + } + if (resp.type == samr.const.twoclass.unpaired.response & + testStatistic == "wilcoxon" & assay.type == "array") { + tt <- wilcoxon.func(x, y, s0 = s0)$tt + } + if (resp.type == samr.const.oneclass.response & assay.type == + "array") { + tt <- onesample.ttest.func(x, y, s0 = s0, sd = sd.internal)$tt + } + if (resp.type == samr.const.twoclass.paired.response & + assay.type == "array") { + tt <- paired.ttest.func(x, y, s0 = s0, sd = sd.internal)$tt + } + if (resp.type == samr.const.survival.response & assay.type == + "array") { + tt <- cox.func(x, y, censoring.status, s0 = s0)$tt + } + if (resp.type == samr.const.multiclass.response & assay.type == + "array") { + junk2 <- multiclass.func(x, y, s0 = s0) + tt <- junk2$tt + stand.contrasts <- junk2$stand.contrasts + } + if (resp.type == samr.const.quantitative.response & assay.type == + "array") { + tt <- quantitative.func(x, y, s0 = s0)$tt + } + if (resp.type == samr.const.patterndiscovery.response & + assay.type == "array") { + junk <- patterndiscovery.func( + x, s0 = s0, eigengene.number = eigengene.number + ) + tt <- junk$tt + eigengene <- junk$eigengene + } + if (resp.type == samr.const.twoclass.unpaired.response & + assay.type == "seq") { + junk <- wilcoxon.unpaired.seq.func(xresamp, y) + tt <- junk$tt + numer <- junk$numer + sd <- junk$sd + } + if (resp.type == samr.const.twoclass.paired.response & + assay.type == "seq") { + junk <- wilcoxon.paired.seq.func(xresamp, y) + tt <- junk$tt + numer <- junk$numer + sd <- junk$sd + } + if (resp.type == samr.const.quantitative.response & assay.type == + "seq") { + junk <- quantitative.seq.func(xresamp, y) + tt <- junk$tt + numer <- junk$numer + sd <- junk$sd + } + if (resp.type == samr.const.survival.response & assay.type == + "seq") { + junk <- cox.seq.func(xresamp, y, censoring.status) + tt <- junk$tt + numer <- junk$numer + sd <- junk$sd + } + if (resp.type == samr.const.multiclass.response & assay.type == + "seq") { + junk2 <- multiclass.seq.func(xresamp, y) + tt <- junk2$tt + numer <- junk2$numer + sd <- junk2$sd + stand.contrasts <- junk2$stand.contrasts + } + if ( + resp.type == samr.const.quantitative.response | + resp.type == samr.const.multiclass.response | + resp.type == samr.const.survival.response + ) { + junk <- getperms(y, nperms) + perms <- junk$perms + all.perms.flag <- junk$all.perms.flag + nperms.act <- junk$nperms.act + } + if (resp.type == samr.const.twoclass.unpaired.response) { + if (are.blocks.specified) { + junk <- compute.block.perms(y, blocky, nperms) + permsy <- matrix(junk$permsy, ncol = length(y)) + all.perms.flag <- junk$all.perms.flag + nperms.act <- junk$nperms.act + } else { + junk <- getperms(y, nperms) + permsy <- matrix(y[junk$perms], ncol = length(y)) + all.perms.flag <- junk$all.perms.flag + nperms.act <- junk$nperms.act + } + } + if (resp.type == samr.const.oneclass.response) { + if ((length(y) * log(2)) < log(nperms)) { + allii <- 0:((2^length(y)) - 1) + nperms.act <- 2^length(y) + all.perms.flag <- 1 + } else { + nperms.act <- nperms + all.perms.flag <- 0 + } + permsy <- matrix(NA, nrow = nperms.act, ncol = length(y)) + if (all.perms.flag == 1) { + k <- 0 + for (i in allii) { + junk <- integer.base.b(i, b = 2) + if (length(junk) < length(y)) { + junk <- c( + rep(0, length(y) - length(junk)), + junk + ) + } + k <- k + 1 + permsy[k, ] <- y * (2 * junk - 1) + } + } else { + for (i in 1:nperms.act) { + permsy[i, ] <- sample(c(-1, 1), + size = length(y), + replace = TRUE + ) + } + } + } + if (resp.type == samr.const.twoclass.paired.response) { + junk <- compute.block.perms(y, abs(y), nperms) + permsy <- junk$permsy + all.perms.flag <- junk$all.perms.flag + nperms.act <- junk$nperms.act + } + if (resp.type == samr.const.patterndiscovery.response) { + nperms.act <- nperms + perms <- NULL + permsy <- NULL + all.perms.flag <- FALSE + } + sdstar.keep <- NULL + if (assay.type != "seq") { + sdstar.keep <- matrix(0, ncol = nperms.act, nrow = nrow(x)) + } + ttstar <- matrix(0, nrow = nrow(x), ncol = nperms.act) + foldchange.star <- NULL + if (resp.type == samr.const.twoclass.unpaired.response | + resp.type == samr.const.twoclass.paired.response) { + foldchange.star <- matrix(0, nrow = nrow(x), ncol = nperms.act) + } + if (resp.type == samr.const.multiclass.response) { + stand.contrasts.star <- array(NA, c( + nrow(x), length(table(y)), + nperms.act + )) + } + } + if (xl.mode == "next20" | xl.mode == "lasttime") { + evo <- xl.prevfit$evo + tt <- xl.prevfit$tt + numer <- xl.prevfit$numer + eigengene <- xl.prevfit$eigengene + eigengene.number <- xl.prevfit$eigengene.number + sd <- xl.prevfit$sd - xl.prevfit$s0 + sd.internal <- xl.prevfit$sd.internal + ttstar <- xl.prevfit$ttstar + ttstar0 <- xl.prevfit$ttstar0 + n <- xl.prevfit$n + pi0 <- xl.prevfit$pi0 + foldchange <- xl.prevfit$foldchange + y <- xl.prevfit$y + x <- xl.prevfit$x + xresamp <- xl.prevfit$xresamp + censoring.status <- xl.prevfit$censoring.status + argy <- xl.prevfit$argy + testStatistic <- xl.prevfit$testStatistic + foldchange.star <- xl.prevfit$foldchange.star + s0 <- xl.prevfit$s0 + s0.perc <- xl.prevfit$s0.perc + resp.type <- xl.prevfit$resp.type + resp.type.arg <- xl.prevfit$resp.type.arg + assay.type <- xl.prevfit$assay.type + sdstar.keep <- xl.prevfit$sdstar.keep + resp.type <- xl.prevfit$resp.type + stand.contrasts <- xl.prevfit$stand.contrasts + stand.contrasts.star <- xl.prevfit$stand.contrasts.star + stand.contrasts.95 <- xl.prevfit$stand.contrasts.95 + perms <- xl.prevfit$perms + permsy <- xl.prevfit$permsy + nperms <- xl.prevfit$nperms + nperms.act <- xl.prevfit$nperms.act + all.perms.flag <- xl.prevfit$all.perms.flag + depth <- xl.prevfit$depth + nresamp <- xl.prevfit$nresamp + nresamp.perm <- xl.prevfit$nresamp.perm + } + if (xl.mode == "regular") { + first <- 1 + last <- nperms.act + } + if (xl.mode == "firsttime") { + first <- 1 + last <- 1 + } + if (xl.mode == "next20") { + first <- xl.time + last <- min(xl.time + 19, nperms.act - 1) + } + if (xl.mode == "lasttime") { + first <- nperms.act + last <- nperms.act + } + for (b in first:last) { + message(c("perm = ", b)) + if (assay.type == "array") { + xstar <- x + } + if (assay.type == "seq") { + xstar <- xresamp[, , 1:nresamp.perm] + } + if (resp.type == samr.const.oneclass.response) { + ystar <- permsy[b, ] + if (testStatistic == "standard") { + ttstar[, b] <- onesample.ttest.func(xstar, ystar, + s0 = s0, sd = sd.internal + )$tt + } + } + if (resp.type == samr.const.twoclass.paired.response) { + ystar <- permsy[b, ] + if (assay.type == "array") { + ttstar[, b] <- paired.ttest.func(xstar, ystar, + s0 = s0, sd = sd.internal + )$tt + foldchange.star[, b] <- foldchange.paired( + xstar, + ystar, data$logged2 + ) + } + if (assay.type == "seq") { + ttstar[, b] <- wilcoxon.paired.seq.func( + xstar, + ystar + )$tt + foldchange.star[, b] <- foldchange.seq.twoclass.paired( + x, + ystar, depth + ) + } + } + if (resp.type == samr.const.twoclass.unpaired.response) { + ystar <- permsy[b, ] + if (assay.type == "array") { + if (testStatistic == "standard") { + junk <- ttest.func(xstar, ystar, s0 = s0, sd = sd.internal) + } + if (testStatistic == "wilcoxon") { + junk <- wilcoxon.func(xstar, ystar, s0 = s0) + } + ttstar[, b] <- junk$tt + sdstar.keep[, b] <- junk$sd + foldchange.star[, b] <- foldchange.twoclass( + xstar, + ystar, data$logged2 + ) + } + if (assay.type == "seq") { + ttstar[, b] <- wilcoxon.unpaired.seq.func( + xstar, + ystar + )$tt + foldchange.star[, b] <- foldchange.seq.twoclass.unpaired( + x, + ystar, depth + ) + } + } + if (resp.type == samr.const.survival.response) { + o <- perms[b, ] + if (assay.type == "array") { + ttstar[, b] <- cox.func( + xstar, y[o], + censoring.status = censoring.status[o], s0 = s0 + )$tt + } + if (assay.type == "seq") { + ttstar[, b] <- cox.seq.func( + xstar, y[o], + censoring.status = censoring.status[o] + )$tt + } + } + if (resp.type == samr.const.multiclass.response) { + ystar <- y[perms[b, ]] + if (assay.type == "array") { + junk <- multiclass.func(xstar, ystar, s0 = s0) + ttstar[, b] <- junk$tt + sdstar.keep[, b] <- junk$sd + stand.contrasts.star[, , b] <- junk$stand.contrasts + } + if (assay.type == "seq") { + junk <- multiclass.seq.func(xstar, ystar) + ttstar[, b] <- junk$tt + stand.contrasts.star[, , b] <- junk$stand.contrasts + } + } + if (resp.type == samr.const.quantitative.response) { + ystar <- y[perms[b, ]] + if (assay.type == "array") { + junk <- quantitative.func(xstar, ystar, s0 = s0) + ttstar[, b] <- junk$tt + sdstar.keep[, b] <- junk$sd + } + if (assay.type == "seq") { + junk <- quantitative.seq.func(xstar, ystar) + ttstar[, b] <- junk$tt + } + } + if (resp.type == samr.const.patterndiscovery.response) { + xstar <- permute.rows(x) + junk <- patterndiscovery.func( + xstar, + s0 = s0, eigengene.number = eigengene.number + ) + ttstar[, b] <- junk$tt + sdstar.keep[, b] <- junk$sd + } + } + if (xl.mode == "regular" | xl.mode == "lasttime") { + ttstar0 <- ttstar + for (j in seq_len(ncol(ttstar))) { + ttstar[, j] <- -1 * sort(-1 * ttstar[, j]) + } + for (i in seq_len(nrow(ttstar))) { + ttstar[i, ] <- sort(ttstar[i, ]) + } + evo <- apply(ttstar, 1, mean) + evo <- evo[seq(length(evo), 1)] + pi0 <- 1 + if (resp.type != samr.const.multiclass.response) { + qq <- quantile(ttstar, c(0.25, 0.75)) + } + if (resp.type == samr.const.multiclass.response) { + qq <- quantile(ttstar, c(0, 0.5)) + } + pi0 <- sum(tt > qq[1] & tt < qq[2]) / (0.5 * length(tt)) + foldchange <- NULL + if (resp.type == samr.const.twoclass.unpaired.response & + assay.type == "array") { + foldchange <- foldchange.twoclass(x, y, data$logged2) + } + if (resp.type == samr.const.twoclass.paired.response & + assay.type == "array") { + foldchange <- foldchange.paired(x, y, data$logged2) + } + if (resp.type == samr.const.oneclass.response & assay.type == + "array") { + } + stand.contrasts.95 <- NULL + if (resp.type == samr.const.multiclass.response) { + stand.contrasts.95 <- quantile( + stand.contrasts.star, + c(0.025, 0.975) + ) + } + if (resp.type == samr.const.twoclass.unpaired.response & + assay.type == "seq") { + foldchange <- foldchange.seq.twoclass.unpaired( + x, + y, depth + ) + } + if (resp.type == samr.const.twoclass.paired.response & + assay.type == "seq") { + foldchange <- foldchange.seq.twoclass.paired( + x, y, + depth + ) + } + if (return.x == FALSE) { + x <- NULL + } + } + return( + list( + n = n, x = x, xresamp = xresamp, y = y, argy = argy, + censoring.status = censoring.status, testStatistic = testStatistic, + nperms = nperms, nperms.act = nperms.act, tt = tt, numer = numer, + sd = sd + s0, sd.internal = sd.internal, s0 = s0, s0.perc = s0.perc, + evo = evo, perms = perms, permsy = permsy, nresamp = nresamp, + nresamp.perm = nresamp.perm, all.perms.flag = all.perms.flag, + ttstar = ttstar, ttstar0 = ttstar0, eigengene = eigengene, + eigengene.number = eigengene.number, pi0 = pi0, foldchange = foldchange, + foldchange.star = foldchange.star, sdstar.keep = sdstar.keep, + resp.type = resp.type, resp.type.arg = resp.type.arg, + assay.type = assay.type, stand.contrasts = stand.contrasts, + stand.contrasts.star = stand.contrasts.star, + stand.contrasts.95 = stand.contrasts.95, + depth = depth, call = this.call + ) + ) +} + +#' @title Estimate sequencing depths +#' @param x data matrix. nrow=#gene, ncol=#sample +#' @return depth: estimated sequencing depth. a vector with len sample. +samr.estimate.depth <- function(x) { + iter <- 5 + cmeans <- colSums(x) / sum(x) + for (i in 1:iter) { + n0 <- rowSums(x) %*% t(cmeans) + prop <- rowSums((x - n0)^2 / (n0 + 1e-08)) + qs <- quantile(prop, c(0.25, 0.75)) + keep <- (prop >= qs[1]) & (prop <= qs[2]) + cmeans <- colMeans(x[keep, ]) + cmeans <- cmeans / sum(cmeans) + } + depth <- cmeans / mean(cmeans) + return(depth) +} + +#' @title Resampling +#' @param x data matrix. nrow=#gene, ncol=#sample +#' @param d estimated sequencing depth +#' @param nresamp number of resamplings +#' @return xresamp: an rank array with dim #gene*#sample*nresamp +#' @description Corresponds to `samr::resample` +#' @importFrom stats rpois runif +resa <- function(x, d, nresamp = 20) { + ng <- nrow(x) + ns <- ncol(x) + dbar <- exp(mean(log(d))) + xresamp <- array(0, dim = c(ng, ns, nresamp)) + for (k in 1:nresamp) { + for (j in 1:ns) { + xresamp[, j, k] <- rpois(n = ng, lambda = (dbar / d[j]) * + x[, j]) + runif(ng) * 0.1 + } + } + for (k in 1:nresamp) { + xresamp[, , k] <- t(rankcols(t(xresamp[, , k]))) + } + return(xresamp) +} + +#' @title Rank columns +#' @description Ranks the elements within each col of the matrix x and returns +#' these ranks in a matrix +#' @note this function is equivalent to `samr::rankcol`, but uses `apply` to +#' rank the colums instead of a compiled Fortran function which was causing our +#' DEGanalysis functions to freeze in large datasets. +#' @param x x +rankcols <- function(x) { + # ranks the elements within each col of the matrix x + # and returns these ranks in a matrix + n <- nrow(x) + p <- ncol(x) + mode(n) <- "integer" + mode(p) <- "integer" + mode(x) <- "double" + xr <- apply(x, 2, rank) + return(xr) +} + +#' @title Check format +#' @param y y +#' @param resp.type resp type +#' @param censoring.status censoring status +check.format <- function(y, resp.type, censoring.status = NULL) { + # here i do some format checks for the input data$y + # note that checks for time course data are done in the + # parse function for time course; + # we then check the output from the parser in this function + if (resp.type == samr.const.twoclass.unpaired.response | + resp.type == samr.const.twoclass.unpaired.timecourse.response) { + if (sum(y == 1) + sum(y == 2) != length(y)) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; values must be 1 or 2" + )) + } + } + if (resp.type == samr.const.twoclass.paired.response | resp.type == + samr.const.twoclass.paired.timecourse.response) { + if (sum(y) != 0) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; values must be -1, 1, -2, 2, etc" + )) + } + if (sum(table(y[y > 0]) != abs(table(y[y < 0])))) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; values must be -1, 1, -2, 2, etc" + )) + } + } + if (resp.type == samr.const.oneclass.response | resp.type == + samr.const.oneclass.timecourse.response) { + if (sum(y == 1) != length(y)) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; values must all be 1" + )) + } + } + if (resp.type == samr.const.multiclass.response) { + tt <- table(y) + nc <- length(tt) + if (sum(y <= nc & y > 0) < length(y)) { + stop( + "Error in input response data: response type ", resp.type, + " specified; values must be 1,2, ... number of classes" + ) + } + for (k in 1:nc) { + if (sum(y == k) < 2) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; there must be >1 sample per class" + )) + } + } + } + if (resp.type == samr.const.quantitative.response) { + if (!is.numeric(y)) { + stop(paste( + "Error in input response data: response type", + resp.type, " specified; values must be numeric" + )) + } + } + if (resp.type == samr.const.survival.response) { + if (is.null(censoring.status)) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; error in censoring indicator" + )) + } + if (!is.numeric(y) | sum(y < 0) > 0) { + stop( + "Error in input response data: response type ", resp.type, + " specified; survival times must be numeric and nonnegative" + ) + if (sum(censoring.status == 0) + sum(censoring.status == + 1) != length(censoring.status)) { + stop( + "Error in input response data: response type ", resp.type, + " specified; censoring indicators must be ", + "0 (censored) or 1 (failed)" + ) + } + } + if (sum(censoring.status == 1) < 1) { + stop(paste( + "Error in input response data: response type ", + resp.type, " specified; there are no uncensored observations" + )) + } + } + return() +} + +#' @title Twoclass Wilcoxon statistics +#' @param xresamp an rank array with dim #gene*#sample*nresamp +#' @param y outcome vector of values 1 and 2 +#' @return the statistic. +wilcoxon.unpaired.seq.func <- function(xresamp, y) { + tt <- rep(0, dim(xresamp)[1]) + for (i in seq_len(dim(xresamp)[3])) { + tt <- tt + rowSums(xresamp[, y == 2, i]) - sum(y == 2) * + (length(y) + 1) / 2 + } + tt <- tt / dim(xresamp)[3] + return(list(tt = tt, numer = tt, sd = rep(1, length(tt)))) +} +wilcoxon.paired.seq.func <- function(xresamp, y) { + tt <- rep(0, dim(xresamp)[1]) + for (i in seq_len(dim(xresamp)[3])) { + tt <- tt + rowSums(xresamp[, y > 0, i]) - sum(y > 0) * + (length(y) + 1) / 2 + } + tt <- tt / dim(xresamp)[3] + return(list(tt = tt, numer = tt, sd = rep(1, length(tt)))) +} +getperms <- function(y, nperms) { + total.perms <- factorial(length(y)) + if (total.perms <= nperms) { + perms <- permute(seq_len(length(y))) + all.perms.flag <- 1 + nperms.act <- total.perms + } + if (total.perms > nperms) { + perms <- matrix(NA, nrow = nperms, ncol = length(y)) + for (i in 1:nperms) { + perms[i, ] <- sample(seq_len(length(y)), size = length(y)) + } + all.perms.flag <- 0 + nperms.act <- nperms + } + return(list( + perms = perms, all.perms.flag = all.perms.flag, + nperms.act = nperms.act + )) +} +foldchange.twoclass <- function(x, y, logged2) { + m1 <- rowMeans(x[, y == 1, drop = FALSE]) + m2 <- rowMeans(x[, y == 2, drop = FALSE]) + if (!logged2) { + fc <- m2 / m1 + } + if (logged2) { + fc <- 2^{ + m2 - m1 + } + } + return(fc) +} +#' @title Foldchange of twoclass unpaired sequencing data +#' @param x x +#' @param y y +#' @param depth depth +foldchange.seq.twoclass.unpaired <- function(x, y, depth) { + x.norm <- scale(x, center = FALSE, scale = depth) + 1e-08 + fc <- apply(x.norm[, y == 2], 1, median) / + apply(x.norm[, y == + 1], 1, median) + return(fc) +} +foldchange.seq.twoclass.paired <- function(x, y, depth) { + nc <- ncol(x) / 2 + o1 <- o2 <- rep(0, nc) + for (j in 1:nc) { + o1[j] <- which(y == -j) + o2[j] <- which(y == j) + } + x.norm <- scale(x, center = FALSE, scale = depth) + 1e-08 + d <- x.norm[, o2, drop = FALSE] / x.norm[, o1, drop = FALSE] + fc <- lapply(d, 1, function(x) median(x, na.rm = TRUE)) + return(fc) +} +permute <- function(elem) { + # generates all perms of the vector elem + if (!missing(elem)) { + if (length(elem) == 2) { + return(matrix(c(elem, elem[2], elem[1]), nrow = 2)) + } + last.matrix <- permute(elem[-1]) + dim.last <- dim(last.matrix) + new.matrix <- matrix(0, nrow = dim.last[1] * (dim.last[2] + + 1), ncol = dim.last[2] + 1) + for (row in 1:(dim.last[1])) { + for (col in 1:(dim.last[2] + 1)) { + new.matrix[row + (col - 1) * dim.last[1], ] <- + insert.value(last.matrix[row, ], elem[1], col) + } + } + return(new.matrix) + } else { + message("Usage: permute(elem)\n\twhere elem is a vector") + } +} +insert.value <- function(vec, newval, pos) { + if (pos == 1) { + return(c(newval, vec)) + } + lvec <- length(vec) + if (pos > lvec) { + return(c(vec, newval)) + } + return(c(vec[1:pos - 1], newval, vec[pos:lvec])) +} +parse.block.labels.for.2classes <- function(y) { + # this only works for 2 class case- having form jBlockn, + # where j=1 or 2 + n <- length(y) + y.act <- rep(NA, n) + blocky <- rep(NA, n) + for (i in 1:n) { + blocky[i] <- as.numeric(substring(y[i], 7, nchar(y[i]))) + y.act[i] <- as.numeric(substring(y[i], 1, 1)) + } + return(list(y.act = y.act, blocky = blocky)) +} +parse.time.labels.and.summarize.data <- function( + x, + y, resp.type, time.summary.type) { + # parse time labels, and summarize time data for each + # person, via a slope or area + # does some error checking too + n <- length(y) + last5char <- rep(NA, n) + last3char <- rep(NA, n) + for (i in 1:n) { + last3char[i] <- substring(y[i], nchar(y[i]) - 2, nchar(y[i])) + last5char[i] <- substring(y[i], nchar(y[i]) - 4, nchar(y[i])) + } + if (sum(last3char == "End") != sum(last5char == "Start")) { + stop("Error in format of time course data: a Start or End tag is missing") + } + y.act <- rep(NA, n) + timey <- rep(NA, n) + person.id <- rep(NA, n) + k <- 1 + end.flag <- FALSE + person.id[1] <- 1 + if (substring(y[1], nchar(y[1]) - 4, nchar(y[1])) != "Start") { + stop( + "Error in format of time course data: ", + "first cell should have a Start tag" + ) + } + for (i in 1:n) { + message(i) + j <- 1 + while (substring(y[i], j, j) != "T") { + j <- j + 1 + } + end.of.y <- j - 1 + y.act[i] <- as.numeric(substring(y[i], 1, end.of.y)) + timey[i] <- substring(y[i], end.of.y + 5, nchar(y[i])) + if (nchar(timey[i]) > 3 & substring(timey[i], nchar(timey[i]) - + 2, nchar(timey[i])) == "End") { + end.flag <- TRUE + timey[i] <- substring(timey[i], 1, nchar(timey[i]) - + 3) + } + if (nchar(timey[i]) > 3 & substring(timey[i], nchar(timey[i]) - + 4, nchar(timey[i])) == "Start") { + timey[i] <- substring(timey[i], 1, nchar(timey[i]) - + 5) + } + if (i < n & !end.flag) { + person.id[i + 1] <- k + } + if (i < n & end.flag) { + k <- k + 1 + person.id[i + 1] <- k + } + end.flag <- FALSE + } + timey <- as.numeric(timey) + # do a check that the format was correct + tt <- table(person.id, y.act) + junk <- function(x) { + sum(x != 0) + } + if (sum(apply(tt, 1, junk) != 1) > 0) { + num <- seq_len(nrow(tt))[apply(tt, 1, junk) > 1] + stop(paste( + "Error in format of time course data, timecourse #", + as.character(num) + )) + } + npeople <- length(unique(person.id)) + newx <- matrix(NA, nrow = nrow(x), ncol = npeople) + sd <- matrix(NA, nrow = nrow(x), ncol = npeople) + for (j in 1:npeople) { + jj <- person.id == j + tim <- timey[jj] + xc <- t(scale(t(x[, jj, drop = FALSE]), center = TRUE, scale = FALSE)) + if (time.summary.type == "slope") { + junk <- quantitative.func(xc, tim - mean(tim)) + newx[, j] <- junk$numer + sd[, j] <- junk$sd + } + if (time.summary.type == "signed.area") { + junk <- timearea.func(x[, jj, drop = FALSE], tim) + newx[, j] <- junk$numer + sd[, j] <- junk$sd + } + } + y.unique <- y.act[!duplicated(person.id)] + return(list(y = y.unique, x = newx, sd = sd)) +} +ttest.func <- function(x, y, s0 = 0, sd = NULL) { + n1 <- sum(y == 1) + n2 <- sum(y == 2) + m1 <- rowMeans(x[, y == 1, drop = FALSE]) + m2 <- rowMeans(x[, y == 2, drop = FALSE]) + if (is.null(sd)) { + sd <- sqrt(((n2 - 1) * varr(x[, y == 2], meanx = m2) + + (n1 - 1) * varr(x[, y == 1], meanx = m1)) * (1 / n1 + + 1 / n2) / (n1 + n2 - 2)) + } + numer <- m2 - m1 + dif.obs <- (numer) / (sd + s0) + return(list(tt = dif.obs, numer = numer, sd = sd)) +} + +wilcoxon.func <- function(x, y, s0 = 0) { + n1 <- sum(y == 1) + n2 <- sum(y == 2) + p <- nrow(x) + r2 <- rowSums(t(apply(x, 1, rank))[, y == 2, drop = FALSE]) + numer <- r2 - (n2 / 2) * (n2 + 1) - (n1 * n2) / 2 + sd <- sqrt(n1 * n2 * (n1 + n2 + 1) / 12) + tt <- (numer) / (sd + s0) + return(list(tt = tt, numer = numer, sd = rep(sd, p))) +} + +onesample.ttest.func <- function(x, y, s0 = 0, sd = NULL) { + n <- length(y) + x <- x * matrix(y, nrow = nrow(x), ncol = ncol(x), byrow = TRUE) + m <- rowMeans(x) + if (is.null(sd)) { + sd <- sqrt(varr(x, meanx = m) / n) + } + dif.obs <- m / (sd + s0) + return(list(tt = dif.obs, numer = m, sd = sd)) +} + +patterndiscovery.func <- function(x, s0 = 0, eigengene.number = 1) { + a <- mysvd(x, n.components = eigengene.number) + v <- a$v[, eigengene.number] + # here we try to guess the most interpretable orientation + # for the eigengene + om <- abs(a$u[, eigengene.number]) > quantile( + abs(a$u[, eigengene.number]), + 0.95 + ) + if (median(a$u[om, eigengene.number]) < 0) { + v <- -1 * v + } + aa <- quantitative.func(x, v, s0 = s0) + eigengene <- cbind(seq_len(nrow(a$v)), v) + dimnames(eigengene) <- list(NULL, c("sample number", "value")) + return( + list(tt = aa$tt, numer = aa$numer, sd = aa$sd, eigengene = eigengene) + ) +} + +paired.ttest.func <- function(x, y, s0 = 0, sd = NULL) { + nc <- ncol(x) / 2 + o <- 1:nc + o1 <- rep(0, ncol(x) / 2) + o2 <- o1 + for (j in 1:nc) { + o1[j] <- seq_len(ncol(x))[y == -o[j]] + } + for (j in 1:nc) { + o2[j] <- seq_len(ncol(x))[y == o[j]] + } + d <- x[, o2, drop = FALSE] - x[, o1, drop = FALSE] + if (is.matrix(d)) { + m <- rowMeans(d) + } + if (!is.matrix(d)) { + m <- mean(d) + } + if (is.null(sd)) { + if (is.matrix(d)) { + sd <- sqrt(varr(d, meanx = m) / nc) + } + if (!is.matrix(d)) { + sd <- sqrt(var(d) / nc) + } + } + dif.obs <- m / (sd + s0) + return(list(tt = dif.obs, numer = m, sd = sd)) +} + +cox.func <- function(x, y, censoring.status, s0 = 0) { + # find the index matrix + Dn <- sum(censoring.status == 1) + Dset <- seq_len(ncol(x))[censoring.status == 1] # the set of observed + ind <- matrix(0, ncol(x), Dn) + # get the matrix + for (i in 1:Dn) { + ind[y > y[Dset[i]] - 1e-08, i] <- 1 / sum(y > y[Dset[i]] - + 1e-08) + } + ind.sums <- rowSums(ind) + x.ind <- x %*% ind + # get the derivatives + numer <- x %*% (censoring.status - ind.sums) + sd <- sqrt((x * x) %*% ind.sums - rowSums(x.ind * x.ind)) + tt <- numer / (sd + s0) + return(list(tt = tt, numer = numer, sd = sd)) +} + +multiclass.func <- function(x, y, s0 = 0) { + ## assumes y is coded 1,2... + nn <- table(y) + m <- matrix(0, nrow = nrow(x), ncol = length(nn)) + v <- m + for (j in seq_len(length(nn))) { + m[, j] <- rowMeans(x[, y == j]) + v[, j] <- (nn[j] - 1) * varr(x[, y == j], meanx = m[ + , + j + ]) + } + mbar <- rowMeans(x) + mm <- m - matrix(mbar, nrow = length(mbar), ncol = length(nn)) + fac <- (sum(nn) / prod(nn)) + scor <- sqrt(fac * (apply(matrix(nn, + nrow = nrow(m), ncol = ncol(m), + byrow = TRUE + ) * mm * mm, 1, sum))) + sd <- sqrt(rowSums(v) * (1 / sum(nn - 1)) * sum(1 / nn)) + tt <- scor / (sd + s0) + mm.stand <- t(scale(t(mm), center = FALSE, scale = sd)) + return(list(tt = tt, numer = scor, sd = sd, stand.contrasts = mm.stand)) +} + +est.s0 <- function(tt, sd, s0.perc = seq(0, 1, by = 0.05)) { + ## estimate s0 (exchangeability) factor for denominator. + ## returns the actual estimate s0 (not a percentile) + br <- unique(quantile(sd, seq(0, 1, len = 101))) + nbr <- length(br) + a <- cut(sd, br, labels = FALSE) + a[is.na(a)] <- 1 + cv.sd <- rep(0, length(s0.perc)) + for (j in seq_len(length(s0.perc))) { + w <- quantile(sd, s0.perc[j]) + w[j == 1] <- 0 + tt2 <- tt * sd / (sd + w) + tt2[tt2 == Inf] <- NA + sds <- rep(0, nbr - 1) + for (i in 1:(nbr - 1)) { + sds[i] <- stats::mad(tt2[a == i], na.rm = TRUE) + } + cv.sd[j] <- sqrt(var(sds)) / mean(sds) + } + o <- seq_len(length(s0.perc))[cv.sd == min(cv.sd)] + # we don;t allow taking s0.hat to be 0th percentile when + # min sd is 0 + s0.hat <- quantile(sd[sd != 0], s0.perc[o]) + return(list(s0.perc = s0.perc, cv.sd = cv.sd, s0.hat = s0.hat)) +} + +permute.rows <- function(x) { + dd <- dim(x) + n <- dd[1] + p <- dd[2] + mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n)) + matrix(t(x)[order(mm)], n, p, byrow = TRUE) +} + +foldchange.paired <- function(x, y, logged2) { + nc <- ncol(x) / 2 + o <- 1:nc + o1 <- rep(0, ncol(x) / 2) + o2 <- o1 + for (j in 1:nc) { + o1[j] <- seq_len(ncol(x))[y == -o[j]] + } + for (j in 1:nc) { + o2[j] <- seq_len(ncol(x))[y == o[j]] + } + if (!logged2) { + d <- x[, o2, drop = FALSE] / x[, o1, drop = FALSE] + } + if (logged2) { + d <- x[, o2, drop = FALSE] - x[, o1, drop = FALSE] + } + if (!logged2) { + fc <- rowMeans(d) + } + if (logged2) { + fc <- 2^rowMeans(d) + } + return(fc) +} +foldchange.seq.twoclass.unpaired <- function(x, y, depth) { + x.norm <- scale(x, center = FALSE, scale = depth) + 1e-08 + fc <- apply(x.norm[, y == 2], 1, median) / + apply(x.norm[, y == 1], 1, median) + return(fc) +} +integer.base.b <- function(x, b = 2) { + xi <- as.integer(x) + if (xi == 0) { + return(0) + } + if (any(is.na(xi) | ((x - xi) != 0))) { + print(list(ERROR = "x not integer", x = x)) + } + N <- length(x) + xMax <- max(x) + ndigits <- (floor(logb(xMax, base = 2)) + 1) + Base.b <- array(NA, dim = c(N, ndigits)) + for (i in 1:ndigits) { + Base.b[, ndigits - i + 1] <- (x %% b) + x <- (x %/% b) + } + if (N == 1) { + Base.b[1, ] + } else { + Base.b + } +} +varr <- function(x, meanx = NULL) { + n <- ncol(x) + p <- nrow(x) + Y <- matrix(1, nrow = n, ncol = 1) + if (is.null(meanx)) { + meanx <- rowMeans(x) + } + ans <- rep(1, p) + xdif <- x - meanx %*% t(Y) + ans <- (xdif^2) %*% rep(1 / (n - 1), n) + ans <- drop(ans) + return(ans) +} +quantitative.func <- function(x, y, s0 = 0) { + # regression of x on y + my <- mean(y) + yy <- y - my + temp <- x %*% yy + mx <- rowMeans(x) + syy <- sum(yy^2) + scor <- temp / syy + b0hat <- mx - scor * my + ym <- matrix(y, nrow = nrow(x), ncol = ncol(x), byrow = TRUE) + xhat <- matrix(b0hat, nrow = nrow(x), ncol = ncol(x)) + ym * + matrix(scor, nrow = nrow(x), ncol = ncol(x)) + sigma <- sqrt(rowSums((x - xhat)^2) / (ncol(xhat) - 2)) + sd <- sigma / sqrt(syy) + tt <- scor / (sd + s0) + return(list(tt = tt, numer = scor, sd = sd)) +} +timearea.func <- function(x, y, s0 = 0) { + n <- ncol(x) + xx <- 0.5 * (x[, 2:n] + x[, 1:(n - 1)]) * matrix(diff(y), + nrow = nrow(x), ncol = n - 1, byrow = TRUE + ) + numer <- rowMeans(xx) + sd <- sqrt(varr(xx, meanx = numer) / n) + tt <- numer / sqrt(sd + s0) + return(list(tt = tt, numer = numer, sd = sd)) +} +cox.seq.func <- function(xresamp, y, censoring.status) { + # get the dimensions + ns <- ncol(xresamp) + # prepare for the calculation + # find the index matrix + Dn <- sum(censoring.status == 1) + Dset <- seq_len(ns)[censoring.status == 1] # the set of died + ind <- matrix(0, ns, Dn) + # get the matrix + for (i in 1:Dn) { + ind[y >= y[Dset[i]] - 1e-08, i] <- 1 / sum(y >= y[Dset[i]] - + 1e-08) + } + ind.sums <- rowSums(ind) + # calculate the score statistic + tt <- apply(xresamp, 3, function(x, cen.ind, ind.para, ind.sums.para) { + dev1 <- x %*% cen.ind + x.ind <- x %*% ind.para + dev2 <- (x * x) %*% ind.sums.para - rowSums(x.ind * x.ind) + dev1 / (sqrt(dev2) + 1e-08) + }, (censoring.status - ind.sums), ind, ind.sums) + tt <- rowMeans(tt) + return(list(tt = tt, numer = tt, sd = rep(1, length(tt)))) +} +compute.block.perms <- function(y, blocky, nperms) { + # y are the data (eg class label 1 vs 2; or -1,1, -2,2 for + # paired data) + # blocky are the block labels (abs(y) for paired daatr) + ny <- length(y) + nblocks <- length(unique(blocky)) + tab <- table(blocky) + total.nperms <- prod(factorial(tab)) + # block.perms is a list of all possible permutations + block.perms <- vector("list", nblocks) + # first enumerate all perms, when possible + if (total.nperms <= nperms) { + all.perms.flag <- 1 + nperms.act <- total.nperms + for (i in 1:nblocks) { + block.perms[[i]] <- permute(y[blocky == i]) + } + kk <- 0:(factorial(max(tab))^nblocks - 1) + # the rows of the matrix outerm runs through the 'outer + # product' + # first we assume that all blocks have max(tab) members; + # then we remove rows of outerm that + # are illegal (ie when a block has fewer members) + outerm <- matrix(0, nrow = length(kk), ncol = nblocks) + for (i in seq_len(length(kk))) { + kkkk <- integer.base.b(kk[i], b = factorial(max(tab))) + if (length(kkkk) > nblocks) { + kkkk <- kkkk[(length(kkkk) - nblocks + 1):length(kkkk)] + } + outerm[i, (nblocks - length(kkkk) + 1):nblocks] <- kkkk + } + outerm <- outerm + 1 + # now remove rows that are illegal perms + ind <- rep(TRUE, nrow(outerm)) + for (j in seq_len(ncol(outerm))) { + ind <- ind & outerm[, j] <= factorial(tab[j]) + } + outerm <- outerm[ind, , drop = FALSE] + # finally, construct permutation matrix from outer product + permsy <- matrix(NA, nrow = total.nperms, ncol = ny) + for (i in 1:total.nperms) { + junk <- NULL + for (j in 1:nblocks) { + junk <- c(junk, block.perms[[j]][outerm[i, j], ]) + } + permsy[i, ] <- junk + } + } + # next handle case when there are too many perms to enumerate + if (total.nperms > nperms) { + all.perms.flag <- 0 + nperms.act <- nperms + permsy <- NULL + block.perms <- vector("list", nblocks) + for (j in 1:nblocks) { + block.perms[[j]] <- sample.perms(y[blocky == j], nperms = nperms) + } + for (j in 1:nblocks) { + permsy <- cbind(permsy, block.perms[[j]]) + } + } + return(list( + permsy = permsy, all.perms.flag = all.perms.flag, + nperms.act = nperms.act + )) +} +sample.perms <- function(elem, nperms) { + # randomly generates nperms of the vector elem + res <- permute.rows(matrix(elem, + nrow = nperms, ncol = length(elem), + byrow = TRUE + )) + return(res) +} +mysvd <- function(x, n.components = NULL) { + # finds PCs of matrix x + p <- nrow(x) + n <- ncol(x) + # center the observations (rows) + feature.means <- rowMeans(x) + x <- t(scale(t(x), center = feature.means, scale = FALSE)) + if (is.null(n.components)) { + n.components <- min(n, p) + } + if (p > n) { + a <- eigen(t(x) %*% x) + v <- a$vec[, 1:n.components, drop = FALSE] + d <- sqrt(a$val[1:n.components, drop = FALSE]) + u <- scale(x %*% v, center = FALSE, scale = d) + return(list(u = u, d = d, v = v)) + } else { + junk <- svd(x, LINPACK = TRUE) + nc <- min(ncol(junk$u), n.components) + return(list(u = junk$u[, 1:nc], d = junk$d[1:nc], v = junk$v[ + , + 1:nc + ])) + } +} +quantitative.seq.func <- function(xresamp, y) { + tt <- rep(0, dim(xresamp)[1]) + for (i in seq_len(dim(xresamp)[3])) { + y.ranked <- rank(y, ties.method = "random") - (dim(xresamp)[2] + + 1) / 2 + tt <- tt + (xresamp[, , i] - (dim(xresamp)[2] + 1) / 2) %*% + y.ranked + } + ns <- dim(xresamp)[2] + tt <- tt / (dim(xresamp)[3] * (ns^3 - ns) / 12) + return(list(tt = as.vector(tt), numer = as.vector(tt), sd = rep( + 1, + length(tt) + ))) +} +multiclass.seq.func <- function(xresamp, y) { + # number of classes and number of samples in each class + K <- max(y) + n.each <- rep(0, K) + for (k in 1:K) { + n.each[k] <- sum(y == k) + } + # the statistic + tt <- temp <- rep(0, dim(xresamp)[1]) + stand.contrasts <- matrix(0, dim(xresamp)[1], K) + + for (i in seq_len(dim(xresamp)[3])) { + for (k in 1:K) { + temp <- rowSums(xresamp[, y == k, i]) + tt <- tt + temp^2 / n.each[k] + stand.contrasts[, k] <- stand.contrasts[, k] + temp + } + } + # finalize + nresamp <- dim(xresamp)[3] + ns <- dim(xresamp)[2] + tt <- tt / nresamp * 12 / ns / (ns + 1) - 3 * (ns + 1) + stand.contrasts <- stand.contrasts / nresamp + stand.contrasts <- scale(stand.contrasts, + center = n.each * (ns + 1) / 2, + scale = sqrt(n.each * (ns - n.each) * (ns + 1) / 12) + ) + return(list( + tt = tt, numer = tt, sd = rep(1, length(tt)), + stand.contrasts = stand.contrasts + )) +} + +# ============================================================================== +# samr.compute.delta.table +# ============================================================================== +## Jun added starts +samr.compute.delta.table <- function( + samr.obj, min.foldchange = 0, + dels = NULL, nvals = 50) { + res <- NULL + if (samr.obj$assay.type == "array") { + res <- samr.compute.delta.table.array( + samr.obj, min.foldchange, + dels, nvals + ) + } else if (samr.obj$assay.type == "seq") { + res <- samr.compute.delta.table.seq( + samr.obj, min.foldchange, + dels + ) + } + return(res) +} +## Jun added ends + +## Jun added the first row below, and commented the row +# after it +samr.compute.delta.table.array <- function( + samr.obj, + min.foldchange = 0, dels = NULL, nvals = 50) { + # samr.compute.delta.table <- function(samr.obj, + # min.foldchange=0, dels=NULL, nvals=50) { + # computes delta table, starting with samr object 'a', for + # nvals values of delta + lmax <- sqrt(max(abs(sort(samr.obj$tt) - samr.obj$evo))) + if (is.null(dels)) { + dels <- (seq(0, lmax, length = nvals)^2) + } + res1 <- NULL + foldchange.cond.up <- matrix(TRUE, + nrow = nrow(samr.obj$ttstar), + ncol = ncol(samr.obj$ttstar) + ) + foldchange.cond.lo <- matrix(TRUE, + nrow = nrow(samr.obj$ttstar), + ncol = ncol(samr.obj$ttstar) + ) + if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > + 0)) { + foldchange.cond.up <- samr.obj$foldchange.star >= min.foldchange + foldchange.cond.lo <- samr.obj$foldchange.star <= 1 / min.foldchange + } + cutup <- rep(NA, length(dels)) + cutlow <- rep(NA, length(dels)) + g2 <- rep(NA, length(dels)) + errup <- matrix(NA, ncol = length(dels), nrow = ncol(samr.obj$ttstar0)) + errlow <- matrix(NA, ncol = length(dels), nrow = ncol(samr.obj$ttstar0)) + cat("", fill = TRUE) + cat("Computing delta table", fill = TRUE) + for (ii in seq_len(length(dels))) { + cat(ii, fill = TRUE) + ttt <- detec.slab(samr.obj, dels[ii], min.foldchange) + cutup[ii] <- 1e+10 + if (length(ttt$pup > 0)) { + cutup[ii] <- min(samr.obj$tt[ttt$pup]) + } + cutlow[ii] <- -1e+10 + if (length(ttt$plow) > 0) { + cutlow[ii] <- max(samr.obj$tt[ttt$plow]) + } + g2[ii] <- sumlengths(ttt) + errup[, ii] <- colSums(samr.obj$ttstar0 > cutup[ii] & + foldchange.cond.up) + errlow[, ii] <- colSums(samr.obj$ttstar0 < cutlow[ii] & + foldchange.cond.lo) + } + gmed <- apply(errup + errlow, 2, median) + g90 <- apply(errup + errlow, 2, quantile, 0.9) + res1 <- cbind( + samr.obj$pi0 * gmed, samr.obj$pi0 * g90, g2, + samr.obj$pi0 * gmed / g2, samr.obj$pi0 * g90 / g2, cutlow, + cutup + ) + res1 <- cbind(dels, res1) + dimnames(res1) <- list(NULL, c( + "delta", "# med false pos", + "90th perc false pos", "# called", "median FDR", "90th perc FDR", + "cutlo", "cuthi" + )) + return(res1) +} + +####################################################################### +# \tcompute the delta table for sequencing data +####################################################################### +samr.compute.delta.table.seq <- function( + samr.obj, + min.foldchange = 0, dels = NULL) { + res1 <- NULL + flag <- TRUE + ## check whether any gene satisfies the foldchange + # restrictions + if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | + samr.obj$resp.type == samr.const.twoclass.paired.response) & + (min.foldchange > 0)) { + sat.up <- (samr.obj$foldchange >= min.foldchange) & (samr.obj$evo > + 0) + sat.dn <- (samr.obj$foldchange <= 1 / min.foldchange) & + (samr.obj$evo < 0) + if (sum(sat.up) + sum(sat.dn) == 0) { + flag <- FALSE + } + } + if (flag) { + if (is.null(dels)) { + dels <- generate.dels(samr.obj, min.foldchange = min.foldchange) + } + cat("Number of thresholds chosen (all possible thresholds) =", + length(dels), + fill = TRUE + ) + if (length(dels) > 0) { + ## sort delta to make the fast calculation right + dels <- sort(dels) + ## get the upper and lower cutoffs + cat("Getting all the cutoffs for the thresholds...\n") + slabs <- samr.seq.detec.slabs(samr.obj, dels, min.foldchange) + cutup <- slabs$cutup + cutlow <- slabs$cutlow + g2 <- slabs$g2 + ## get the number of errors under the null hypothesis + cat("Getting number of false positives in the permutation...\n") + errnum <- samr.seq.null.err( + samr.obj, min.foldchange, + cutup, cutlow + ) + res1 <- NULL + gmed <- apply(errnum, 2, median) + g90 <- apply(errnum, 2, quantile, 0.9) + res1 <- cbind(samr.obj$pi0 * gmed, samr.obj$pi0 * + g90, g2, samr.obj$pi0 * gmed / g2, samr.obj$pi0 * + g90 / g2, cutlow, cutup) + res1 <- cbind(dels, res1) + dimnames(res1) <- list(NULL, c( + "delta", "# med false pos", + "90th perc false pos", "# called", "median FDR", + "90th perc FDR", "cutlo", "cuthi" + )) + } + } + return(res1) +} + +# ============================================================================== +# samr.plot +# ============================================================================== +samr.plot <- function(samr.obj, del = NULL, min.foldchange = 0) { + ## make observed-expected plot + ## takes foldchange into account too + if (is.null(del)) { + del <- sqrt(max(abs(sort(samr.obj$tt) - samr.obj$evo))) + } + b <- detec.slab(samr.obj, del, min.foldchange) + foldchange.cond.up <- rep(TRUE, length(samr.obj$evo)) + foldchange.cond.lo <- rep(TRUE, length(samr.obj$evo)) + if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > + 0)) { + foldchange.cond.up <- samr.obj$foldchange >= min.foldchange + foldchange.cond.lo <- samr.obj$foldchange <= 1 / min.foldchange + } + col <- rep(1, length(samr.obj$evo)) + col[b$plow] <- 3 + col[b$pup] <- 2 + if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > + 0)) { + col[!foldchange.cond.lo & !foldchange.cond.up] <- 1 + } + col.ordered <- col[order(samr.obj$tt)] + ylims <- range(samr.obj$tt) + xlims <- range(samr.obj$evo) + plot(samr.obj$evo, sort(samr.obj$tt), + xlab = "expected score", + ylab = "observed score", ylim = ylims, xlim = xlims, + type = "n" + ) + points(samr.obj$evo, sort(samr.obj$tt), col = col.ordered) + abline(0, 1) + abline(del, 1, lty = 2) + abline(-del, 1, lty = 2) +} + +# ============================================================================== +# samr.compute.siggenes.table +# ============================================================================== +samr.compute.siggenes.table <- function( + samr.obj, del, + data, delta.table, min.foldchange = 0, all.genes = FALSE, + compute.localfdr = FALSE) { + ## computes significant genes table, starting with samr + # object 'a' and 'delta.table' + ## for a **single** value del + ## if all.genes is true, all genes are printed (and value + # of del is ignored) + if (is.null(data$geneid)) { + data$geneid <- paste("g", seq_len(nrow(data$x)), sep = "") + } + if (is.null(data$genenames)) { + data$genenames <- paste("g", seq_len(nrow(data$x)), sep = "") + } + if (!all.genes) { + sig <- detec.slab(samr.obj, del, min.foldchange) + } + if (all.genes) { + p <- length(samr.obj$tt) + pup <- (1:p)[samr.obj$tt >= 0] + plo <- (1:p)[samr.obj$tt < 0] + sig <- list(pup = pup, plo = plo) + } + if (compute.localfdr) { + aa <- localfdr(samr.obj, min.foldchange) + if (length(sig$pup) > 0) { + fdr.up <- predictlocalfdr(aa$smooth.object, samr.obj$tt[sig$pup]) + } + if (length(sig$plo) > 0) { + fdr.lo <- predictlocalfdr(aa$smooth.object, samr.obj$tt[sig$plo]) + } + } + qvalues <- NULL + if (length(sig$pup) > 0 | length(sig$plo) > 0) { + qvalues <- qvalue.func(samr.obj, sig, delta.table) + } + res.up <- NULL + res.lo <- NULL + done <- FALSE + + # two class unpaired or paired (foldchange is reported) + if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | + samr.obj$resp.type == samr.const.twoclass.paired.response)) { + if (!is.null(sig$pup)) { + res.up <- cbind( + sig$pup + 1, data$genenames[sig$pup], + data$geneid[sig$pup], samr.obj$tt[sig$pup], samr.obj$numer[sig$pup], + samr.obj$sd[sig$pup], samr.obj$foldchange[sig$pup], + qvalues$qvalue.up + ) + if (compute.localfdr) { + res.up <- cbind(res.up, fdr.up) + } + temp.names <- list(NULL, c( + "Row", "Gene ID", "Gene Name", + "Score(d)", "Numerator(r)", "Denominator(s+s0)", + "Fold Change", "q-value(%)" + )) + if (compute.localfdr) { + temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") + } + dimnames(res.up) <- temp.names + } + if (!is.null(sig$plo)) { + res.lo <- cbind( + sig$plo + 1, data$genenames[sig$plo], + data$geneid[sig$plo], samr.obj$tt[sig$plo], samr.obj$numer[sig$plo], + samr.obj$sd[sig$plo], samr.obj$foldchange[sig$plo], + qvalues$qvalue.lo + ) + if (compute.localfdr) { + res.lo <- cbind(res.lo, fdr.lo) + } + temp.names <- list(NULL, c( + "Row", "Gene ID", "Gene Name", + "Score(d)", "Numerator(r)", "Denominator(s+s0)", + "Fold Change", "q-value(%)" + )) + if (compute.localfdr) { + temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") + } + dimnames(res.lo) <- temp.names + } + done <- TRUE + } + + # multiclass + if (samr.obj$resp.type == samr.const.multiclass.response) { + if (!is.null(sig$pup)) { + res.up <- cbind( + sig$pup + 1, data$genenames[sig$pup], + data$geneid[sig$pup], samr.obj$tt[sig$pup], samr.obj$numer[sig$pup], + samr.obj$sd[sig$pup], samr.obj$stand.contrasts[sig$pup, ], + qvalues$qvalue.up + ) + + if (compute.localfdr) { + res.up <- cbind(res.up, fdr.up) + } + + collabs.contrast <- paste( + "contrast-", as.character(seq_len(ncol(samr.obj$stand.contrasts))), + sep = "" + ) + temp.names <- list(NULL, c( + "Row", "Gene ID", "Gene Name", + "Score(d)", "Numerator(r)", "Denominator(s+s0)", + collabs.contrast, "q-value(%)" + )) + + if (compute.localfdr) { + temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") + } + dimnames(res.up) <- temp.names + } + res.lo <- NULL + done <- TRUE + } + + # all other cases + if (!done) { + if (!is.null(sig$pup)) { + res.up <- cbind( + sig$pup + 1, data$genenames[sig$pup], + data$geneid[sig$pup], samr.obj$tt[sig$pup], samr.obj$numer[sig$pup], + samr.obj$sd[sig$pup], samr.obj$foldchange[sig$pup], + qvalues$qvalue.up + ) + if (compute.localfdr) { + res.up <- cbind(res.up, fdr.up) + } + temp.names <- list(NULL, c( + "Row", "Gene ID", "Gene Name", + "Score(d)", "Numerator(r)", "Denominator(s+s0)", + "q-value(%)" + )) + if (compute.localfdr) { + temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") + } + dimnames(res.up) <- temp.names + } + if (!is.null(sig$plo)) { + res.lo <- cbind( + sig$plo + 1, data$genenames[sig$plo], + data$geneid[sig$plo], samr.obj$tt[sig$plo], samr.obj$numer[sig$plo], + samr.obj$sd[sig$plo], samr.obj$foldchange[sig$plo], + qvalues$qvalue.lo + ) + if (compute.localfdr) { + res.lo <- cbind(res.lo, fdr.lo) + } + temp.names <- list(NULL, c( + "Row", "Gene ID", "Gene Name", + "Score(d)", "Numerator(r)", "Denominator(s+s0)", + "q-value(%)" + )) + if (compute.localfdr) { + temp.names[[2]] <- c(temp.names[[2]], "localfdr(%)") + } + dimnames(res.lo) <- temp.names + } + done <- TRUE + } + if (!is.null(res.up)) { + o1 <- order(-samr.obj$tt[sig$pup]) + res.up <- res.up[o1, , drop = FALSE] + } + if (!is.null(res.lo)) { + o2 <- order(samr.obj$tt[sig$plo]) + res.lo <- res.lo[o2, , drop = FALSE] + } + color.ind.for.multi <- NULL + if ( + samr.obj$resp.type == samr.const.multiclass.response & !is.null(sig$pup) + ) { + condition_1 <- + samr.obj$stand.contrasts[sig$pup, ] > samr.obj$stand.contrasts.95[2] + condition_2 <- + samr.obj$stand.contrasts[sig$pup, ] < samr.obj$stand.contrasts.95[1] + color.ind.for.multi <- 1 * condition_1 + (-1) * condition_2 + } + ngenes.up <- nrow(res.up) + if (is.null(ngenes.up)) { + ngenes.up <- 0 + } + ngenes.lo <- nrow(res.lo) + if (is.null(ngenes.lo)) { + ngenes.lo <- 0 + } + return( + list( + genes.up = res.up, genes.lo = res.lo, + color.ind.for.multi = color.ind.for.multi, ngenes.up = ngenes.up, + ngenes.lo = ngenes.lo + ) + ) +} +generate.dels <- function(samr.obj, min.foldchange = 0) { + dels <- NULL + ## initialize calculation + tag <- order(samr.obj$tt) + if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | + samr.obj$resp.type == samr.const.twoclass.paired.response) & + (min.foldchange > 0)) { + res.mat <- data.frame( + tt = samr.obj$tt[tag], fc = samr.obj$foldchange[tag], + evo = samr.obj$evo, dif = samr.obj$tt[tag] - samr.obj$evo + ) + res.up <- res.mat[res.mat$evo > 0, ] + res.lo <- res.mat[res.mat$evo < 0, ] + res.up <- res.up[res.up$fc >= min.foldchange, ] + res.lo <- res.lo[res.lo$fc <= 1 / min.foldchange, ] + } else { + res.mat <- data.frame( + tt = samr.obj$tt[tag], evo = samr.obj$evo, + dif = samr.obj$tt[tag] - samr.obj$evo + ) + res.up <- res.mat[res.mat$evo > 0, ] + res.lo <- res.mat[res.mat$evo < 0, ] + } + ## for the upper part + up.vec <- rep(NA, nrow(res.up)) + if (nrow(res.up) > 0) { + st <- 1e-08 + i.cur <- 1 + for (i in seq_len(nrow(res.up))) { + if (res.up$dif[i] > st) { + st <- res.up$dif[i] + up.vec[i.cur] <- st + i.cur <- i.cur + 1 + } + } + } + ## for the lower part + lo.vec <- rep(NA, nrow(res.lo)) + if (nrow(res.lo) > 0) { + st <- -1e-08 + i.cur <- 1 + for (i in seq(nrow(res.lo), 1)) { + if (res.lo$dif[i] < st) { + st <- res.lo$dif[i] + lo.vec[i.cur] <- st + i.cur <- i.cur + 1 + } + } + } + ## combine them + vec <- c(up.vec, -lo.vec) + vec <- vec[!is.na(vec)] + vec <- vec - 1e-08 + dels <- sort(unique(vec)) + return(dels) +} +samr.seq.detec.slabs <- function(samr.obj, dels, min.foldchange) { + ## initialize calculation + tag <- order(samr.obj$tt) + if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | + samr.obj$resp.type == samr.const.twoclass.paired.response) & + (min.foldchange > 0)) { + res.mat <- data.frame( + tt = samr.obj$tt[tag], fc = samr.obj$foldchange[tag], + evo = samr.obj$evo, dif = samr.obj$tt[tag] - samr.obj$evo + ) + res.up <- res.mat[res.mat$evo > 0, ] + res.lo <- res.mat[res.mat$evo < 0, ] + res.up <- res.up[res.up$fc >= min.foldchange, ] + res.lo <- res.lo[res.lo$fc <= 1 / min.foldchange, ] + } else { + res.mat <- data.frame( + tt = samr.obj$tt[tag], evo = samr.obj$evo, + dif = samr.obj$tt[tag] - samr.obj$evo + ) + res.up <- res.mat[res.mat$evo > 0, ] + res.lo <- res.mat[res.mat$evo < 0, ] + } + ## begin calculating + cutup <- rep(1e+10, length(dels)) + cutlow <- rep(-1e+10, length(dels)) + g2.up <- g2.lo <- rep(0, length(dels)) + if (nrow(res.up) > 0) { + res.up <- data.frame( + dif = res.up$dif, tt = res.up$tt, + num = seq(nrow(res.up), 1) + ) + ## get the upper part + j <- 1 + ii <- 1 + while (j <= nrow(res.up) & ii <= length(dels)) { + if (res.up$dif[j] > dels[ii]) { + cutup[ii] <- res.up$tt[j] + g2.up[ii] <- res.up$num[j] + ii <- ii + 1 + } else { + j <- j + 1 + } + } + } + if (nrow(res.lo) > 0) { + res.lo <- data.frame( + dif = res.lo$dif, tt = res.lo$tt, + num = seq_len(nrow(res.lo)) + ) + ## get the lower part + j <- nrow(res.lo) + ii <- 1 + while (j >= 1 & ii <= length(dels)) { + if (res.lo$dif[j] < -dels[ii]) { + cutlow[ii] <- res.lo$tt[j] + g2.lo[ii] <- res.lo$num[j] + ii <- ii + 1 + } else { + j <- j - 1 + } + } + } + g2 <- g2.up + g2.lo + return(list(cutup = cutup, cutlow = cutlow, g2 = g2)) +} +sumlengths <- function(aa) { + length(aa$pl) + length(aa$pu) +} + +samr.seq.null.err <- function( + samr.obj, min.foldchange, + cutup, cutlow) { + errup <- matrix(NA, ncol = length(cutup), nrow = ncol(samr.obj$ttstar0)) + errlow <- matrix(NA, ncol = length(cutlow), nrow = ncol(samr.obj$ttstar0)) + cutup.rank <- rank(cutup, ties.method = "min") + cutlow.rank <- rank(-cutlow, ties.method = "min") + for (jj in seq_len(ncol(samr.obj$ttstar0))) { + keep.up <- keep.dn <- samr.obj$ttstar0[, jj] + if ((samr.obj$resp.type == samr.const.twoclass.unpaired.response | + samr.obj$resp.type == samr.const.twoclass.paired.response) & + (min.foldchange > 0)) { + keep.up <- keep.up[samr.obj$foldchange.star[, jj] >= + min.foldchange] + keep.dn <- keep.dn[samr.obj$foldchange.star[, jj] <= + 1 / min.foldchange] + } + errup[jj, ] <- length(keep.up) - (rank(c(cutup, keep.up), + ties.method = "min" + )[seq_len(length(cutup))] - cutup.rank) + errlow[jj, ] <- length(keep.dn) - (rank(c(-cutlow, -keep.dn), + ties.method = "min" + )[seq_len(length(cutlow))] - cutlow.rank) + } + errnum <- errup + errlow + return(errnum) +} +detec.slab <- function(samr.obj, del, min.foldchange) { + ## find genes above and below the slab of half-width del + # this calculation is tricky- for consistency, the slab + # condition picks + # all genes that are beyond the first departure from the + # slab + # then the fold change condition is applied (if applicable) + n <- length(samr.obj$tt) + tt <- samr.obj$tt + evo <- samr.obj$evo + tag <- order(tt) + pup <- NULL + foldchange.cond.up <- rep(TRUE, length(evo)) + foldchange.cond.lo <- rep(TRUE, length(evo)) + if (!is.null(samr.obj$foldchange[1]) & (min.foldchange > + 0)) { + foldchange.cond.up <- samr.obj$foldchange >= min.foldchange + foldchange.cond.lo <- samr.obj$foldchange <= 1 / min.foldchange + } + o1 <- (1:n)[(tt[tag] - evo > del) & evo > 0] + if (length(o1) > 0) { + o1 <- o1[1] + o11 <- o1:n + o111 <- rep(FALSE, n) + o111[tag][o11] <- TRUE + pup <- (1:n)[o111 & foldchange.cond.up] + } + plow <- NULL + o2 <- (1:n)[(evo - tt[tag] > del) & evo < 0] + if (length(o2) > 0) { + o2 <- o2[length(o2)] + o22 <- 1:o2 + o222 <- rep(FALSE, n) + o222[tag][o22] <- TRUE + plow <- (1:n)[o222 & foldchange.cond.lo] + } + return(list(plow = plow, pup = pup)) +} + +#' @importFrom stats smooth.spline +localfdr <- function( + samr.obj, min.foldchange, perc = 0.01, + df = 10) { + ## estimates compute.localfdr at score 'd', using SAM + # object 'samr.obj' + ## 'd' can be a vector of d scores + ## returns estimate of symmetric fdr as a percentage + # this version uses a 1% symmetric window, and does not + # estimate fdr in + # windows having fewer than 100 genes + ## to use: first run samr and then pass the resulting fit + # object to + ## localfdr + ## NOTE: at most 20 of the perms are used to estimate the + # fdr (for speed sake) + # I try two window shapes: symmetric and an assymetric one + # currently I use the symmetric window to estimate the + # compute.localfdr + ngenes <- length(samr.obj$tt) + mingenes <- 50 + # perc is increased, in order to get at least mingenes in a + # window + perc <- max(perc, mingenes / length(samr.obj$tt)) + nperms.to.use <- min(20, ncol(samr.obj$ttstar)) + nperms <- ncol(samr.obj$ttstar) + d <- seq(sort(samr.obj$tt)[1], sort(samr.obj$tt)[ngenes], + length = 100 + ) + ndscore <- length(d) + dvector <- rep(NA, ndscore) + ind.foldchange <- rep(TRUE, length(samr.obj$tt)) + if (!is.null(samr.obj$foldchange[1]) & min.foldchange > 0) { + ind.foldchange <- (samr.obj$foldchange >= min.foldchange) | + (samr.obj$foldchange <= min.foldchange) + } + fdr.temp <- function(temp, dlow, dup, pi0, ind.foldchange) { + return(sum(pi0 * (temp >= dlow & temp <= dup & ind.foldchange))) + } + for (i in 1:ndscore) { + pi0 <- samr.obj$pi0 + r <- sum(samr.obj$tt < d[i]) + r22 <- round(max(r - length(samr.obj$tt) * perc / 2, 1)) + dlow.sym <- sort(samr.obj$tt)[r22] + r22 <- min(r + length(samr.obj$tt) * perc / 2, length(samr.obj$tt)) + dup.sym <- sort(samr.obj$tt)[r22] + oo <- samr.obj$tt >= dlow.sym & samr.obj$tt <= dup.sym & + ind.foldchange + if (!is.null(samr.obj$foldchange[1]) & min.foldchange > + 0) { + temp <- as.vector(samr.obj$foldchange.star[, 1:nperms.to.use]) + ind.foldchange <- (temp >= min.foldchange) | (temp <= + min.foldchange) + } + temp <- samr.obj$ttstar0[, sample(1:nperms, size = nperms.to.use)] + fdr.sym <- median(apply( + temp, 2, fdr.temp, dlow.sym, + dup.sym, pi0, ind.foldchange + )) + fdr.sym <- 100 * fdr.sym / sum(oo) + dlow.sym <- dlow.sym + dup.sym <- dup.sym + dvector[i] <- fdr.sym + } + om <- !is.na(dvector) & (dvector != Inf) + aa <- smooth.spline(d[om], dvector[om], df = df) + return(list(smooth.object = aa, perc = perc, df = df)) +} + +predictlocalfdr <- function(smooth.object, d) { + yhat <- predict(smooth.object, d)$y + yhat <- pmin(yhat, 100) + yhat <- pmax(yhat, 0) + return(yhat) +} + +qvalue.func <- function(samr.obj, sig, delta.table) { + # returns q-value as a percentage (out of 100) + LARGE <- 1e+10 + qvalue.up <- rep(NA, length(sig$pup)) + o1 <- sig$pup + cutup <- delta.table[, 8] + FDR <- delta.table[, 5] + ii <- 0 + for (i in o1) { + o <- abs(cutup - samr.obj$tt[i]) + o[is.na(o)] <- LARGE + oo <- seq_len(length(o))[o == min(o)] + oo <- oo[length(oo)] + ii <- ii + 1 + qvalue.up[ii] <- FDR[oo] + } + qvalue.lo <- rep(NA, length(sig$plo)) + o2 <- sig$plo + cutlo <- delta.table[, 7] + ii <- 0 + for (i in o2) { + o <- abs(cutlo - samr.obj$tt[i]) + o[is.na(o)] <- LARGE + oo <- seq_len(length(o))[o == min(o)] + oo <- oo[length(oo)] + ii <- ii + 1 + qvalue.lo[ii] <- FDR[oo] + } + # any qvalues that are missing, are set to 1 (the highest + # value) + qvalue.lo[is.na(qvalue.lo)] <- 1 + qvalue.up[is.na(qvalue.up)] <- 1 + # ensure that each qvalue vector is monotone non-increasing + o1 <- order(samr.obj$tt[sig$plo]) + qv1 <- qvalue.lo[o1] + qv11 <- qv1 + if (length(qv1) > 1) { + for (i in 2:length(qv1)) { + if (qv11[i] < qv11[i - 1]) { + qv11[i] <- qv11[i - 1] + } + } + qv111 <- qv11 + qv111[o1] <- qv11 + } else { + qv111 <- qv1 + } + o2 <- order(samr.obj$tt[sig$pup]) + qv2 <- qvalue.up[o2] + qv22 <- qv2 + if (length(qv2) > 1) { + for (i in 2:length(qv2)) { + if (qv22[i] > qv22[i - 1]) { + qv22[i] <- qv22[i - 1] + } + } + qv222 <- qv22 + qv222[o2] <- qv22 + } else { + qv222 <- qv2 + } + return(list(qvalue.lo = 100 * qv111, qvalue.up = 100 * qv222)) +}