Diff of /voomNSC.train.R [000000] .. [81de4e]

Switch to unified view

a b/voomNSC.train.R
1
voomNSC.train =
2
  function (counts, conditions, n.threshold = 30, offset.percent = 50, remove.zeros = TRUE,
3
            normalization = c("TMM", "deseq", "none")) 
4
  {
5
    normalization = match.arg(normalization)
6
    this.call = match.call()
7
    
8
#### voom transformation steps
9
    {
10
      if (normalization == "TMM"){
11
        design = model.matrix(~conditions)
12
        rownames(design) = colnames(counts)
13
        dge = DGEList(counts = counts)
14
        dge = calcNormFactors(dge, method = "TMM")
15
        vm = voom(dge, design, plot=F)
16
        x = vm $ E #pxn dim gene expression matrix
17
        w = vm $ weights #pxn dim weight matrix
18
        dimnames(w) = dimnames(x)
19
      } 
20
      else if (normalization == "deseq"){
21
        design = model.matrix(~conditions)
22
        rownames(design) = colnames(counts)
23
        dge = DGEList(counts = counts)
24
        dge = calcNormFactors(dge, method = "RLE")
25
        vm = voom(dge, design, plot=F)
26
        x = vm $ E #pxn dim gene expression matrix
27
        w = vm $ weights #pxn dim weight matrix
28
        dimnames(w) = dimnames(x)
29
      }
30
      else {
31
        design = model.matrix(~conditions)
32
        rownames(design) = colnames(counts)
33
        vm = voom(counts, design, plot=F)
34
        x = vm $ E #pxn dim gene expression matrix
35
        w = vm $ weights #pxn dim weight matrix
36
        dimnames(w) = dimnames(x)
37
      }
38
    }
39
40
    y = as.factor(conditions)
41
    n.class = table(y)
42
    prior = n.class/length(y)
43
    
44
# soft.shrink(...)
45
soft.shrink = 
46
  function (delta, threshold) 
47
  {
48
    dif = abs(delta) - threshold
49
    delta = sign(delta) * dif * (dif > 0)
50
    nonzero = sum(drop((dif > 0) %*% rep(1, ncol(delta))) > 0)
51
    attr(delta, "nonzero") = nonzero
52
    delta
53
  }
54
55
# diag.disc(...)
56
diag.disc =
57
  function (x, centroids, prior, weight) 
58
  {
59
    if (!missing(weight)) {
60
      posid = (weight > 0)
61
      if (any(posid)) {
62
        weight = sqrt(weight[posid])
63
        centroids = centroids[posid, , drop = FALSE] * weight
64
        x = x[posid, , drop = FALSE] * weight
65
      }
66
      else {
67
        mat = outer(rep(1, ncol(x)), log(prior), "*")
68
        dimnames(mat) = list(NULL, dimnames(centroids)[[2]])
69
        return(mat)
70
      }
71
    }
72
    dd = t(x) %*% centroids
73
    dd0 = drop(rep(1, nrow(centroids)) %*% (centroids^2))/2 - 
74
      log(prior)
75
    names(dd0) = NULL
76
    scale(dd, dd0, FALSE)
77
  }
78
79
# safe.exp(...)
80
safe.exp = function (x) 
81
{
82
  xx = sign(x) * pmin(abs(x), 500)
83
  return(exp(xx))
84
}
85
86
#soft.max(...)
87
softmax =
88
  function (x, gap = FALSE) 
89
  {
90
    d = dim(x)
91
    maxdist = x[, 1]
92
    pclass = rep(1, d[1])
93
    for (i in seq(2, d[2])) {
94
      l = x[, i] > maxdist
95
      pclass[l] = i
96
      maxdist[l] = x[l, i]
97
    }
98
    dd = dimnames(x)[[2]]
99
    if (gap) {
100
      x = abs(maxdist - x)
101
      x[cbind(seq(d[1]), pclass)] = drop(x %*% rep(1, d[2]))
102
      gaps = do.call("pmin", data.frame(x))
103
    }
104
    pclass = if (is.null(dd) || !length(dd)) 
105
      pclass
106
    else factor(pclass, levels = seq(d[2]), labels = dd)
107
    if (gap) 
108
      list(class = pclass, gaps = gaps)
109
    else pclass
110
  }
111
112
## wnsc(....)
113
wnsc =
114
  function (x, y, n.threshold = 30, offset.percent = 50, prior = NULL, remove.zeros = TRUE, weights = NULL) 
115
  {
116
    selected.genes = selected.genesIndex = list()
117
    this.call = match.call()
118
    Y = model.matrix(~factor(y) - 1, data = list(y = y))
119
    
120
    xtest = x
121
    ytest = y
122
    
123
    wStats = weighted.stats(x, weights, y)
124
    
125
if (min(n.class) == 1) {
126
      stop(warning("Warning: a class contains only 1 sample"))
127
    }
128
    
129
    n = sum(n.class)
130
    ntest = ncol(xtest)
131
    K = length(prior)
132
    p = nrow(x)
133
    dimnames(Y) = list(NULL, names(n.class))
134
    centroids = wStats$weightedMean.C
135
    sd = wStats$weightedSD.pooled
136
    offset = quantile(sd, offset.percent/100)
137
    sd = sd + offset
138
    centroid.overall = wStats$weightedMean
139
    se.scale = wStats$se.scale
140
    delta = wStats$delta
141
    threshold = seq(0, max(abs(delta)), length = n.threshold)
142
    nonzero = seq(n.threshold)
143
    errors = threshold
144
    pred.conditions = as.list(seq(n.threshold))
145
    prob = array(0, c(ntest, K, n.threshold))
146
    
147
    dshrunkAll <- list()
148
    for (ii in 1:n.threshold) {
149
      
150
      delta.shrunk = soft.shrink(delta, threshold[ii])
151
      delta.shrunk = t(t(delta.shrunk) * as.numeric(se.scale))
152
      dshrunkAll[[ii]] <- delta.shrunk
153
      
154
      nonzero[ii] = attr(delta.shrunk, "nonzero")
155
      posid = drop(abs(delta.shrunk) %*% rep(1, K)) > 0
156
      selected.genes[[ii]] = rownames(wStats$weightedMean.C)[posid]
157
      selected.genesIndex[[ii]] = which(posid == TRUE)
158
      dd = diag.disc((xtest - centroid.overall)/sd, delta.shrunk, 
159
                      prior, weight = posid)
160
      pred.conditions[[ii]] = softmax(dd)
161
      dd = safe.exp(dd)
162
      prob[, , ii] = dd/drop(dd %*% rep(1, K))
163
      if (!is.null(ytest)) {
164
        errors[ii] = sum(pred.conditions[[ii]] != ytest)
165
      }
166
    }
167
    
168
    thresh.names = format(round(threshold, 3))
169
    names(pred.conditions) = names(selected.genes) = thresh.names
170
    attr(pred.conditions, "row.names") = paste(seq(ntest))
171
    class(pred.conditions) = "data.frame"
172
    
173
    if (remove.zeros) 
174
      n.threshold = match(0, nonzero, n.threshold)
175
    
176
    dimnames(prob) = list(paste(seq(ntest)), names(n.class), 
177
                           thresh.names)
178
    
179
    object = list(counts = counts, conditions = factor(conditions), pred.conditions = pred.conditions, prob = prob[, , seq(n.threshold)], 
180
                  weightedMean.C = centroids, weightedMean = centroid.overall, delta = delta, normalization = normalization,
181
                  weightedSD.pooled = sd, threshold = threshold[seq(n.threshold)], nonzero = nonzero[seq(n.threshold)], 
182
                   se.scale = se.scale, call = this.call, prior = prior, offset = offset, SelectedGenes = selected.genes, 
183
                   SelectedGenesIndex = selected.genesIndex)
184
    
185
    object$errors = errors
186
    
187
    opt.threshold = max(object$threshold[which(object$errors == min(object$errors))])
188
    
189
    model.res = as.data.frame(cbind(object$threshold, object$nonzero, object$errors))
190
    colnames(model.res) = c("threshold", "nonzero", "errors")
191
    object$modelRes = model.res
192
    
193
    object$delta.shrunk <- dshrunkAll[[which(object$threshold == opt.threshold)]]
194
    #min.error = model.res[model.res$errors == min(model.res$errors),]
195
    #min.error.gene = min.error[min.error$nonzero == min(min.error$nonzero),]
196
    #opt.threshold = min.error.gene$threshold
197
    
198
    object$opt.threshold = opt.threshold
199
    object
200
  }
201
202
    junk = wnsc(x, y = conditions, offset.percent = offset.percent, n.threshold = n.threshold, prior = prior,
203
                remove.zeros = remove.zeros, weights = w)
204
    
205
    junk$call = this.call
206
    class(junk) = "pamrtrained"
207
    junk
208
  }