a b/Codes(02.05.2015).R
1
#Calculation of quantile normalization factors necessary for calcNormFactorsGSD function. Same for both train and test sets.
2
calcFactorQuantileGSD =
3
  function (data, lib.size, p = 0.75) 
4
  {
5
    y <- t(t(data)/lib.size)
6
    f <- apply(y, 2, function(x) quantile(x, p = p))
7
  }
8
9
#Calculation of weighted normalization factors necessary for calcNormFactorsGSD function.
10
calcFactorWeightedGSD =
11
  function (obs, ref, libsize.obs = NULL, libsize.ref = NULL, logratioTrim = 0.3, 
12
            sumTrim = 0.05, doWeighting = TRUE, Acutoff = -1e+10) 
13
  {
14
    if (all(obs == ref)) 
15
      return(1)
16
    obs <- as.numeric(obs)
17
    ref <- as.numeric(ref)
18
    if (is.null(libsize.obs)) 
19
      nO <- sum(obs)
20
    else nO <- libsize.obs
21
    if (is.null(libsize.ref)) 
22
      nR <- sum(ref)
23
    else nR <- libsize.ref
24
    logR <- log2((obs/nO)/(ref/nR))
25
    absE <- (log2(obs/nO) + log2(ref/nR))/2
26
    v <- (nO - obs)/nO/obs + (nR - ref)/nR/ref
27
    fin <- is.finite(logR) & is.finite(absE) & (absE > Acutoff)
28
    logR <- logR[fin]
29
    absE <- absE[fin]
30
    v <- v[fin]
31
    n <- sum(fin)
32
    loL <- floor(n * logratioTrim) + 1
33
    hiL <- n + 1 - loL
34
    loS <- floor(n * sumTrim) + 1
35
    hiS <- n + 1 - loS
36
    keep <- (rank(logR) >= loL & rank(logR) <= hiL) & (rank(absE) >= loS & rank(absE) <= hiS)
37
    if (doWeighting) 
38
      2^(sum(logR[keep]/v[keep], na.rm = TRUE)/sum(1/v[keep],na.rm = TRUE))
39
    else 2^(mean(logR[keep], na.rm = TRUE))
40
  }
41
42
#Calculation of deseq normalization factors necessary for calcNormFactorsGSD function.
43
calcFactorRLEGSD =
44
  function (data.train, data.test, lib.size, lib.size.test) 
45
  {
46
    gm <- exp(rowMeans(log(data.train)))
47
    f = apply(data.train, 2, function(u) median((u/gm)[gm > 0]))
48
    f.test = apply(data.test, 2, function(u) median((u/gm)[gm > 0]))
49
    f = f / lib.size
50
    f.test = f.test / lib.size.test
51
    deseqsizefactors = list(f,f.test)
52
    return(deseqsizefactors)
53
  }
54
55
#Calculation of normalization factors using calcNormFactorsGSD function.
56
calcNormFactorsGSD =
57
  function (data.train, data.test, lib.size = NULL, method = c("TMM", "deseq", "none"), refColumn = NULL, logratioTrim = 0.3, sumTrim = 0.05, 
58
            doWeighting = TRUE, Acutoff = -1e+10, p = 0.75, ...) 
59
  {
60
    x <- as.matrix(data.train)
61
    xtest <- as.matrix(data.test)
62
    if (any(is.na(x)||is.na(xtest)))
63
      stop("NAs not permitted")
64
    if (is.null(lib.size)) 
65
      lib.size <- colSums(x)
66
      
67
    lib.size.test <- colSums(xtest)
68
    method <- match.arg(method)
69
    allzero <- rowSums(x > 0) == 0
70
    if (any(allzero)) 
71
      x <- x[!allzero, , drop = FALSE]
72
      xtest <- xtest[!allzero, , drop = FALSE]
73
    if (nrow(x) == 0 || ncol(x) == 1) 
74
      method = "none"
75
    
76
    if(method == "TMM"){
77
      f75 <- calcFactorQuantileGSD(data = x, lib.size = lib.size, p = 0.75)
78
      f75.test <- calcFactorQuantileGSD(data = xtest, lib.size = lib.size.test, p = 0.75)
79
      
80
      refColumn <- which.min(abs(f75 - mean(f75)))
81
      f <- rep(NA, ncol(x))
82
      f.test <- rep(NA, ncol(xtest))
83
      for (i in 1:ncol(x)) f[i] <- calcFactorWeightedGSD(obs = x[,i], ref = x[, refColumn], libsize.obs = lib.size[i], 
84
                                                         libsize.ref = lib.size[refColumn], logratioTrim = logratioTrim, 
85
                                                         sumTrim = sumTrim, doWeighting = doWeighting, Acutoff = Acutoff)
86
      for (i in 1:ncol(xtest)) f.test[i] <- calcFactorWeightedGSD(obs = xtest[,i], ref = x[, refColumn], libsize.obs = lib.size.test[i], 
87
                                                                  libsize.ref = lib.size[refColumn], logratioTrim = logratioTrim, 
88
                                                                  sumTrim = sumTrim, doWeighting = doWeighting, Acutoff = Acutoff)
89
      normf = list(f,f.test)
90
    } 
91
    else if(method == "deseq"){
92
      normf = calcFactorRLEGSD(data.train = x, data.test = xtest, lib.size = lib.size, lib.size.test = lib.size.test)#/lib.size 
93
    }
94
    else {
95
      normf = list(rep(1, ncol(x)), rep(1, ncol(xtest)))
96
    }
97
    
98
    names(normf) = c("train", "test")
99
    
100
    f = as.numeric(normf[[1]]) / (exp(mean(log(normf[[1]]))))
101
    f.test = as.numeric(normf[[2]]) / (exp(mean(log(normf[[1]]))))
102
    normf2 = list(f, f.test, lib.size, lib.size.test)
103
    names(normf2) = c("TrainNormFactor","TestNormFactor","TrainLibSize","TestLibSize")
104
    return(normf2)
105
  }
106
107
#traindatayı, testdatayı ve trainclassı alacak, method = TMM, RLE, none olacak.
108
#train ve test icin voom gibi expression ve weights döndürecek.
109
voomGSD = 
110
  function(data.train, data.test, group, norm.method = c("TMM", "deseq", "none"), design = NULL, lib.size = NULL, span = 0.5)
111
  {
112
    out <- list()
113
    NormFactors = calcNormFactorsGSD(data.train = data.train, data.test = data.test, method = norm.method)
114
    TrainNormFactor = NormFactors$TrainNormFactor
115
    TestNormFactor = NormFactors$TestNormFactor
116
    TrainLibSize = NormFactors$TrainLibSize
117
    TestLibSize = NormFactors$TestLibSize
118
    lib.size.tr = TrainNormFactor * TrainLibSize
119
    lib.size.ts = TestNormFactor * TestLibSize
120
    
121
    design.tr = model.matrix(~group)
122
    rownames(design.tr) = colnames(data.train)
123
    
124
    design.ts <- matrix(1, ncol(data.test), 1)
125
    rownames(design.ts) <- colnames(data.test)
126
    colnames(design.ts) <- "GrandMean"
127
    
128
    y.tr <- t(log2(t(data.train + 0.5)/(lib.size.tr + 1) * 1e+06))
129
    y.ts <- t(log2(t(data.test + 0.5)/(lib.size.ts + 1) * 1e+06))
130
    fit.tr <- lmFit(y.tr, design.tr)
131
    fit.ts <- lmFit(y.ts, design.ts)
132
133
    
134
    if (is.null(fit.tr$Amean)) 
135
      fit$Amean <- rowMeans(y.tr, na.rm = TRUE)
136
    
137
    fit.ts$Amean = fit.tr$Amean
138
    fit.ts$sigma = fit.tr$sigma
139
    fit.ts$coefficients = fit.tr$coefficients[,1]
140
    
141
    sx <- fit.tr$Amean + mean(log2(lib.size.tr + 1)) - log2(1e+06)
142
    sy <- sqrt(fit.tr$sigma)
143
    l <- lowess(sx, sy, f = span)
144
    f <- approxfun(l, rule = 2)
145
146
    fitted.values.tr <- fit.tr$coefficients %*% t(fit.tr$design)
147
    fitted.values.ts <- fit.ts$coefficients %*% t(fit.ts$design)
148
    fitted.cpm.tr <- 2^fitted.values.tr
149
    fitted.cpm.ts <- 2^fitted.values.ts
150
    fitted.count.tr <- 1e-06 * t(t(fitted.cpm.tr) * (lib.size.tr + 1))
151
    fitted.count.ts <- 1e-06 * t(t(fitted.cpm.ts) * (lib.size.ts + 1))
152
    fitted.logcount.tr <- log2(fitted.count.tr)
153
    fitted.logcount.ts <- log2(fitted.count.ts)
154
    w.tr <- 1/f(fitted.logcount.tr)^4
155
    w.ts <- 1/f(fitted.logcount.ts)^4
156
    dim(w.tr) <- dim(fitted.logcount.tr)
157
    dim(w.ts) <- dim(fitted.logcount.ts)
158
    dimnames(w.tr) = dimnames(y.tr)
159
    dimnames(w.ts) = dimnames(y.ts)
160
    out$TrainExp <- y.tr
161
    out$TestExp <- y.ts
162
    out$TrainWeights <- w.tr
163
    out$TestWeights <- w.ts
164
    new("EList", out)
165
  }