|
a |
|
b/Codes(02.05.2015).R |
|
|
1 |
#Calculation of quantile normalization factors necessary for calcNormFactorsGSD function. Same for both train and test sets. |
|
|
2 |
calcFactorQuantileGSD = |
|
|
3 |
function (data, lib.size, p = 0.75) |
|
|
4 |
{ |
|
|
5 |
y <- t(t(data)/lib.size) |
|
|
6 |
f <- apply(y, 2, function(x) quantile(x, p = p)) |
|
|
7 |
} |
|
|
8 |
|
|
|
9 |
#Calculation of weighted normalization factors necessary for calcNormFactorsGSD function. |
|
|
10 |
calcFactorWeightedGSD = |
|
|
11 |
function (obs, ref, libsize.obs = NULL, libsize.ref = NULL, logratioTrim = 0.3, |
|
|
12 |
sumTrim = 0.05, doWeighting = TRUE, Acutoff = -1e+10) |
|
|
13 |
{ |
|
|
14 |
if (all(obs == ref)) |
|
|
15 |
return(1) |
|
|
16 |
obs <- as.numeric(obs) |
|
|
17 |
ref <- as.numeric(ref) |
|
|
18 |
if (is.null(libsize.obs)) |
|
|
19 |
nO <- sum(obs) |
|
|
20 |
else nO <- libsize.obs |
|
|
21 |
if (is.null(libsize.ref)) |
|
|
22 |
nR <- sum(ref) |
|
|
23 |
else nR <- libsize.ref |
|
|
24 |
logR <- log2((obs/nO)/(ref/nR)) |
|
|
25 |
absE <- (log2(obs/nO) + log2(ref/nR))/2 |
|
|
26 |
v <- (nO - obs)/nO/obs + (nR - ref)/nR/ref |
|
|
27 |
fin <- is.finite(logR) & is.finite(absE) & (absE > Acutoff) |
|
|
28 |
logR <- logR[fin] |
|
|
29 |
absE <- absE[fin] |
|
|
30 |
v <- v[fin] |
|
|
31 |
n <- sum(fin) |
|
|
32 |
loL <- floor(n * logratioTrim) + 1 |
|
|
33 |
hiL <- n + 1 - loL |
|
|
34 |
loS <- floor(n * sumTrim) + 1 |
|
|
35 |
hiS <- n + 1 - loS |
|
|
36 |
keep <- (rank(logR) >= loL & rank(logR) <= hiL) & (rank(absE) >= loS & rank(absE) <= hiS) |
|
|
37 |
if (doWeighting) |
|
|
38 |
2^(sum(logR[keep]/v[keep], na.rm = TRUE)/sum(1/v[keep],na.rm = TRUE)) |
|
|
39 |
else 2^(mean(logR[keep], na.rm = TRUE)) |
|
|
40 |
} |
|
|
41 |
|
|
|
42 |
#Calculation of deseq normalization factors necessary for calcNormFactorsGSD function. |
|
|
43 |
calcFactorRLEGSD = |
|
|
44 |
function (data.train, data.test, lib.size, lib.size.test) |
|
|
45 |
{ |
|
|
46 |
gm <- exp(rowMeans(log(data.train))) |
|
|
47 |
f = apply(data.train, 2, function(u) median((u/gm)[gm > 0])) |
|
|
48 |
f.test = apply(data.test, 2, function(u) median((u/gm)[gm > 0])) |
|
|
49 |
f = f / lib.size |
|
|
50 |
f.test = f.test / lib.size.test |
|
|
51 |
deseqsizefactors = list(f,f.test) |
|
|
52 |
return(deseqsizefactors) |
|
|
53 |
} |
|
|
54 |
|
|
|
55 |
#Calculation of normalization factors using calcNormFactorsGSD function. |
|
|
56 |
calcNormFactorsGSD = |
|
|
57 |
function (data.train, data.test, lib.size = NULL, method = c("TMM", "deseq", "none"), refColumn = NULL, logratioTrim = 0.3, sumTrim = 0.05, |
|
|
58 |
doWeighting = TRUE, Acutoff = -1e+10, p = 0.75, ...) |
|
|
59 |
{ |
|
|
60 |
x <- as.matrix(data.train) |
|
|
61 |
xtest <- as.matrix(data.test) |
|
|
62 |
if (any(is.na(x)||is.na(xtest))) |
|
|
63 |
stop("NAs not permitted") |
|
|
64 |
if (is.null(lib.size)) |
|
|
65 |
lib.size <- colSums(x) |
|
|
66 |
|
|
|
67 |
lib.size.test <- colSums(xtest) |
|
|
68 |
method <- match.arg(method) |
|
|
69 |
allzero <- rowSums(x > 0) == 0 |
|
|
70 |
if (any(allzero)) |
|
|
71 |
x <- x[!allzero, , drop = FALSE] |
|
|
72 |
xtest <- xtest[!allzero, , drop = FALSE] |
|
|
73 |
if (nrow(x) == 0 || ncol(x) == 1) |
|
|
74 |
method = "none" |
|
|
75 |
|
|
|
76 |
if(method == "TMM"){ |
|
|
77 |
f75 <- calcFactorQuantileGSD(data = x, lib.size = lib.size, p = 0.75) |
|
|
78 |
f75.test <- calcFactorQuantileGSD(data = xtest, lib.size = lib.size.test, p = 0.75) |
|
|
79 |
|
|
|
80 |
refColumn <- which.min(abs(f75 - mean(f75))) |
|
|
81 |
f <- rep(NA, ncol(x)) |
|
|
82 |
f.test <- rep(NA, ncol(xtest)) |
|
|
83 |
for (i in 1:ncol(x)) f[i] <- calcFactorWeightedGSD(obs = x[,i], ref = x[, refColumn], libsize.obs = lib.size[i], |
|
|
84 |
libsize.ref = lib.size[refColumn], logratioTrim = logratioTrim, |
|
|
85 |
sumTrim = sumTrim, doWeighting = doWeighting, Acutoff = Acutoff) |
|
|
86 |
for (i in 1:ncol(xtest)) f.test[i] <- calcFactorWeightedGSD(obs = xtest[,i], ref = x[, refColumn], libsize.obs = lib.size.test[i], |
|
|
87 |
libsize.ref = lib.size[refColumn], logratioTrim = logratioTrim, |
|
|
88 |
sumTrim = sumTrim, doWeighting = doWeighting, Acutoff = Acutoff) |
|
|
89 |
normf = list(f,f.test) |
|
|
90 |
} |
|
|
91 |
else if(method == "deseq"){ |
|
|
92 |
normf = calcFactorRLEGSD(data.train = x, data.test = xtest, lib.size = lib.size, lib.size.test = lib.size.test)#/lib.size |
|
|
93 |
} |
|
|
94 |
else { |
|
|
95 |
normf = list(rep(1, ncol(x)), rep(1, ncol(xtest))) |
|
|
96 |
} |
|
|
97 |
|
|
|
98 |
names(normf) = c("train", "test") |
|
|
99 |
|
|
|
100 |
f = as.numeric(normf[[1]]) / (exp(mean(log(normf[[1]])))) |
|
|
101 |
f.test = as.numeric(normf[[2]]) / (exp(mean(log(normf[[1]])))) |
|
|
102 |
normf2 = list(f, f.test, lib.size, lib.size.test) |
|
|
103 |
names(normf2) = c("TrainNormFactor","TestNormFactor","TrainLibSize","TestLibSize") |
|
|
104 |
return(normf2) |
|
|
105 |
} |
|
|
106 |
|
|
|
107 |
#traindatayı, testdatayı ve trainclassı alacak, method = TMM, RLE, none olacak. |
|
|
108 |
#train ve test icin voom gibi expression ve weights döndürecek. |
|
|
109 |
voomGSD = |
|
|
110 |
function(data.train, data.test, group, norm.method = c("TMM", "deseq", "none"), design = NULL, lib.size = NULL, span = 0.5) |
|
|
111 |
{ |
|
|
112 |
out <- list() |
|
|
113 |
NormFactors = calcNormFactorsGSD(data.train = data.train, data.test = data.test, method = norm.method) |
|
|
114 |
TrainNormFactor = NormFactors$TrainNormFactor |
|
|
115 |
TestNormFactor = NormFactors$TestNormFactor |
|
|
116 |
TrainLibSize = NormFactors$TrainLibSize |
|
|
117 |
TestLibSize = NormFactors$TestLibSize |
|
|
118 |
lib.size.tr = TrainNormFactor * TrainLibSize |
|
|
119 |
lib.size.ts = TestNormFactor * TestLibSize |
|
|
120 |
|
|
|
121 |
design.tr = model.matrix(~group) |
|
|
122 |
rownames(design.tr) = colnames(data.train) |
|
|
123 |
|
|
|
124 |
design.ts <- matrix(1, ncol(data.test), 1) |
|
|
125 |
rownames(design.ts) <- colnames(data.test) |
|
|
126 |
colnames(design.ts) <- "GrandMean" |
|
|
127 |
|
|
|
128 |
y.tr <- t(log2(t(data.train + 0.5)/(lib.size.tr + 1) * 1e+06)) |
|
|
129 |
y.ts <- t(log2(t(data.test + 0.5)/(lib.size.ts + 1) * 1e+06)) |
|
|
130 |
fit.tr <- lmFit(y.tr, design.tr) |
|
|
131 |
fit.ts <- lmFit(y.ts, design.ts) |
|
|
132 |
|
|
|
133 |
|
|
|
134 |
if (is.null(fit.tr$Amean)) |
|
|
135 |
fit$Amean <- rowMeans(y.tr, na.rm = TRUE) |
|
|
136 |
|
|
|
137 |
fit.ts$Amean = fit.tr$Amean |
|
|
138 |
fit.ts$sigma = fit.tr$sigma |
|
|
139 |
fit.ts$coefficients = fit.tr$coefficients[,1] |
|
|
140 |
|
|
|
141 |
sx <- fit.tr$Amean + mean(log2(lib.size.tr + 1)) - log2(1e+06) |
|
|
142 |
sy <- sqrt(fit.tr$sigma) |
|
|
143 |
l <- lowess(sx, sy, f = span) |
|
|
144 |
f <- approxfun(l, rule = 2) |
|
|
145 |
|
|
|
146 |
fitted.values.tr <- fit.tr$coefficients %*% t(fit.tr$design) |
|
|
147 |
fitted.values.ts <- fit.ts$coefficients %*% t(fit.ts$design) |
|
|
148 |
fitted.cpm.tr <- 2^fitted.values.tr |
|
|
149 |
fitted.cpm.ts <- 2^fitted.values.ts |
|
|
150 |
fitted.count.tr <- 1e-06 * t(t(fitted.cpm.tr) * (lib.size.tr + 1)) |
|
|
151 |
fitted.count.ts <- 1e-06 * t(t(fitted.cpm.ts) * (lib.size.ts + 1)) |
|
|
152 |
fitted.logcount.tr <- log2(fitted.count.tr) |
|
|
153 |
fitted.logcount.ts <- log2(fitted.count.ts) |
|
|
154 |
w.tr <- 1/f(fitted.logcount.tr)^4 |
|
|
155 |
w.ts <- 1/f(fitted.logcount.ts)^4 |
|
|
156 |
dim(w.tr) <- dim(fitted.logcount.tr) |
|
|
157 |
dim(w.ts) <- dim(fitted.logcount.ts) |
|
|
158 |
dimnames(w.tr) = dimnames(y.tr) |
|
|
159 |
dimnames(w.ts) = dimnames(y.ts) |
|
|
160 |
out$TrainExp <- y.tr |
|
|
161 |
out$TestExp <- y.ts |
|
|
162 |
out$TrainWeights <- w.tr |
|
|
163 |
out$TestWeights <- w.ts |
|
|
164 |
new("EList", out) |
|
|
165 |
} |