|
a |
|
b/NullModel.R |
|
|
1 |
NullModel <- |
|
|
2 |
function(x,type=c("mle","deseq","quantile")){ |
|
|
3 |
type <- match.arg(type) |
|
|
4 |
# x MUST be a n times p matrix - i.e. observations on the rows and features on the columns |
|
|
5 |
rowsum <- rowSums(x) |
|
|
6 |
colsum <- colSums(x) |
|
|
7 |
mle <- outer(rowsum, colsum, "*")/sum(rowsum) |
|
|
8 |
if(type=="mle"){ |
|
|
9 |
return(list(n=mle, sizes=rowSums(x)/sum(x))) |
|
|
10 |
} else if (type=="quantile"){ |
|
|
11 |
#This is quantile normalization idea of Bullard et al 2010 -- quantile-normalize using 75th quantile of observed counts for each sample, excluding zero-counts |
|
|
12 |
sample.qts <- apply(x,1,quantile,.75) |
|
|
13 |
sample.qts <- pmax(sample.qts, 1) # Don't wait to standardize by 0... min allowed is 1 |
|
|
14 |
sample.qts <- sample.qts/sum(sample.qts) |
|
|
15 |
fit <- outer(sample.qts,colsum,"*") |
|
|
16 |
return(list(n=fit, sizes=sample.qts)) |
|
|
17 |
} else if (type=="deseq"){ #Trying to implement idea from Anders and Huber Genome Biology 2010 paper. |
|
|
18 |
# I stole the next 3 lines from the DESeq bioconductor package.... it was in the function estimateSizeFactorsForMatrix |
|
|
19 |
counts <- t(x) |
|
|
20 |
geomeans <- exp(rowMeans(log(counts))) |
|
|
21 |
sizes <- apply(counts, 2, function(cnts) median((cnts/geomeans)[geomeans > 0])) |
|
|
22 |
rawsizestr <- sizes |
|
|
23 |
sizes <- sizes/sum(sizes) |
|
|
24 |
fit <- outer(sizes, colsum, "*") |
|
|
25 |
return(list(n=fit,sizes=sizes,geomeans=geomeans,rawsizestr=rawsizestr)) |
|
|
26 |
} |
|
|
27 |
} |
|
|
28 |
|