Diff of /NullModel.R [000000] .. [81de4e]

Switch to side-by-side view

--- a
+++ b/NullModel.R
@@ -0,0 +1,28 @@
+NullModel <-
+function(x,type=c("mle","deseq","quantile")){
+  type <- match.arg(type)
+  # x MUST be a n times p matrix - i.e. observations on the rows and features on the columns
+  rowsum <- rowSums(x)
+  colsum <- colSums(x)
+  mle <- outer(rowsum, colsum, "*")/sum(rowsum)
+  if(type=="mle"){
+    return(list(n=mle, sizes=rowSums(x)/sum(x)))
+  } else if (type=="quantile"){
+    #This is quantile normalization idea of Bullard et al 2010 -- quantile-normalize using 75th quantile of observed counts for each sample, excluding zero-counts
+    sample.qts <- apply(x,1,quantile,.75)
+    sample.qts <- pmax(sample.qts, 1) # Don't wait to standardize by 0... min allowed is 1
+    sample.qts <- sample.qts/sum(sample.qts)
+    fit <- outer(sample.qts,colsum,"*")
+    return(list(n=fit, sizes=sample.qts))
+  } else if (type=="deseq"){ #Trying to implement idea from Anders and Huber Genome Biology 2010 paper.
+    # I stole the next 3 lines from the DESeq bioconductor package.... it was in the function estimateSizeFactorsForMatrix
+    counts <- t(x)
+    geomeans <- exp(rowMeans(log(counts)))
+    sizes <- apply(counts, 2, function(cnts) median((cnts/geomeans)[geomeans > 0]))
+    rawsizestr <- sizes
+    sizes <- sizes/sum(sizes)
+    fit <- outer(sizes, colsum, "*")
+    return(list(n=fit,sizes=sizes,geomeans=geomeans,rawsizestr=rawsizestr))
+  } 
+}
+