Switch to side-by-side view

--- a
+++ b/common_scripts/statistics/mixtureModel.R
@@ -0,0 +1,60 @@
+mixtureM.gene=function(gene, d.matrix, FIX=F){
+  library(mclust)
+  library(parallel)
+  dat=d.matrix[rownames(d.matrix)%in%gene,,drop=F]
+  model=Mclust(as.numeric(dat),2)
+  
+  # first test if we can find clear components:
+  class=model$classification
+  
+  class[class == 1] = -1 # background
+  class[class == 2] = 1 # expressed high
+  
+  for (j in 1:dim(dat)[2]) { # Adjust the classes.
+    if (dat[,j] < 4) {
+      class[j] = -1
+      next
+    }
+    if (model$uncertainty[j] > 0.1) {
+      class[j] = 0
+      next
+    }
+    if (dat[,j] > 10) {
+      class[j] = 1
+    }
+  }
+  
+  # in some cases background distribution does not exist (gene always expressed, -1 missing)
+  # or over 90% of the samples are uncertain
+  # in these cases use original model component for expressed - notexpressed
+  # These cases have only one distribution, try to identify if it is background distribution or highly expressed gene distribution
+  if(FIX){
+    
+    # this means, that only one distribution was found, not two:
+    if((sum(class==-1)==0|sum(class==0)>0.9*length(class))&!all(class==1)){
+      
+      # odds, how many samples have expression above/below 6
+      odds=sum(dat[class==0]>=6)/sum(class==0)
+      odds2=sum(dat[class==0]<6)/sum(class==0)
+      
+      # 60% of the uncertain have expression above 6
+      if(odds>0.6){
+        class[model$classification == 2] = 1
+      }
+      
+      # 60% of the uncertain have expression below 6
+      if(odds2>0.6){
+        class[model$classification == 1] = -1
+      }
+    }
+  }
+  
+  ret_class=data.matrix(t(class))
+  dimnames(ret_class)=dimnames(dat)
+  return(ret_class)
+}
+
+# data=t(get(load("/research/groups/sysgen/PROJECTS/HEMAP/HEMAP/dat2figs/data/data9544_with_gene_symbols.RData")))
+# d.matrix=data
+# profile=do.call(rbind, mclapply(rownames(data), mixtureM.gene, data, FIX=T, mc.cores=8))
+# save(profile, file="mixtureM_profile.Rdata")