integration_analysis_scr / Git / Diff of /integration

Models:

AlyssaS/

integration_analysis_scr

Downloads: 1

Diff of /integration_unsupervised.R [000000] .. [91a5d7]

Switch to unified view

 b/integration_unsupervised.R
+##########################################################################
+### Script to perform integrative Clustering of several omics datasets ###
+##########################################################################
+# get options
+library("optparse")
+option_list = list(
+  make_option(c("-d", "--DNA"), type="character", default=NULL, help="file with mutation data [default= %default]", metavar="character"),
+  make_option(c("-r", "--RNA"), type="character", default=NULL, help="file with expression data [default= %default]", metavar="character"),
+  make_option(c("-k", "--Kmin"), type="numeric", default=2, help="minimum number of clusters [default= %default]", metavar="numeric"),
+  make_option(c("-K", "--Kmax"), type="numeric", default=6, help="maximum number of clusters [default= %default]", metavar="numeric"),
+  make_option(c("-m", "--methylation"), type="character", default=NULL, help="file with methylation data [default= %default]", metavar="character"),
+  make_option(c("-C", "--CNV"), type="character", default=NULL, help="file with copy number data [default= %default]", metavar="character"),
+  make_option(c("-o", "--out"), type="character", default="out", help="output directory name [default= %default]", metavar="character"),
+  make_option(c("-c", "--cores"), type="numeric", default=1, help="number of cores for statistical computation [default= %default]", metavar="numeric")
+);
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+Kmin  = as.numeric(opt$Kmin)
+Kmax  = as.numeric(opt$Kmax)
+cores = as.numeric(opt$cores)
+require(iClusterPlus)
+require(gplots)
+require(lattice)
+# read data
+Ddata = NULL
+Cdata = NULL
+Rdata = NULL
+Mdata = NULL
+dtl = vector("list",4)
+ntypes = 0
+type = c()
+row.order = c()
+plot.chr  = c()
+sparse    = c()
+cap       = c()
+if(!is.null(opt$DNA)){
+  ntypes = ntypes +1
+  Ddata = read.table(opt$DNA,h=T,row.names = 1)
+  dtl[[ntypes]] = t(Ddata)
+  type = c(type,"binomial")
+  row.order = c(row.order,T)
+  plot.chr  = c(plot.chr,F)
+  sparse    = c(sparse,T)
+  cap       = c(cap,T)
+}
+if(!is.null(opt$CNV)){
+  ntypes = ntypes +1
+  dtl[[ntypes]] = t(Cdata)
+  type = c(type,"gaussian")
+  row.order = c(row.order,T)
+  plot.chr  = c(plot.chr,T)
+  sparse    = c(sparse,T)
+  cap       = c(cap,T)
+}
+if(!is.null(opt$RNA)){
+  ntypes = ntypes +1
+  Rdata = read.table(opt$RNA,h=T,row.names = 1,check.names = T)
+  dtl[[ntypes]] = t(Rdata)
+  type = c(type,"gaussian")
+  row.order = c(row.order,T)
+  plot.chr  = c(plot.chr,F)
+  sparse    = c(sparse,T)
+  cap       = c(cap,T)
+}
+if(!is.null(opt$methylation)){
+  ntypes = ntypes +1
+  Mdata = read.table(opt$methylation,h=T,row.names = 1)
+  dtl[[ntypes]] = t(Mdata)
+  type = c(type,"gaussian")
+  row.order = c(row.order,T)
+  plot.chr  = c(plot.chr,F)
+  sparse    = c(sparse,T)
+  cap       = c(cap,T)
+}
+print(dtl)
+print(type)
+# tune lambda
+cv.fitl = vector("list",Kmax-Kmin+1)
+for(k in (Kmin:Kmax-1) ){
+  cv.fitl[[k]] = tune.iClusterPlus(cpus=cores,dt1=dtl[[1]],dt2=dtl[[2]],dt3=dtl[[3]],dt4=dtl[[4]], type=type, n.lambda=NULL,K=k,maxiter=20)
+  cv.fittmp = cv.fitl[[k]]
+  save(cv.fittmp, file=paste(opt$out,"cv.fit.K",k+1,".Rdata",sep=""))
+}
+warnings()
+nLambda = nrow(cv.fitl[[1]]$lambda)
+nK = length(cv.fitl)
+BIC = getBIC(cv.fitl)
+devR = getDevR(cv.fitl)
+minBICid = apply(BIC,2,which.min)
+devRatMinBIC = rep(NA,nK)
+for(i in 1:nK){
+  devRatMinBIC[i] = devR[minBICid[i],i]
+}
+#best.fitl=sapply( Kmin:Kmax-1 , function(k) cv.fitl[[k]]$fit[[which.min(BIC[,k])]] )
+clusters=getClusters(cv.fitl)
+colnames(clusters)=paste("K=",Kmin:Kmax,sep="")
+clusters2 = clusters
+k = Kmin
+while(k<Kmax){
+  print(k)
+  clusters2 = clusters
+  for(i in 1:max(clusters[,k-1]) ){
+    print(i)
+    tatmp = table(clusters[clusters[,k-1]==i,k])
+    newid = as.numeric(names(tatmp)[which.max(tatmp)])
+    clusters2[clusters[,k]==newid,k] = i
+    clusters2[clusters[,k]==i,k] = newid
+    print(clusters2)
+    clusters = clusters2
+  }
+  k = k+1
+}
+svg(paste(opt$out,"iClusters.svg",sep=""),h=6,w=8*2)
+par(family="Times",mfrow=c(1,2))
+plot(Kmin:Kmax,devRatMinBIC,type="b",xlab=expression(italic(K)),ylab=expression("Pseudo "~italic(R^2)),las=1)
+image(1:nrow(clusters),Kmin:Kmax,clusters,col=rainbow(Kmax),axes=F,xlab="",ylab=expression(italic(K)),las=2 )
+axis(1,at = 1:nrow(clusters),labels = rownames(dtl[[1]]) ,las=2)
+axis(2,at = Kmin:Kmax,las=1)
+dev.off()
+# plot heatmap and write features
+features = lapply(dtl,colnames)
+for(k in (Kmin:Kmax-1) ){
+  print(k)
+  best.cluster=clusters[,k]
+  print(best.cluster)
+  best.fit=cv.fitl[[k]]$fit[[which.min(BIC[,k])]]
+  print(best.fit)
+  col.scheme = alist()
+  col.scheme[[1]] = bluered(256)
+  col.scheme[[2]] = bluered(256)
+  col.scheme[[3]] = bluered(256)
+  col.scheme[[4]] = bluered(256)
+  png(paste(opt$out,"Heatmap_K",k+1,".png",sep=""),h=4*200,w=4*300,bg=0)
+  plotHeatmap(fit=best.fit,datasets=dtl[1:ntypes], type=type, col.scheme = col.scheme[1:ntypes], row.order=row.order,plot.chr=plot.chr,sparse=sparse,cap=cap)
+  dev.off()
+  # get best features
+  sigfeatures=alist()
+  idsigfeatures=alist()
+  print("yop")
+  print(sum(!sapply(features,is.null)))
+  for(i in 1:sum(!sapply(features,is.null)) ){
+    print(i)
+    rowsum=apply(abs(best.fit$beta[[i]]),1, sum)
+    upper=quantile(rowsum,prob=0.75)
+    sigfeatures[[i]]=(features[[i]])[which(rowsum>upper)]
+    idsigfeatures[[i]]=which(rowsum>upper)
+    write.table(sigfeatures[[i]],file = paste(opt$out,"_K",k+1,"_features",i,".txt",sep=""),col.names = F,quote = F)
+  }
+  #names(sigfeatures)=c("expression","methylation")
+}