Switch to unified view

a b/integration_unsupervised.R
1
##########################################################################
2
### Script to perform integrative Clustering of several omics datasets ###
3
##########################################################################
4
5
# get options
6
library("optparse")
7
8
option_list = list(
9
  make_option(c("-d", "--DNA"), type="character", default=NULL, help="file with mutation data [default= %default]", metavar="character"),
10
  make_option(c("-r", "--RNA"), type="character", default=NULL, help="file with expression data [default= %default]", metavar="character"),
11
  make_option(c("-k", "--Kmin"), type="numeric", default=2, help="minimum number of clusters [default= %default]", metavar="numeric"),
12
  make_option(c("-K", "--Kmax"), type="numeric", default=6, help="maximum number of clusters [default= %default]", metavar="numeric"),
13
  make_option(c("-m", "--methylation"), type="character", default=NULL, help="file with methylation data [default= %default]", metavar="character"),
14
  make_option(c("-C", "--CNV"), type="character", default=NULL, help="file with copy number data [default= %default]", metavar="character"),
15
  make_option(c("-o", "--out"), type="character", default="out", help="output directory name [default= %default]", metavar="character"),
16
  make_option(c("-c", "--cores"), type="numeric", default=1, help="number of cores for statistical computation [default= %default]", metavar="numeric")
17
); 
18
19
opt_parser = OptionParser(option_list=option_list);
20
opt = parse_args(opt_parser);
21
22
Kmin  = as.numeric(opt$Kmin)
23
Kmax  = as.numeric(opt$Kmax)
24
cores = as.numeric(opt$cores)
25
26
require(iClusterPlus)
27
require(gplots)
28
require(lattice)
29
# read data
30
Ddata = NULL
31
Cdata = NULL
32
Rdata = NULL
33
Mdata = NULL
34
35
36
dtl = vector("list",4)
37
ntypes = 0
38
type = c()
39
row.order = c()
40
plot.chr  = c()
41
sparse    = c()
42
cap       = c()
43
if(!is.null(opt$DNA)){
44
  ntypes = ntypes +1
45
  Ddata = read.table(opt$DNA,h=T,row.names = 1)
46
  dtl[[ntypes]] = t(Ddata)
47
  type = c(type,"binomial")
48
  row.order = c(row.order,T)
49
  plot.chr  = c(plot.chr,F)
50
  sparse    = c(sparse,T)
51
  cap       = c(cap,T)
52
}
53
if(!is.null(opt$CNV)){
54
  ntypes = ntypes +1
55
  dtl[[ntypes]] = t(Cdata)
56
  type = c(type,"gaussian")
57
  row.order = c(row.order,T)
58
  plot.chr  = c(plot.chr,T)
59
  sparse    = c(sparse,T)
60
  cap       = c(cap,T)
61
}
62
if(!is.null(opt$RNA)){
63
  ntypes = ntypes +1
64
  Rdata = read.table(opt$RNA,h=T,row.names = 1,check.names = T)
65
  dtl[[ntypes]] = t(Rdata)
66
  type = c(type,"gaussian")
67
  row.order = c(row.order,T)
68
  plot.chr  = c(plot.chr,F)
69
  sparse    = c(sparse,T)
70
  cap       = c(cap,T)
71
}
72
if(!is.null(opt$methylation)){
73
  ntypes = ntypes +1
74
  Mdata = read.table(opt$methylation,h=T,row.names = 1)
75
  dtl[[ntypes]] = t(Mdata)
76
  type = c(type,"gaussian")
77
  row.order = c(row.order,T)
78
  plot.chr  = c(plot.chr,F)
79
  sparse    = c(sparse,T)
80
  cap       = c(cap,T)
81
}
82
83
print(dtl)
84
print(type)
85
86
# tune lambda
87
cv.fitl = vector("list",Kmax-Kmin+1)
88
for(k in (Kmin:Kmax-1) ){
89
  cv.fitl[[k]] = tune.iClusterPlus(cpus=cores,dt1=dtl[[1]],dt2=dtl[[2]],dt3=dtl[[3]],dt4=dtl[[4]], type=type, n.lambda=NULL,K=k,maxiter=20)
90
  cv.fittmp = cv.fitl[[k]]
91
  save(cv.fittmp, file=paste(opt$out,"cv.fit.K",k+1,".Rdata",sep=""))
92
}
93
warnings()
94
nLambda = nrow(cv.fitl[[1]]$lambda)
95
nK = length(cv.fitl)
96
BIC = getBIC(cv.fitl)
97
devR = getDevR(cv.fitl)
98
99
minBICid = apply(BIC,2,which.min)
100
devRatMinBIC = rep(NA,nK)
101
for(i in 1:nK){
102
  devRatMinBIC[i] = devR[minBICid[i],i]
103
}
104
105
#best.fitl=sapply( Kmin:Kmax-1 , function(k) cv.fitl[[k]]$fit[[which.min(BIC[,k])]] )
106
clusters=getClusters(cv.fitl)
107
colnames(clusters)=paste("K=",Kmin:Kmax,sep="")
108
clusters2 = clusters
109
k = Kmin
110
while(k<Kmax){
111
  print(k)
112
  clusters2 = clusters
113
  for(i in 1:max(clusters[,k-1]) ){
114
    print(i)
115
    tatmp = table(clusters[clusters[,k-1]==i,k])
116
    newid = as.numeric(names(tatmp)[which.max(tatmp)])
117
    clusters2[clusters[,k]==newid,k] = i
118
    clusters2[clusters[,k]==i,k] = newid
119
    print(clusters2)
120
    clusters = clusters2
121
  }
122
  k = k+1
123
}
124
125
svg(paste(opt$out,"iClusters.svg",sep=""),h=6,w=8*2)
126
par(family="Times",mfrow=c(1,2))
127
plot(Kmin:Kmax,devRatMinBIC,type="b",xlab=expression(italic(K)),ylab=expression("Pseudo "~italic(R^2)),las=1)
128
image(1:nrow(clusters),Kmin:Kmax,clusters,col=rainbow(Kmax),axes=F,xlab="",ylab=expression(italic(K)),las=2 )
129
axis(1,at = 1:nrow(clusters),labels = rownames(dtl[[1]]) ,las=2)
130
axis(2,at = Kmin:Kmax,las=1)
131
dev.off()
132
133
# plot heatmap and write features
134
features = lapply(dtl,colnames)
135
for(k in (Kmin:Kmax-1) ){
136
  print(k)
137
  best.cluster=clusters[,k]
138
  print(best.cluster)
139
  best.fit=cv.fitl[[k]]$fit[[which.min(BIC[,k])]]
140
  print(best.fit)
141
  col.scheme = alist()
142
  col.scheme[[1]] = bluered(256)
143
  col.scheme[[2]] = bluered(256)
144
  col.scheme[[3]] = bluered(256)
145
  col.scheme[[4]] = bluered(256)
146
147
  png(paste(opt$out,"Heatmap_K",k+1,".png",sep=""),h=4*200,w=4*300,bg=0)
148
  plotHeatmap(fit=best.fit,datasets=dtl[1:ntypes], type=type, col.scheme = col.scheme[1:ntypes], row.order=row.order,plot.chr=plot.chr,sparse=sparse,cap=cap)
149
  dev.off()
150
  
151
  # get best features
152
  sigfeatures=alist()
153
  idsigfeatures=alist()
154
  print("yop")
155
  print(sum(!sapply(features,is.null)))
156
  for(i in 1:sum(!sapply(features,is.null)) ){
157
    print(i)
158
    rowsum=apply(abs(best.fit$beta[[i]]),1, sum)
159
    upper=quantile(rowsum,prob=0.75)
160
    sigfeatures[[i]]=(features[[i]])[which(rowsum>upper)]
161
    idsigfeatures[[i]]=which(rowsum>upper)
162
    write.table(sigfeatures[[i]],file = paste(opt$out,"_K",k+1,"_features",i,".txt",sep=""),col.names = F,quote = F)
163
  }
164
  #names(sigfeatures)=c("expression","methylation")
165
}
166
167
168