--- a +++ b/4-Multi-Omic Integration/scripts/1.WGCNA_transcriptomics.r @@ -0,0 +1,138 @@ +# the required input file: +# Transcriptome feature abundance (transcriptome.txt):An abundance table with genes in rows and samples in columns. + +# output files generated: +# Co-expression module abundance tables with prefix being "1_hostT" stored in the "Output" directory + +library(WGCNA) +library(flashClust) +allowWGCNAThreads() +enableWGCNAThreads() + + +min.ModuleSize = 10 +cut.Height = 0.1 +output.prefix = "1_hostT" +outputDir = "Output" + +if(!dir.exists(outputDir)) dir.create(outputDir) + +dataExpr <- read.table("transcriptome.txt", + sep='\t', row.names=1, header=T, quote="", comment="", check.names=F) +dim(dataExpr) + + +#### filter those mad < 0.01 ############### +m.mad <- apply(dataExpr,1,mad) +dataExprVar <- dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 0.25))[2],0.01)),] +dataExpr <- as.data.frame(t(dataExprVar)) +#dataExpr <- as.data.frame(t(dataExpr)) + +gsg = goodSamplesGenes(dataExpr, verbose = 3) + +## Flagging genes and samples with too many missing values... +## ..step 1 + +if (!gsg$allOK){ + # Optionally, print the gene and sample names that were removed: + if (sum(!gsg$goodGenes)>0) + printFlush(paste("Removing genes:", + paste(names(dataExpr)[!gsg$goodGenes], collapse = ","))); + if (sum(!gsg$goodSamples)>0) + printFlush(paste("Removing samples:", + paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ","))); + # Remove the offending genes and samples from the data: + dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes] +} + +nGenes = ncol(dataExpr) +nSamples = nrow(dataExpr) + +dim(dataExpr) + + +powers = c(c(1:10), seq(from = 12, to=30, by=2)) +sft = pickSoftThreshold(dataExpr, powerVector=powers, networkType="signed", verbose=5) + +######### plot power curves #################### +sizeGrWindow(9, 5) +par(mfrow = c(1,2)); +cex1 = 0.9; +pdf(paste(outputDir,"/",output.prefix,".power_curve.pdf",sep = "")) + +plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], + xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", + main = paste("Scale independence")); +text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], + labels=powers,cex=cex1,col="red"); +# this line corresponds to using an R^2 cut-off of h +abline(h=0.85,col="red") +# Mean connectivity as a function of the soft-thresholding power +plot(sft$fitIndices[,1], sft$fitIndices[,5], + xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", + main = paste("Mean connectivity")) +text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") +dev.off() +############ plot power curves ################ + +softPower <- sft$powerEstimate + +############ scale free examination ############# +#k <- softConnectivity(datE=dataExpr,power=softPower) +#sizeGrWindow(10, 5) +#par(mfrow=c(1,2)) +#hist(k) +#pdf("2_scale_free_plot.pdf") +#scaleFreePlot(k,main="Check Scale free topology\n") +#dev.off() +########### scale free examination ############## + +adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "bicor", corOptions = "use = 'pairwise.complete.obs'") +#adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "cor", corOptions = "use = 'p', method = 'spearman'") + +TOM = TOMsimilarity(adjacency); + +dissTOM = 1-TOM +geneTree = flashClust (as.dist (dissTOM), method = "complete") + +#geneTree = hclust(as.dist(dissTOM), method = "average"); + + +# minModuleSize = 10; + +# try both pamRespectsDendro=F and T, see which one is better # +moduleLabels1 = cutreeDynamic (dendro = geneTree, distM = dissTOM, method = "hybrid", deepSplit = 2, pamRespectsDendro = F, minClusterSize = min.ModuleSize) +moduleLabels1 = labels2colors (moduleLabels1) +table(moduleLabels1) + +#mergeCutHeight: +#################### merge modules based on cutHeight ################# +merge = mergeCloseModules (dataExpr, moduleLabels1, corFnc = "cor", corOptions = list (use = 'p', method = 'spearman'), cutHeight = cut.Height) + +moduleLabels2 = merge$colors + + +MEList = moduleEigengenes(dataExpr, colors = moduleLabels2) +MEs = MEList$eigengenes + +MEDiss = 1-cor(MEs); +# Cluster module eigengenes +METree = hclust(as.dist(MEDiss), method = "complete"); + +pdf(paste(outputDir,"/",output.prefix,".eigengene_heatmap.pdf", sep = "") ) +plotEigengeneNetworks(MEs, "Eigengene adjacency heatmap", marDendro = c(3,3,2,4), marHeatmap = c(3,4,2,2), plotDendrograms = TRUE, xLabelsAngle = 90) +dev.off() +pdf(paste(outputDir,"/",output.prefix,".gene_cluster.pdf",sep = "") ) +plotDendroAndColors(geneTree, cbind(moduleLabels1, moduleLabels2), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) +dev.off() + +moduleColorsMeta = moduleLabels2 +names (moduleColorsMeta) = colnames (dataExpr) +MEsMeta = orderMEs (MEs) +rownames (MEsMeta) = rownames (dataExpr) + +###################save results ##################### +write.table(moduleColorsMeta, paste(outputDir,"/",output.prefix, ".module_assign.txt", sep = "") , sep="\t", append=FALSE, quote=FALSE) +write.table(MEsMeta, paste(outputDir,"/",output.prefix, ".module_eigengene.txt", sep = ""), sep="\t", append=FALSE, quote=FALSE) + +save(TOM,file=paste(outputDir,"/",output.prefix,".tom.rda",sep = ""))