COPD_multiomics / Git / Diff of /4-Multi-Omic Integration/scripts/1.WGCNA

Models:
AlyssaS/
COPD_multiomics
Downloads: 1
Diff of /4-Multi-Omic Integration/scripts/1.WGCNA_transcriptomics.r [000000] .. [16eabd]
Switch to side-by-side view

--- a
+++ b/4-Multi-Omic Integration/scripts/1.WGCNA_transcriptomics.r
@@ -0,0 +1,138 @@
+# the required input file: 
+# Transcriptome feature abundance (transcriptome.txt):An abundance table with genes in rows and samples in columns.
+
+# output files generated:
+# Co-expression module abundance tables with prefix being "1_hostT" stored in the "Output" directory
+
+library(WGCNA)
+library(flashClust)
+allowWGCNAThreads()
+enableWGCNAThreads()
+
+
+min.ModuleSize = 10
+cut.Height = 0.1
+output.prefix = "1_hostT"
+outputDir = "Output"
+
+if(!dir.exists(outputDir)) dir.create(outputDir)
+
+dataExpr <- read.table("transcriptome.txt", 
+                       sep='\t', row.names=1, header=T, quote="", comment="", check.names=F)
+dim(dataExpr)
+
+
+#### filter those mad < 0.01 ###############
+m.mad <- apply(dataExpr,1,mad)
+dataExprVar <- dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 0.25))[2],0.01)),]
+dataExpr <- as.data.frame(t(dataExprVar))
+#dataExpr <- as.data.frame(t(dataExpr))
+
+gsg = goodSamplesGenes(dataExpr, verbose = 3)
+
+##  Flagging genes and samples with too many missing values...
+##   ..step 1
+
+if (!gsg$allOK){
+  # Optionally, print the gene and sample names that were removed:
+  if (sum(!gsg$goodGenes)>0) 
+    printFlush(paste("Removing genes:", 
+                     paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));
+  if (sum(!gsg$goodSamples)>0) 
+    printFlush(paste("Removing samples:", 
+                     paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));
+  # Remove the offending genes and samples from the data:
+  dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
+}
+
+nGenes = ncol(dataExpr)
+nSamples = nrow(dataExpr)
+
+dim(dataExpr)
+
+
+powers = c(c(1:10), seq(from = 12, to=30, by=2))
+sft = pickSoftThreshold(dataExpr, powerVector=powers, networkType="signed", verbose=5)
+
+######### plot power curves ####################
+sizeGrWindow(9, 5)
+par(mfrow = c(1,2));
+cex1 = 0.9;
+pdf(paste(outputDir,"/",output.prefix,".power_curve.pdf",sep = ""))
+
+plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
+     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
+     main = paste("Scale independence"));
+text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
+     labels=powers,cex=cex1,col="red");
+# this line corresponds to using an R^2 cut-off of h
+abline(h=0.85,col="red")
+# Mean connectivity as a function of the soft-thresholding power
+plot(sft$fitIndices[,1], sft$fitIndices[,5],
+     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
+     main = paste("Mean connectivity"))
+text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
+dev.off()
+############ plot power curves ################
+
+softPower <- sft$powerEstimate
+
+############ scale free examination #############
+#k <- softConnectivity(datE=dataExpr,power=softPower) 
+#sizeGrWindow(10, 5)
+#par(mfrow=c(1,2))
+#hist(k)
+#pdf("2_scale_free_plot.pdf")
+#scaleFreePlot(k,main="Check Scale free topology\n")
+#dev.off()
+########### scale free examination ##############
+
+adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "bicor", corOptions = "use = 'pairwise.complete.obs'")
+#adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "cor", corOptions = "use = 'p', method = 'spearman'")
+
+TOM = TOMsimilarity(adjacency);
+
+dissTOM = 1-TOM
+geneTree = flashClust (as.dist (dissTOM), method = "complete")
+
+#geneTree = hclust(as.dist(dissTOM), method = "average");
+
+
+# minModuleSize = 10;
+
+# try both pamRespectsDendro=F and T, see which one is better #
+moduleLabels1 = cutreeDynamic (dendro = geneTree, distM = dissTOM, method = "hybrid", deepSplit = 2, pamRespectsDendro = F, minClusterSize = min.ModuleSize)
+moduleLabels1 = labels2colors (moduleLabels1)
+table(moduleLabels1)
+
+#mergeCutHeight: 
+#################### merge modules based on cutHeight #################
+merge = mergeCloseModules (dataExpr, moduleLabels1, corFnc = "cor", corOptions = list (use = 'p', method = 'spearman'), cutHeight = cut.Height)
+
+moduleLabels2 = merge$colors
+
+
+MEList = moduleEigengenes(dataExpr, colors = moduleLabels2)
+MEs = MEList$eigengenes
+
+MEDiss = 1-cor(MEs);
+# Cluster module eigengenes
+METree = hclust(as.dist(MEDiss), method = "complete");
+
+pdf(paste(outputDir,"/",output.prefix,".eigengene_heatmap.pdf", sep = "") )
+plotEigengeneNetworks(MEs, "Eigengene adjacency heatmap", marDendro = c(3,3,2,4), marHeatmap = c(3,4,2,2), plotDendrograms = TRUE, xLabelsAngle = 90) 
+dev.off()
+pdf(paste(outputDir,"/",output.prefix,".gene_cluster.pdf",sep = "") )
+plotDendroAndColors(geneTree, cbind(moduleLabels1, moduleLabels2), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
+dev.off()
+
+moduleColorsMeta = moduleLabels2
+names (moduleColorsMeta) = colnames (dataExpr)
+MEsMeta = orderMEs (MEs)
+rownames (MEsMeta) = rownames (dataExpr)
+
+###################save results #####################
+write.table(moduleColorsMeta, paste(outputDir,"/",output.prefix, ".module_assign.txt", sep = "") , sep="\t", append=FALSE, quote=FALSE)
+write.table(MEsMeta, paste(outputDir,"/",output.prefix, ".module_eigengene.txt", sep = ""), sep="\t", append=FALSE, quote=FALSE)
+
+save(TOM,file=paste(outputDir,"/",output.prefix,".tom.rda",sep = ""))