COPD_multiomics / Git / [16eabd] /4-Multi-Omic Integration/scripts/1.WGCNA

Models:
AlyssaS/
COPD_multiomics
Downloads: 1
[16eabd]: / 4-Multi-Omic Integration / scripts / 1.WGCNA_transcriptomics.r
History
Download this file
139 lines (103 with data), 5.4 kB

# the required input file: 
# Transcriptome feature abundance (transcriptome.txt):An abundance table with genes in rows and samples in columns.

# output files generated:
# Co-expression module abundance tables with prefix being "1_hostT" stored in the "Output" directory

library(WGCNA)
library(flashClust)
allowWGCNAThreads()
enableWGCNAThreads()


min.ModuleSize = 10
cut.Height = 0.1
output.prefix = "1_hostT"
outputDir = "Output"

if(!dir.exists(outputDir)) dir.create(outputDir)

dataExpr <- read.table("transcriptome.txt", 
                       sep='\t', row.names=1, header=T, quote="", comment="", check.names=F)
dim(dataExpr)


#### filter those mad < 0.01 ###############
m.mad <- apply(dataExpr,1,mad)
dataExprVar <- dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 0.25))[2],0.01)),]
dataExpr <- as.data.frame(t(dataExprVar))
#dataExpr <- as.data.frame(t(dataExpr))

gsg = goodSamplesGenes(dataExpr, verbose = 3)

##  Flagging genes and samples with too many missing values...
##   ..step 1

if (!gsg$allOK){
  # Optionally, print the gene and sample names that were removed:
  if (sum(!gsg$goodGenes)>0) 
    printFlush(paste("Removing genes:", 
                     paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));
  if (sum(!gsg$goodSamples)>0) 
    printFlush(paste("Removing samples:", 
                     paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));
  # Remove the offending genes and samples from the data:
  dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
}

nGenes = ncol(dataExpr)
nSamples = nrow(dataExpr)

dim(dataExpr)


powers = c(c(1:10), seq(from = 12, to=30, by=2))
sft = pickSoftThreshold(dataExpr, powerVector=powers, networkType="signed", verbose=5)

######### plot power curves ####################
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
pdf(paste(outputDir,"/",output.prefix,".power_curve.pdf",sep = ""))

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.85,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
############ plot power curves ################

softPower <- sft$powerEstimate

############ scale free examination #############
#k <- softConnectivity(datE=dataExpr,power=softPower) 
#sizeGrWindow(10, 5)
#par(mfrow=c(1,2))
#hist(k)
#pdf("2_scale_free_plot.pdf")
#scaleFreePlot(k,main="Check Scale free topology\n")
#dev.off()
########### scale free examination ##############

adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "bicor", corOptions = "use = 'pairwise.complete.obs'")
#adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "cor", corOptions = "use = 'p', method = 'spearman'")

TOM = TOMsimilarity(adjacency);

dissTOM = 1-TOM
geneTree = flashClust (as.dist (dissTOM), method = "complete")

#geneTree = hclust(as.dist(dissTOM), method = "average");


# minModuleSize = 10;

# try both pamRespectsDendro=F and T, see which one is better #
moduleLabels1 = cutreeDynamic (dendro = geneTree, distM = dissTOM, method = "hybrid", deepSplit = 2, pamRespectsDendro = F, minClusterSize = min.ModuleSize)
moduleLabels1 = labels2colors (moduleLabels1)
table(moduleLabels1)

#mergeCutHeight: 
#################### merge modules based on cutHeight #################
merge = mergeCloseModules (dataExpr, moduleLabels1, corFnc = "cor", corOptions = list (use = 'p', method = 'spearman'), cutHeight = cut.Height)

moduleLabels2 = merge$colors


MEList = moduleEigengenes(dataExpr, colors = moduleLabels2)
MEs = MEList$eigengenes

MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "complete");

pdf(paste(outputDir,"/",output.prefix,".eigengene_heatmap.pdf", sep = "") )
plotEigengeneNetworks(MEs, "Eigengene adjacency heatmap", marDendro = c(3,3,2,4), marHeatmap = c(3,4,2,2), plotDendrograms = TRUE, xLabelsAngle = 90) 
dev.off()
pdf(paste(outputDir,"/",output.prefix,".gene_cluster.pdf",sep = "") )
plotDendroAndColors(geneTree, cbind(moduleLabels1, moduleLabels2), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()

moduleColorsMeta = moduleLabels2
names (moduleColorsMeta) = colnames (dataExpr)
MEsMeta = orderMEs (MEs)
rownames (MEsMeta) = rownames (dataExpr)

###################save results #####################
write.table(moduleColorsMeta, paste(outputDir,"/",output.prefix, ".module_assign.txt", sep = "") , sep="\t", append=FALSE, quote=FALSE)
write.table(MEsMeta, paste(outputDir,"/",output.prefix, ".module_eigengene.txt", sep = ""), sep="\t", append=FALSE, quote=FALSE)

save(TOM,file=paste(outputDir,"/",output.prefix,".tom.rda",sep = ""))