[16eabd]: / 4-Multi-Omic Integration / scripts / 1.WGCNA_transcriptomics.r

Download this file

139 lines (103 with data), 5.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# the required input file:
# Transcriptome feature abundance (transcriptome.txt):An abundance table with genes in rows and samples in columns.
# output files generated:
# Co-expression module abundance tables with prefix being "1_hostT" stored in the "Output" directory
library(WGCNA)
library(flashClust)
allowWGCNAThreads()
enableWGCNAThreads()
min.ModuleSize = 10
cut.Height = 0.1
output.prefix = "1_hostT"
outputDir = "Output"
if(!dir.exists(outputDir)) dir.create(outputDir)
dataExpr <- read.table("transcriptome.txt",
sep='\t', row.names=1, header=T, quote="", comment="", check.names=F)
dim(dataExpr)
#### filter those mad < 0.01 ###############
m.mad <- apply(dataExpr,1,mad)
dataExprVar <- dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 0.25))[2],0.01)),]
dataExpr <- as.data.frame(t(dataExprVar))
#dataExpr <- as.data.frame(t(dataExpr))
gsg = goodSamplesGenes(dataExpr, verbose = 3)
## Flagging genes and samples with too many missing values...
## ..step 1
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:",
paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:",
paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));
# Remove the offending genes and samples from the data:
dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
}
nGenes = ncol(dataExpr)
nSamples = nrow(dataExpr)
dim(dataExpr)
powers = c(c(1:10), seq(from = 12, to=30, by=2))
sft = pickSoftThreshold(dataExpr, powerVector=powers, networkType="signed", verbose=5)
######### plot power curves ####################
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
pdf(paste(outputDir,"/",output.prefix,".power_curve.pdf",sep = ""))
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.85,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
############ plot power curves ################
softPower <- sft$powerEstimate
############ scale free examination #############
#k <- softConnectivity(datE=dataExpr,power=softPower)
#sizeGrWindow(10, 5)
#par(mfrow=c(1,2))
#hist(k)
#pdf("2_scale_free_plot.pdf")
#scaleFreePlot(k,main="Check Scale free topology\n")
#dev.off()
########### scale free examination ##############
adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "bicor", corOptions = "use = 'pairwise.complete.obs'")
#adjacency = adjacency (dataExpr, power = softPower, type = "signed", corFnc = "cor", corOptions = "use = 'p', method = 'spearman'")
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM
geneTree = flashClust (as.dist (dissTOM), method = "complete")
#geneTree = hclust(as.dist(dissTOM), method = "average");
# minModuleSize = 10;
# try both pamRespectsDendro=F and T, see which one is better #
moduleLabels1 = cutreeDynamic (dendro = geneTree, distM = dissTOM, method = "hybrid", deepSplit = 2, pamRespectsDendro = F, minClusterSize = min.ModuleSize)
moduleLabels1 = labels2colors (moduleLabels1)
table(moduleLabels1)
#mergeCutHeight:
#################### merge modules based on cutHeight #################
merge = mergeCloseModules (dataExpr, moduleLabels1, corFnc = "cor", corOptions = list (use = 'p', method = 'spearman'), cutHeight = cut.Height)
moduleLabels2 = merge$colors
MEList = moduleEigengenes(dataExpr, colors = moduleLabels2)
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "complete");
pdf(paste(outputDir,"/",output.prefix,".eigengene_heatmap.pdf", sep = "") )
plotEigengeneNetworks(MEs, "Eigengene adjacency heatmap", marDendro = c(3,3,2,4), marHeatmap = c(3,4,2,2), plotDendrograms = TRUE, xLabelsAngle = 90)
dev.off()
pdf(paste(outputDir,"/",output.prefix,".gene_cluster.pdf",sep = "") )
plotDendroAndColors(geneTree, cbind(moduleLabels1, moduleLabels2), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()
moduleColorsMeta = moduleLabels2
names (moduleColorsMeta) = colnames (dataExpr)
MEsMeta = orderMEs (MEs)
rownames (MEsMeta) = rownames (dataExpr)
###################save results #####################
write.table(moduleColorsMeta, paste(outputDir,"/",output.prefix, ".module_assign.txt", sep = "") , sep="\t", append=FALSE, quote=FALSE)
write.table(MEsMeta, paste(outputDir,"/",output.prefix, ".module_eigengene.txt", sep = ""), sep="\t", append=FALSE, quote=FALSE)
save(TOM,file=paste(outputDir,"/",output.prefix,".tom.rda",sep = ""))