[16eabd]: / 4-Multi-Omic Integration / scripts / 4.MetaB.HostT.link.r

Download this file

160 lines (106 with data), 6.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# files required:
# 1) "3_MetaB_affects_NEU_through_HostT.txt": resulting data frame generated from script "4.mediation_metaB.2.hostT.r"
# 2) "metabolome.txt" metabolomic abundance table on the feature level
# 3) "1_metaB.module_assign.txt" module assignment file of the metabolomic featrues, generated from script "1.WGCNA_metabolomics.r"
# 4) "transcriptome.txt" transcriptomic abundance table on the feature level
# 5) "1_hostT.module_assign.txt" module assignment file of the transcriptomic featrues, generated from script "1.WGCNA_transcriptomics.r"
# 6) "metabo2CIDm.txt" storing information on compound IDs and corresponding CIDm IDs
# 7) "all_CIDm_targets.txt" link file between CIDm ID and host targets
# output:
# "MetaB.HostT.modules.linked.txt"
output.dir = "Output"
log.file = "MetaB.HostT.link.log"
## ###############################################
##
## import data
##
## ###############################################
library(data.table)
library(dplyr)
cat(paste("\n\n",as.character(Sys.time()), '\n'), file=log.file, append=T)
cat("Importing data : \n", file=log.file, append=T)
mediation.res = fread("3_MetaB_affects_NEU_through_HostT.txt")
MetaB.dat <- data.frame(fread("metabolome.txt"), row.names = 1) %>% t() %>% as.data.frame()
MetaB_module.feature <- fread("1_metaB.module_assign.txt", data.table = F, col.names =c("Feature","Module"))
HostT.dat <- data.frame(fread("transcriptome.txt"), row.names = 1) %>% t() %>% as.data.frame()
HostT_module.feature <- fread("1_hostT.module_assign.txt", data.table = F, col.names =c("Feature","Module"))
metabo2CIDm <- fread("metabo2CIDm.txt", data.table = F, header = F, col.names = c("Metabo","CIDm"))
# load and organize CIDm.receptor data
conn <- file("all_CIDm_targets.txt", open="r")
linn <-readLines(conn)
close(conn)
numElements <- sapply(linn, function(x) length(strsplit(x, "\t", fixed = T)[[1]]))
table(numElements) # 4 levels of lengths
tmp <- linn[numElements == 4]
subdf1 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
colnames(subdf1) <- c("CIDm","ENSP","geneName","linkType")
tmp <- linn[numElements == 5]
subdf2 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
colnames(subdf2) <- c("CIDm","ENSP","geneName","description", "linkType")
tmp <- linn[numElements == 9]
subdf3 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
colnames(subdf3) <- c("CIDm","ENSP","geneName","linkType","X1","X2","X3","X4", "score")
tmp <- linn[numElements == 10]
subdf4 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
colnames(subdf4) <- c("CIDm","ENSP","geneName","description", "linkType","X1","X2","X3","X4","score")
CIDm.receptor.score.co = 800
CIDm.receptor <- bind_rows(subdf1, subdf2, subdf3, subdf4) %>% filter(score >= CIDm.receptor.score.co)
# merge data
MetaB.HostT.dat <- merge(MetaB.dat, HostT.dat, by=0)
## ###############################################
##
## perform link analysis
##
## ###############################################
cat("Performing link analysis : \n", file=log.file, append=T)
# identify MetaB-HostT module pairs
#MetaB.HostT.modPairs <- strsplit((mediation.res %>% dplyr::filter(ACME.p <= ACME.p.co))$Treat_Mediator_Y,"_",fixed = T)
ACME.p.co = 0.10
MetaB.HostT.modPairs <- (mediation.res %>% dplyr::filter(ACME.p <= ACME.p.co))$Treat_Mediator_Y
# identify links
links <- NULL
for(mdp in MetaB.HostT.modPairs){
# mdp = MetaB.HostT.modPairs[[1]]
ConfirmedLink = FALSE
#metab.md = sub("ME(.*)$","\\1",sub("MetaB\\.(.*)$","\\1",mdp[1]) )
#hostt.md = sub("ME(.*)$","\\1",sub("HostT\\.(.*)$","\\1",mdp[2]) )
parts = strsplit(mdp,"_", fixed = T)[[1]]
metab.md = sub("ME(.*)$","\\1",sub("MetaB\\.(.*)$","\\1",parts[1]) )
hostt.md = sub("ME(.*)$","\\1",sub("HostT\\.(.*)$","\\1",parts[2]) )
#cat(paste("\n\nAnalyzing module pairs: ", parts[1], "_",parts[2],"\n", sep = ""), file=log.file, append=T)
i_mdp <- which(MetaB.HostT.modPairs == mdp)
if(i_mdp %% 1000 == 0) cat(paste(" -------------------------- progress: ", i_mdp, " out of ", length(MetaB.HostT.modPairs)," module pairs -------------------------- \n", sep = ""), file=log.file, append=T)
metab.ftr = MetaB_module.feature$Feature[MetaB_module.feature$Module == metab.md]
hostt.ftr = HostT_module.feature$Feature[HostT_module.feature$Module == hostt.md]
for(bf in metab.ftr){
# bf=metab.ftr[2]
# writeLines(paste("the ", which(metab.ftr == bf), " bf", sep = "") )
if(ConfirmedLink ) break
cidm = metabo2CIDm$CIDm[which(metabo2CIDm$Metabo == bf)]
if(length(cidm) == 0) {
writeLines(paste("MetaB feature ",bf, " (belonged to the ", parts[1]," module) doesn't have matching CIDm", sep=""))
next
}
if(length(cidm) > 1) cidm <- as.character((data.frame(table(cidm)) %>% dplyr::arrange(desc(Freq)))$cidm[1])
receptors_df = CIDm.receptor %>% filter(CIDm == cidm)
receptors <- receptors_df$geneName
receptors <- receptors[receptors != ""]
if( length(receptors) == 0 ) next
if(!any(receptors %in% hostt.ftr)) next
# verify the links
recptr_check <- receptors[receptors %in% hostt.ftr]
receptors_df <- receptors_df %>% filter(geneName %in% recptr_check)
for(i in c(1:nrow(receptors_df))){
rcpt <- receptors_df$geneName[i]
lkType <- receptors_df$linkType[i]
if(lkType %in% c("catalysis","reaction", "expression")) next
link = c(paste(parts[1], parts[2], sep = "_" ), bf, rcpt, lkType )
names(link) <- c("MetaB.HostT_modulePair", "MetaB.feature","HostT.feature","Link.by")
links <- bind_rows(links,link)
remove(link)
ConfirmedLink = T
} # loop through all the receptors to be checked
}# loop through metab.features
} # loop through module pairs
if(!dir.exists(output.dir)) dir.create(output.dir)
write.table(links, file = paste(output.dir,"/4_MetaB.HostT.modules.NEU.linked.txt",sep = ""), sep = "\t", quote = F, row.names = F)