Switch to side-by-side view

--- a
+++ b/4-Multi-Omic Integration/scripts/4.MetaB.HostT.link.r
@@ -0,0 +1,159 @@
+# files required:
+# 1) "3_MetaB_affects_NEU_through_HostT.txt": resulting data frame generated from script "4.mediation_metaB.2.hostT.r"
+# 2) "metabolome.txt" metabolomic abundance table on the feature level
+# 3) "1_metaB.module_assign.txt" module assignment file of the metabolomic featrues, generated from script "1.WGCNA_metabolomics.r"
+# 4) "transcriptome.txt" transcriptomic abundance table on the feature level
+# 5) "1_hostT.module_assign.txt" module assignment file of the transcriptomic featrues, generated from script "1.WGCNA_transcriptomics.r"
+# 6) "metabo2CIDm.txt" storing information on compound IDs and corresponding CIDm IDs
+# 7) "all_CIDm_targets.txt" link file between CIDm ID and host targets
+
+# output:
+# "MetaB.HostT.modules.linked.txt"
+
+
+
+output.dir = "Output"
+log.file = "MetaB.HostT.link.log"
+
+
+## ###############################################
+##
+##  import data 
+##
+## ###############################################
+library(data.table)
+library(dplyr)
+
+cat(paste("\n\n",as.character(Sys.time()), '\n'),  file=log.file, append=T)
+cat("Importing data : \n", file=log.file, append=T)
+
+mediation.res = fread("3_MetaB_affects_NEU_through_HostT.txt")
+MetaB.dat <- data.frame(fread("metabolome.txt"), row.names = 1) %>% t() %>% as.data.frame()
+MetaB_module.feature <- fread("1_metaB.module_assign.txt", data.table = F, col.names =c("Feature","Module"))
+HostT.dat <- data.frame(fread("transcriptome.txt"), row.names = 1) %>% t() %>% as.data.frame()
+HostT_module.feature <- fread("1_hostT.module_assign.txt", data.table = F, col.names =c("Feature","Module"))
+metabo2CIDm <- fread("metabo2CIDm.txt", data.table = F, header = F, col.names = c("Metabo","CIDm"))
+
+# load and organize CIDm.receptor data 
+conn <- file("all_CIDm_targets.txt", open="r")
+linn <-readLines(conn)
+close(conn)
+
+
+numElements <- sapply(linn, function(x) length(strsplit(x, "\t", fixed = T)[[1]]))
+table(numElements) # 4 levels of lengths
+
+tmp <- linn[numElements == 4]
+subdf1 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
+colnames(subdf1) <- c("CIDm","ENSP","geneName","linkType")
+
+
+tmp <- linn[numElements == 5]
+subdf2 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
+colnames(subdf2) <- c("CIDm","ENSP","geneName","description", "linkType")
+
+tmp <- linn[numElements == 9]
+subdf3 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
+colnames(subdf3) <- c("CIDm","ENSP","geneName","linkType","X1","X2","X3","X4", "score") 
+
+tmp <- linn[numElements == 10]
+subdf4 <- unname(sapply(tmp,function(x) strsplit(x,"\t")[[1]] )) %>% t() %>% data.frame(stringsAsFactors = F)
+colnames(subdf4) <- c("CIDm","ENSP","geneName","description", "linkType","X1","X2","X3","X4","score") 
+
+CIDm.receptor.score.co = 800
+CIDm.receptor <- bind_rows(subdf1, subdf2, subdf3, subdf4) %>% filter(score >= CIDm.receptor.score.co)
+
+
+# merge data
+MetaB.HostT.dat <- merge(MetaB.dat, HostT.dat, by=0)
+
+
+## ###############################################
+##
+##  perform link analysis 
+##
+## ###############################################
+cat("Performing link analysis : \n", file=log.file, append=T)
+# identify MetaB-HostT module pairs 
+#MetaB.HostT.modPairs <- strsplit((mediation.res %>% dplyr::filter(ACME.p <= ACME.p.co))$Treat_Mediator_Y,"_",fixed = T)
+ACME.p.co = 0.10
+MetaB.HostT.modPairs <- (mediation.res %>% dplyr::filter(ACME.p <= ACME.p.co))$Treat_Mediator_Y
+
+
+# identify links
+links <- NULL
+for(mdp in MetaB.HostT.modPairs){
+  # mdp = MetaB.HostT.modPairs[[1]]
+  
+  ConfirmedLink = FALSE
+  #metab.md = sub("ME(.*)$","\\1",sub("MetaB\\.(.*)$","\\1",mdp[1]) )
+  #hostt.md = sub("ME(.*)$","\\1",sub("HostT\\.(.*)$","\\1",mdp[2]) )
+  
+  parts = strsplit(mdp,"_", fixed = T)[[1]]
+  metab.md = sub("ME(.*)$","\\1",sub("MetaB\\.(.*)$","\\1",parts[1]) )
+  hostt.md = sub("ME(.*)$","\\1",sub("HostT\\.(.*)$","\\1",parts[2]) )
+  
+  #cat(paste("\n\nAnalyzing module pairs: ", parts[1], "_",parts[2],"\n", sep = ""), file=log.file, append=T)
+  i_mdp <- which(MetaB.HostT.modPairs == mdp)
+  if(i_mdp %% 1000 == 0) cat(paste(" -------------------------- progress: ", i_mdp, " out of ", length(MetaB.HostT.modPairs)," module pairs -------------------------- \n", sep = ""), file=log.file, append=T)
+  
+  
+  
+  
+  metab.ftr = MetaB_module.feature$Feature[MetaB_module.feature$Module == metab.md]
+  hostt.ftr = HostT_module.feature$Feature[HostT_module.feature$Module == hostt.md]
+  
+  
+  for(bf in metab.ftr){
+    # bf=metab.ftr[2]
+    # writeLines(paste("the ", which(metab.ftr == bf), " bf", sep = "") )
+    
+    if(ConfirmedLink ) break
+    cidm = metabo2CIDm$CIDm[which(metabo2CIDm$Metabo == bf)]
+    
+    if(length(cidm) == 0) {
+      writeLines(paste("MetaB feature ",bf, " (belonged to the ", parts[1]," module) doesn't have matching CIDm", sep=""))
+      next
+    }
+    
+    if(length(cidm) > 1) cidm <- as.character((data.frame(table(cidm)) %>% dplyr::arrange(desc(Freq)))$cidm[1])
+    
+    receptors_df = CIDm.receptor %>% filter(CIDm == cidm)
+    receptors <- receptors_df$geneName
+    
+    receptors <- receptors[receptors != ""]
+    
+    if( length(receptors) == 0 ) next 
+    if(!any(receptors %in% hostt.ftr)) next
+    
+    # verify the links
+    recptr_check <- receptors[receptors %in% hostt.ftr]
+    receptors_df <- receptors_df %>% filter(geneName %in%  recptr_check)
+    
+    for(i in c(1:nrow(receptors_df))){
+      
+      rcpt <- receptors_df$geneName[i]
+      lkType <- receptors_df$linkType[i]
+      
+      if(lkType %in% c("catalysis","reaction", "expression")) next
+      
+      link = c(paste(parts[1], parts[2], sep = "_" ), bf, rcpt, lkType )
+      names(link) <- c("MetaB.HostT_modulePair", "MetaB.feature","HostT.feature","Link.by")
+      
+      links <- bind_rows(links,link)
+      remove(link)
+      ConfirmedLink = T
+      
+    } # loop through all the receptors to be checked 
+      
+
+    
+  }# loop through metab.features
+  
+  
+  
+} # loop through module pairs
+
+if(!dir.exists(output.dir)) dir.create(output.dir)
+write.table(links, file = paste(output.dir,"/4_MetaB.HostT.modules.NEU.linked.txt",sep = ""), sep = "\t", quote = F, row.names = F)
+