COPD_multiomics / Git / Diff of /4-Multi-Omic Integration/scripts/4.HostT.HostP.link.r

Models:
AlyssaS/
COPD_multiomics
Downloads: 1
Diff of /4-Multi-Omic Integration/scripts/4.HostT.HostP.link.r [000000] .. [16eabd]
Switch to side-by-side view

--- a
+++ b/4-Multi-Omic Integration/scripts/4.HostT.HostP.link.r
@@ -0,0 +1,160 @@
+# files required:
+# 1) "3_HostT_affects_NEU_through_HostP.txt": resulting data frame generated from script "4.mediation_hostT.2.hostP.r"
+# 2) "transcriptome.txt" transcriptomic abundance table on the feature level
+# 3) "sputum_proteome.txt" proteomic abundance table on the feature level
+# 4) "1_hostT.module_assign.txt" module assignment file of the transcriptomic featrues, generated from script "1.WGCNA_transcriptomics.r"
+# 5) "protein_info.txt" protein names and corresponding gene names
+# 6) "human_pathway.gmt"  Human pathway information file (.gmt file) 
+
+# output:
+# "4_HostT.module_HostP.protein.linked.txt"
+
+
+output.dir = "Output"
+log.file = "HostT.HostP.link.log"
+
+
+## ######################################################################
+##
+## import data 
+##
+## #####################################################################
+library(data.table)
+library(dplyr)
+
+cat(paste("\n\n",as.character(Sys.time()), '\n'),  file=log.file, append=T)
+cat("Importing data : \n", file=log.file, append=T)
+
+mediation.res <- fread("3_HosT_affects_NEU_through_HosP.txt")
+
+
+# impport host t data
+m1 <- data.frame(fread("transcriptome.txt"), row.names=1)
+feature.abb_df1 <- cbind.data.frame(feature = rownames(m1),
+                                    abb = paste("feature",seq(1,nrow(m1),1),sep = ""),
+                                    stringsAsFactors = F)
+rownames(m1) <- sapply(rownames(m1), function(x) feature.abb_df1$abb[which(feature.abb_df1$feature == x)])
+m1 <- t(m1) %>% as.data.frame(stringsAsFactors=F)
+colnames(m1) <- sapply(colnames(m1), function(x) feature.abb_df1$feature[which(feature.abb_df1$abb == x)])
+
+HostT.dat <- m1
+
+
+
+# import host p data
+m2 =  data.frame(fread("sputum_proteome.txt"), row.names = 1 )
+feature.abb_df2 <- cbind.data.frame(feature = rownames(m2),
+                                    abb = paste("feature",seq(1,nrow(m2),1),sep = ""),
+                                    stringsAsFactors = F)
+rownames(m2) <- sapply(rownames(m2), function(x) feature.abb_df2$abb[which(feature.abb_df2$feature == x)])
+m2 <- t(m2) %>% as.data.frame(stringsAsFactors=F)
+colnames(m2) <- sapply(colnames(m2), function(x) feature.abb_df2$feature[which(feature.abb_df2$abb == x)])
+
+HostP.dat <- m2
+
+# hostT module - feature assignment
+
+HostT_module.feature <- fread("1_hostT.module_assign.txt", data.table = F, col.names =c("Feature","Module"))
+
+# protein - gene information
+
+temp <- fread("database/protein_info.txt", data.table = T) %>% dplyr::select(Target, GeneName) %>% unique()
+Protein.Gene_list <- vector("list", length = nrow(temp))
+names(Protein.Gene_list) <- temp$Target
+for(i in c(1:nrow(temp))){
+  #i=1
+  prot =  temp$Target[i]
+  genenames = strsplit(temp$GeneName[i], split = '[\\s\\/]', perl = T )[[1]]
+  Protein.Gene_list[[prot]] <- genenames
+  
+}
+
+# pathway gene information 
+
+PTWY_list <- Read.GeneSets.db2("database/human_pathway.gmt")
+Pthwy.genes_list <- PTWY_list$gs  # the gene names
+PTWY_list$gs.desc[1:3] # the pathway id
+
+
+# merge data
+HostT.HostP.dat <- merge(HostT.dat, HostP.dat, by=0)
+
+
+
+## ###############################################
+##
+##  perform linke analysis 
+##
+## ###############################################
+cat("Performing link analysis : \n", file=log.file, append=T)
+# identify HostT-HostP module pairs 
+# HostT.HostP.modPairs <- strsplit((mediation.res %>% dplyr::filter(ACME.p <= ACME.p.co))$Treat_Mediator_Y,"_",fixed = T)
+ACME.p.co = 0.10
+HostT.HostP.modPairs <-(mediation.res %>% dplyr::filter(ACME.p <= ACME.p.co))$Treat_Mediator_Y
+
+# identify links
+links <- NULL
+for(mdp in HostT.HostP.modPairs){
+  # mdp = HostT.HostP.modPairs[1]
+  
+  
+  parts = strsplit(mdp,"_", fixed = T)[[1]]
+  
+  
+  
+  #cat(paste("\n\nAnalyzing module pairs: ", parts[1], "_",parts[2],"\n", sep = ""), file=log.file, append=T)
+  i_mdp <- which(HostT.HostP.modPairs == mdp)
+  if(i_mdp %% 100 == 0) cat(paste(" -------------------- progress: ", i_mdp, " out of ", length(HostT.HostP.modPairs)," module pairs -------------------------- \n", sep = ""), file=log.file, append=T)
+  
+  
+  
+  
+  
+  
+  # identify hostp associated gene
+  hostp.pro = sub("ME(.*)$","\\1",sub("HostP\\.(.*)$","\\1",parts[2]) )
+  hostp.gene = Protein.Gene_list[[hostp.pro]]
+  
+  # identify hostt associated gene 
+  # identify hostt associated gene by module assignment
+  hostt.md = sub("ME(.*)$","\\1",sub("HostT\\.(.*)$","\\1",parts[1]) )
+  hostt.ftr = HostT_module.feature$Feature[HostT_module.feature$Module == hostt.md]
+  
+  # identify hostt associated gene through pathway enrichment analysis
+  pathwy.pvalue <- NULL
+  for(pthy in names(Pthwy.genes_list)[!grepl("^GENEGO", names(Pthwy.genes_list))]){
+    #pthy = names(Pthwy.genes_list)[!grepl("^GENEGO", names(Pthwy.genes_list))][1]
+    
+    pthy.genes = Pthwy.genes_list[[pthy]]
+    overlaped = intersect(pthy.genes, hostt.ftr)
+    totalgene.no = ncol(HostT.dat)
+    
+    pvalue <-dhyper(x=length(overlaped), m=length(pthy.genes), n= totalgene.no - length(pthy.genes), k = length(hostt.ftr))
+    p.vec_temp <-  c(pthy, pvalue)
+    names(p.vec_temp) <- c("Pathway","dhyper.pvalue")
+    pathwy.pvalue <- bind_rows(pathwy.pvalue, p.vec_temp)
+  }
+  pathwy.pvalue <- pathwy.pvalue %>% 
+    mutate(fdr = p.adjust(dhyper.pvalue, method = "fdr"))  %>% 
+    dplyr::arrange(fdr) %>% filter(fdr <= 0.25)
+  
+  if(nrow(pathwy.pvalue) == 0) {
+    hostt.ftr.integrated <- hostt.ftr
+  } else {
+    hostt.ftr.pthwy = Pthwy.genes_list[[pathwy.pvalue$Pathway[1]]]
+    hostt.ftr.integrated <- unique(c(hostt.ftr,hostt.ftr.pthwy ))
+  } 
+  
+  
+  if(length( intersect(hostt.ftr.integrated, hostp.gene)) > 0) {
+    link = c(paste(parts[1], parts[2], sep = "_" ), paste(intersect(hostt.ftr.integrated, hostp.gene), collapse = ";") )
+    names(link) <- c("HostT.module_HostP.protein_pair", "Genes.association")
+    links <- bind_rows(links,link)
+    remove(link)
+  }  
+  
+} # loop through module pairs  
+
+
+if(!dir.exists(output.dir)) dir.create(output.dir)
+write.table(links, file = paste(output.dir,"/HostT.module_HostP.protein.NEU.linked.txt",sep = ""), sep = "\t", quote = F, row.names = F)