--- a
+++ b/4-Multi-Omic Integration/scripts/2.significant.hostP.features.r
@@ -0,0 +1,107 @@
+# the required input file: 
+# 1) Proteomic feature abundance (sputum_cyto.txt): An abundance table with proteins in rows and samples in columns.
+# 2) Meta data prepared in txt file (meta.txt), containing a column named SampleID and a column indicating disease state.
+
+# output files generated:
+# "2_significant_HostP_features.RData" 
+
+library(data.table)
+library(dplyr)
+
+HostP.data <- data.frame(fread("sputum_cyto.txt"), row.names = 1 )
+
+m = HostP.data
+feature.abb_df <- cbind.data.frame(feature = rownames(m),
+                                   abb = paste("feature",seq(1,nrow(m),1),sep = ""),
+                                   stringsAsFactors = F)
+rownames(m) <- sapply(rownames(m), function(x) feature.abb_df$abb[which(feature.abb_df$feature == x)])
+m <- t(m) %>% as.data.frame(stringsAsFactors=F)
+
+
+diseaseState = 'COPD'
+meta_df <- fread("meta.txt",data.table = F)
+colnames(meta_df)[colnames(meta_df) == diseaseState ] <- "Y"
+
+
+sig.modules <- vector("character")
+for(md in colnames(m)){
+  m.t_sub <- m %>% dplyr::select(all_of(md))
+  # if(all(m.t_sub == 0)) next
+  dat <- merge(meta_df, m.t_sub, by.x="SampleID", by.y=0,all = T) %>% dplyr::select(-SampleID)
+  colnames(dat)[which(colnames(dat) == md)] <- "mod"
+  #print(md)
+  
+  colnames(dat)
+  glm.md <- glm(as.formula(paste("Y ~ ", paste(colnames(dat)[colnames(dat) != "Y"], collapse = " + "  ), sep = "" )),
+                data = dat, family = "binomial")
+  
+  tmp <- summary(glm.md)$coefficients %>% as.data.frame()
+  if( !("mod" %in% rownames(tmp)) ) next
+  if(tmp$`Pr(>|z|)`[which(rownames(tmp) == "mod")] <= 0.05)  sig.modules <- append(sig.modules, md)
+}
+
+if(all(grepl("^feature", colnames(m)))) sig.modules <- sapply(sig.modules, function(x) feature.abb_df$feature[which(feature.abb_df$abb == x)])
+
+HostP.sigFeatures <- sig.modules
+
+
+## ################################################################################################
+# calculate correlation between modules and NEU or EOS,
+# find significantly correlated modules and organize into HostT.sigMods.NEU and HostT.sigMods.EOS
+## ################################################################################################
+
+
+# load meta data --------------------------------------
+meta.NEU <- fread("source.data/meta.mediation.NEU.txt")
+meta.EOS <- fread("source.data/meta.mediation.EOS.txt")
+
+# merge data and calculate correlation between module and inflammatory feature ------------------------
+meta <- merge(meta.NEU, meta.EOS, by="SampleID",all = T) %>% select(SampleID, NEU, EOS)
+# sampleID 不匹配，meta数据里面sampleID加上X
+meta$SampleID[grepl("^\\d", meta$SampleID)] <- paste("X",meta$SampleID[grepl("^\\d", meta$SampleID)],sep = "")
+
+HostP.data <- m 
+
+rp.res <- NULL
+for(df.name in c("HostP.data")){
+
+  omic.dat <- eval(parse(text = df.name))
+  dat<-merge(omic.dat, meta, by.x=0, by.y="SampleID") %>% tibble::column_to_rownames("Row.names")
+  
+  for(clin.var in c("NEU","EOS")){
+    # clin.var = "NEU"
+    
+    i.clin <- which(colnames(dat) == clin.var)
+    
+    for(i in c(1:ncol(omic.dat))){
+      Mod.name = colnames(dat)[i]
+      dat.sub <- dat[,c(i,i.clin)]
+      dat.sub <- dat.sub[complete.cases(dat.sub),]
+      Cor = cor.test(dat.sub[,1], dat.sub[,2], method = "spearman")
+      r = Cor$estimate
+      p = Cor$p.value
+      
+      vec <- c(df.name, Mod.name, clin.var, r, p)
+      names(vec) <- c("Omic","ModuleName","ClinicVar","r","p")
+      
+      rp.res <- bind_rows(rp.res, vec)
+    }# loop through Modules
+  }# loop through NEU and EOS
+}# loop through omic data
+
+HostP.sigFeatures.NEU <- 
+  (rp.res %>% 
+     filter(grepl("HostP", Omic)) %>% filter(ClinicVar == "NEU") %>%
+     mutate(padj=p.adjust(p)) %>%
+     filter(padj <= 0.1))$ModuleName
+HostP.sigFeatures.NEU <- intersect(HostP.sigFeatures.NEU, HostP.sigFeatures)
+
+HostP.sigFeatures.EOS <- 
+  (rp.res %>% 
+     filter(grepl("HostP", Omic)) %>% filter(ClinicVar == "EOS") %>%
+     mutate(padj=p.adjust(p)) %>%
+     filter(padj <= 0.1))$ModuleName
+HostP.sigFeatures.EOS <- intersect(HostP.sigFeatures.EOS, HostP.sigFeatures)
+
+
+save(HostP.sigFeatures.NEU, HostP.sigFeatures.EOS, file = "2_significant_HostP_features.RData")