Switch to side-by-side view

--- a
+++ b/Functions/clusterProfiler.enricher.R
@@ -0,0 +1,161 @@
+#### 使用clusterProfiler进行通路富集分析
+
+#' @param gene 字符向量,感兴趣的基因集合, 如"FUT7"         "AIRE"         "IFI35"
+#' @param geneType 字符型向量,指明输入的geneList中name属性类型,即基因id的类型
+#' @param db.type 字符型向量,指明GSEA针对的数据库
+#' @param saveDir gsea输入目录
+#' @param pAdjustMethod, pvalueCutoff p值矫正方法和阈值
+#' @param MsigDB.category 指定特定的MsigDB中的数据库进行GSEA分析
+#' MsigDB.pathway, msigdbr(species = "Homo sapiens", category = i), 指定i为H, C1, C2, C3, C4, C5, C6, C7, C8
+# 通过gs_cat gs_subcat,来选择库
+#' @param GO.ont 指定GSEA分析GO数据库时使用的条目类型,支持"BP", "MF", and "CC",以及"ALL"
+
+#' 功能富集分析的原理是基于hypergeometric test
+#' clusterProfiler主要基于EntrezID做分析
+#' 分析过程中的universe,即背景基因的数目,是跟你选择的db中的subcategory有关系的,而不是固定不变的
+cluterProfiler.enricher <- function(gene, geneType = c("ENTREZID", "SYMBOL", "ENSEMBL"),
+                                    db.type = c("MsigDB", "GO", "KEGG"),
+                                    saveDir, title,
+                                    pAdjustMethod = "BH",
+                                    qvalueCutoff = 0.2,
+                                    pvalueCutoff = 0.05,
+                                    MsigDB.category = list(H = c("All"), C2 = c("BIOCARTA", "KEGG", "REACTOME"), C5 = c("BP")),
+                                    GO.ont = "BP", simplify = T,
+                                    minGSSize = 10, maxGSSize = 500,
+                                    showCategory = 20
+                                    ){
+  require(enrichplot)
+  require(clusterProfiler)
+  require(tidyverse)
+
+  
+  geneType <- match.arg(geneType)
+  db.type <- match.arg(db.type)  
+  # 如果基因名字类型不是SYMBOL,则需要进行ID转换
+  if(geneType == "ENTREZID"){
+    cat("不需要转换\n")
+    geneList <- gene
+  }
+  if(geneType %in% c("SYMBOL", "ENSEMBL")){
+    genes <- IDConvert(genes = gene, fromType = geneType, toType = "ENTREZID")
+    cat("转换前基因数目:", length(gene), "\n")
+    index <- match(gene, genes[,1])
+    geneList <- as.character(genes[na.omit(index),2])
+    cat("转换后基因数目:", length(geneList), "\n")    
+  }
+  
+  ## 第一种富集策略:MsigDB基因集合
+  if(db.type == "MsigDB"){
+      library(msigdbr)
+    
+      # 通路提取
+      MsigDB.pathway <- tibble(gs_name = NULL, entrez_gene = NULL)
+      
+      # 将MsigDB所有基因作为背景基因
+      #MsigDB.gene <- unique(msigdbr(species = "Homo sapiens") %>% dplyr::select(gs_name,entrez_gene))
+      
+      if(class(MsigDB.category) == "character"){
+        MsigDB.pathway <- msigdbr(species = "Homo sapiens") %>% dplyr::select(gs_name, entrez_gene)
+      }else{
+        for(i in names(MsigDB.category)){
+          if(MsigDB.category[[i]][1] == "All"){
+            MsigDB.pathway <- rbind(MsigDB.pathway, msigdbr(species = "Homo sapiens", category = i) %>% dplyr::select(gs_name, entrez_gene))
+          }else{
+            for(j in MsigDB.category[[i]]){
+              MsigDB.pathway <- rbind(MsigDB.pathway, msigdbr(species = "Homo sapiens", category = i, subcategory = j) %>% dplyr::select(gs_name, entrez_gene))
+            }
+          }
+        }
+      }
+      em.res <- enricher(geneList, TERM2GENE = MsigDB.pathway, qvalueCutoff = qvalueCutoff, pvalueCutoff = pvalueCutoff, pAdjustMethod = pAdjustMethod, minGSSize = minGSSize, maxGSSize = maxGSSize)
+  }
+  
+  if(db.type == "GO"){
+    em.res <- enrichGO(gene = geneList,
+                    OrgDb = org.Hs.eg.db,
+                    ont = GO.ont,minGSSize = minGSSize, maxGSSize = maxGSSize,
+                    pAdjustMethod = pAdjustMethod,
+                    pvalueCutoff  = pvalueCutoff,
+                    qvalueCutoff  = qvalueCutoff)
+    #基于simplify method, we can remove redundant terms
+    if(simplify){
+      em.res <- simplify(em.res, cutoff = 0.7, by="p.adjust", select_fun = min)
+    }
+  }
+  
+  if(db.type == "KEGG"){
+    em.res <- enrichKEGG(gene = geneList, organism = "hsa", keyType = "kegg",
+                       pAdjustMethod = pAdjustMethod, minGSSize = minGSSize, maxGSSize = maxGSSize,
+                       pvalueCutoff  = pvalueCutoff,
+                       qvalueCutoff  = qvalueCutoff)
+  }
+  #转换为geneSymbol
+  em.res.geneSymbol <- setReadable(em.res, org.Hs.eg.db, keyType = "ENTREZID")
+    
+  # if(nrow(em.res@result) > 0){
+  #   write.csv(em.res.geneSymbol, file = file.path(saveDir, paste0(title, "_", "enricherResult.csv")))
+  #   tryCatch({
+  #     p1 <- barplot(em.res.geneSymbol, showCategory = showCategory, main = title) + theme(axis.text.x = element_text(size=4), axis.text.y = element_text(size=4))
+  #     print(p1)
+  #     p2 <- dotplot(em.res.geneSymbol, showCategory = showCategory, title = title) + theme(axis.text.x = element_text(size=4), axis.text.y = element_text(size=4))
+  #     print(p2)
+  #     p3 <- heatplot(em.res.geneSymbol, showCategory = showCategory) + theme(axis.text.x = element_text(size=4), axis.text.y = element_text(size=4))
+  #     print(p3)
+  #   }, warning = function(w){
+  #     # 这里是出现warning状态时,应该怎么做,可以用print打印出来,可以执行其它命令
+  #     cat("Warning in plot!\n")
+  #   }, error = function(e){
+  #     # 这里时出现Error状态时,应该怎么做,可以用print打印出来,也可以执行其它命令
+  #     cat("Error in plot!\n")
+  #   },finally = {
+  #     # 这是运行正常时,应该怎么做,可以用print打印出来,也可以执行其它命令
+  #     cat("Plot complete!\n")
+  #   })
+  # }
+  return(em.res = list(em.res.entrezid = em.res, em.res.genesymbol = em.res.geneSymbol))
+}
+
+
+##### ID转换
+#author: Zhilin long
+#time: 2021.1.19
+
+#' @author Zhilin Long
+#' @param gene 字符型向量,需要转换的ID向量
+#' @param fromType 字符向量,初始的ID类型
+#' @param toType 字符向量,需要转换的ID类型
+#' 
+#' @return data.frame 已经去除了重复和NA,可直接使用
+IDConvert <- function(genes, 
+                      method = c("clusterProfiler", "org.Hs.eg.db"),
+                      fromType = c("ENTREZID", "SYMBOL", "ENSEMBL"), 
+                      toType = c("ENTREZID", "SYMBOL", "ENSEMBL")
+                      ){
+    require(org.Hs.eg.db)
+    require(clusterProfiler)
+
+    #规范参数输入
+    method <- match.arg(method)
+    fromType <- match.arg(fromType)
+    toType <- match.arg(toType)
+
+    if(method == "clusterProfiler"){
+        gene.df <- bitr(genes, fromType = fromType, toType = toType, OrgDb = "org.Hs.eg.db")
+        #默认会过滤没有map到的id,即条目会减少
+    }else{
+        gene.df <- mapIds(org.Hs.eg.db, 
+                          keys = genes, 
+                          column = toType, 
+                          keytype = fromType,
+                          multiVals="first")
+        gene.df <- data.frame(ENSEMBL = names(gene.df), SYMBOL = gene.df)
+        colnames(gene.df) <- c(fromType, toType)
+        #没有map,则为NA,即条目不变
+        gene.df <- na.omit(gene.df) #去除NA的行
+    }
+
+    #去除重复, 
+    gene.df <- gene.df[!duplicated(gene.df[,2]),]
+    gene.df <- gene.df[!duplicated(gene.df[,1]),]
+    return(gene.df)
+}
\ No newline at end of file