Switch to unified view

a b/Functions/clusterProfiler.enricher.R
1
#### 使用clusterProfiler进行通路富集分析
2
3
#' @param gene 字符向量,感兴趣的基因集合, 如"FUT7"         "AIRE"         "IFI35"
4
#' @param geneType 字符型向量,指明输入的geneList中name属性类型,即基因id的类型
5
#' @param db.type 字符型向量,指明GSEA针对的数据库
6
#' @param saveDir gsea输入目录
7
#' @param pAdjustMethod, pvalueCutoff p值矫正方法和阈值
8
#' @param MsigDB.category 指定特定的MsigDB中的数据库进行GSEA分析
9
#' MsigDB.pathway, msigdbr(species = "Homo sapiens", category = i), 指定i为H, C1, C2, C3, C4, C5, C6, C7, C8
10
# 通过gs_cat gs_subcat,来选择库
11
#' @param GO.ont 指定GSEA分析GO数据库时使用的条目类型,支持"BP", "MF", and "CC",以及"ALL"
12
13
#' 功能富集分析的原理是基于hypergeometric test
14
#' clusterProfiler主要基于EntrezID做分析
15
#' 分析过程中的universe,即背景基因的数目,是跟你选择的db中的subcategory有关系的,而不是固定不变的
16
cluterProfiler.enricher <- function(gene, geneType = c("ENTREZID", "SYMBOL", "ENSEMBL"),
17
                                    db.type = c("MsigDB", "GO", "KEGG"),
18
                                    saveDir, title,
19
                                    pAdjustMethod = "BH",
20
                                    qvalueCutoff = 0.2,
21
                                    pvalueCutoff = 0.05,
22
                                    MsigDB.category = list(H = c("All"), C2 = c("BIOCARTA", "KEGG", "REACTOME"), C5 = c("BP")),
23
                                    GO.ont = "BP", simplify = T,
24
                                    minGSSize = 10, maxGSSize = 500,
25
                                    showCategory = 20
26
                                    ){
27
  require(enrichplot)
28
  require(clusterProfiler)
29
  require(tidyverse)
30
31
  
32
  geneType <- match.arg(geneType)
33
  db.type <- match.arg(db.type)  
34
  # 如果基因名字类型不是SYMBOL,则需要进行ID转换
35
  if(geneType == "ENTREZID"){
36
    cat("不需要转换\n")
37
    geneList <- gene
38
  }
39
  if(geneType %in% c("SYMBOL", "ENSEMBL")){
40
    genes <- IDConvert(genes = gene, fromType = geneType, toType = "ENTREZID")
41
    cat("转换前基因数目:", length(gene), "\n")
42
    index <- match(gene, genes[,1])
43
    geneList <- as.character(genes[na.omit(index),2])
44
    cat("转换后基因数目:", length(geneList), "\n")    
45
  }
46
  
47
  ## 第一种富集策略:MsigDB基因集合
48
  if(db.type == "MsigDB"){
49
      library(msigdbr)
50
    
51
      # 通路提取
52
      MsigDB.pathway <- tibble(gs_name = NULL, entrez_gene = NULL)
53
      
54
      # 将MsigDB所有基因作为背景基因
55
      #MsigDB.gene <- unique(msigdbr(species = "Homo sapiens") %>% dplyr::select(gs_name,entrez_gene))
56
      
57
      if(class(MsigDB.category) == "character"){
58
        MsigDB.pathway <- msigdbr(species = "Homo sapiens") %>% dplyr::select(gs_name, entrez_gene)
59
      }else{
60
        for(i in names(MsigDB.category)){
61
          if(MsigDB.category[[i]][1] == "All"){
62
            MsigDB.pathway <- rbind(MsigDB.pathway, msigdbr(species = "Homo sapiens", category = i) %>% dplyr::select(gs_name, entrez_gene))
63
          }else{
64
            for(j in MsigDB.category[[i]]){
65
              MsigDB.pathway <- rbind(MsigDB.pathway, msigdbr(species = "Homo sapiens", category = i, subcategory = j) %>% dplyr::select(gs_name, entrez_gene))
66
            }
67
          }
68
        }
69
      }
70
      em.res <- enricher(geneList, TERM2GENE = MsigDB.pathway, qvalueCutoff = qvalueCutoff, pvalueCutoff = pvalueCutoff, pAdjustMethod = pAdjustMethod, minGSSize = minGSSize, maxGSSize = maxGSSize)
71
  }
72
  
73
  if(db.type == "GO"){
74
    em.res <- enrichGO(gene = geneList,
75
                    OrgDb = org.Hs.eg.db,
76
                    ont = GO.ont,minGSSize = minGSSize, maxGSSize = maxGSSize,
77
                    pAdjustMethod = pAdjustMethod,
78
                    pvalueCutoff  = pvalueCutoff,
79
                    qvalueCutoff  = qvalueCutoff)
80
    #基于simplify method, we can remove redundant terms
81
    if(simplify){
82
      em.res <- simplify(em.res, cutoff = 0.7, by="p.adjust", select_fun = min)
83
    }
84
  }
85
  
86
  if(db.type == "KEGG"){
87
    em.res <- enrichKEGG(gene = geneList, organism = "hsa", keyType = "kegg",
88
                       pAdjustMethod = pAdjustMethod, minGSSize = minGSSize, maxGSSize = maxGSSize,
89
                       pvalueCutoff  = pvalueCutoff,
90
                       qvalueCutoff  = qvalueCutoff)
91
  }
92
  #转换为geneSymbol
93
  em.res.geneSymbol <- setReadable(em.res, org.Hs.eg.db, keyType = "ENTREZID")
94
    
95
  # if(nrow(em.res@result) > 0){
96
  #   write.csv(em.res.geneSymbol, file = file.path(saveDir, paste0(title, "_", "enricherResult.csv")))
97
  #   tryCatch({
98
  #     p1 <- barplot(em.res.geneSymbol, showCategory = showCategory, main = title) + theme(axis.text.x = element_text(size=4), axis.text.y = element_text(size=4))
99
  #     print(p1)
100
  #     p2 <- dotplot(em.res.geneSymbol, showCategory = showCategory, title = title) + theme(axis.text.x = element_text(size=4), axis.text.y = element_text(size=4))
101
  #     print(p2)
102
  #     p3 <- heatplot(em.res.geneSymbol, showCategory = showCategory) + theme(axis.text.x = element_text(size=4), axis.text.y = element_text(size=4))
103
  #     print(p3)
104
  #   }, warning = function(w){
105
  #     # 这里是出现warning状态时,应该怎么做,可以用print打印出来,可以执行其它命令
106
  #     cat("Warning in plot!\n")
107
  #   }, error = function(e){
108
  #     # 这里时出现Error状态时,应该怎么做,可以用print打印出来,也可以执行其它命令
109
  #     cat("Error in plot!\n")
110
  #   },finally = {
111
  #     # 这是运行正常时,应该怎么做,可以用print打印出来,也可以执行其它命令
112
  #     cat("Plot complete!\n")
113
  #   })
114
  # }
115
  return(em.res = list(em.res.entrezid = em.res, em.res.genesymbol = em.res.geneSymbol))
116
}
117
118
119
##### ID转换
120
#author: Zhilin long
121
#time: 2021.1.19
122
123
#' @author Zhilin Long
124
#' @param gene 字符型向量,需要转换的ID向量
125
#' @param fromType 字符向量,初始的ID类型
126
#' @param toType 字符向量,需要转换的ID类型
127
#' 
128
#' @return data.frame 已经去除了重复和NA,可直接使用
129
IDConvert <- function(genes, 
130
                      method = c("clusterProfiler", "org.Hs.eg.db"),
131
                      fromType = c("ENTREZID", "SYMBOL", "ENSEMBL"), 
132
                      toType = c("ENTREZID", "SYMBOL", "ENSEMBL")
133
                      ){
134
    require(org.Hs.eg.db)
135
    require(clusterProfiler)
136
137
    #规范参数输入
138
    method <- match.arg(method)
139
    fromType <- match.arg(fromType)
140
    toType <- match.arg(toType)
141
142
    if(method == "clusterProfiler"){
143
        gene.df <- bitr(genes, fromType = fromType, toType = toType, OrgDb = "org.Hs.eg.db")
144
        #默认会过滤没有map到的id,即条目会减少
145
    }else{
146
        gene.df <- mapIds(org.Hs.eg.db, 
147
                          keys = genes, 
148
                          column = toType, 
149
                          keytype = fromType,
150
                          multiVals="first")
151
        gene.df <- data.frame(ENSEMBL = names(gene.df), SYMBOL = gene.df)
152
        colnames(gene.df) <- c(fromType, toType)
153
        #没有map,则为NA,即条目不变
154
        gene.df <- na.omit(gene.df) #去除NA的行
155
    }
156
157
    #去除重复, 
158
    gene.df <- gene.df[!duplicated(gene.df[,2]),]
159
    gene.df <- gene.df[!duplicated(gene.df[,1]),]
160
    return(gene.df)
161
}