a b/R/enrichment_functions.R
1
#' Get interaction from ORA enrichment analysis
#'
#' Returns results of an ORA analysis as an interaction graph
#'
#' For each query, the enrichment is computed with \code{get_ORA} and the
#' query IDs are mapped to the \code{sources} namespace with
#' \code{gprofiler2::gconvert}. Only the mapped terms that appear in the
#' enrichment result with the requested \code{significant} flag are kept;
#' each remaining (input, target) pair becomes an undirected edge.
#' Vertices get a \code{mode} attribute: 'core' for query IDs and
#' 'extended' for target terms.
#'
#' @param query a vector (or a list) of character with the ID to perform
#'      the ORA analysis
#' @param sources (optional) a character in
#'    (GO, KEGG, REAC, TF, MIRNA, CORUM, HP, HPA, WP)
#' @param organism (optional) a character (default = 'hsapiens')
#' @param signif.value (optional) a logical (default = TRUE); only the
#'    terms whose \code{significant} flag equals this value are kept
#'
#' @return
#' a graph object (or list of graph) containing the interaction between
#' the query and the target terms. When \code{query} is a vector, the graph
#' has class \code{c("interaction.igraph", "igraph")}, or \code{NULL} is
#' returned if the enrichment or the ID conversion gives no result.
#' When \code{query} is a list, the result has class
#' \code{c("list.interaction.igraph", "list.igraph")} and the entries
#' without any result are skipped.
#'
#' @seealso \code{\link[gprofiler2]{gost}}  \code{\link[gprofiler2]{gconvert}}
#'
#' @examples
#' query <- c('IL15', 'CDHR5', 'TGFA', 'C4B')
#' get_interaction_from_ORA(query,
#'                          sources = 'GO')
#'
#' query <- list('All' = c('IL15', 'CDHR5', 'TGFA', 'C4B'),
#'               'c1' = c('IL15', 'CDHR5', 'TGFA'))
#' get_interaction_from_ORA(query,
#'                          sources = 'GO')
#'
#' @importFrom gprofiler2 gost gconvert
#' @importFrom dplyr pull select filter
#' @export
get_interaction_from_ORA <- function(query,
                                     sources = "GO",
                                     organism = "hsapiens",
                                     signif.value = TRUE) {
  # validate query (char)
  if (is(query, "list")) {
    query <- lapply(query,
                    function(x)
                      check_vector_char(x,
                                        var.name = "'query '"))
    if (is.null(names(query))) {
      # unnamed list: fall back to positional names
      names(query) <- seq_along(query)
    }
  } else {
    query <- check_vector_char(query)
  }

  # check organism
  organism <- check_vector_char(
    X = organism,
    X.length = 1,
    default = "hsapiens",
    var.name = "'organism' "
  )

  # check source
  sources <- match.arg(
    arg = sources,
    choices = c("GO", "KEGG", "REAC", "TF", "MIRNA", "CORUM", "HP",
                "HPA", "WP"),
    several.ok = FALSE
  )
  sources <-
    check_vector_char(sources, default = "GO")  # default value

  # check signif
  signif.value <- return_true_false(signif.value, default = TRUE)

  # Build the interaction graph of a single character vector of IDs.
  # Returns NULL when the enrichment or the ID conversion gives no result.
  build_graph <- function(ids) {
    res.ora <- get_ORA(query = ids,
                       sources = sources,
                       organism = organism)
    if (is.null(res.ora)) {
      # nothing to filter on; also avoids a useless conversion request
      return(NULL)
    }

    term_map_tmp <- gprofiler2::gconvert(query = ids,
                                         organism = organism,
                                         target = sources)
    if (is.null(term_map_tmp)) {
      return(NULL)
    }

    # terms of the enrichment result with the requested significance flag
    target_id <- res.ora %>%
      dplyr::filter(significant == signif.value) %>%
      dplyr::pull(term_id)

    # edge list: one row per unique (query ID, enriched term) pair
    term_map <- term_map_tmp %>%
      dplyr::filter(target %in% target_id) %>%
      dplyr::select(input, target) %>%
      unique() %>%
      na.omit()

    graph <- igraph::graph_from_data_frame(term_map, directed = FALSE)
    graph <- igraph::set_vertex_attr(
      graph = graph,
      name = "mode",
      index = term_map$input,
      value = "core"
    )
    graph <- igraph::set_vertex_attr(
      graph = graph,
      name = "mode",
      index = term_map$target,
      value = "extended"
    )
    graph
  }

  if (is(query, "list")) {
    res.graph <- list()
    for (i in names(query)) {
      graph_i <- build_graph(query[[i]])
      if (!is.null(graph_i)) {
        res.graph[[i]] <- graph_i
      }
    }
    # set once, after the loop, so that an empty list is classed as well
    class(res.graph) <-
      c("list.interaction.igraph", "list.igraph")
  } else {
    # query is not a list
    res.graph <- build_graph(query)
    if (is.null(res.graph)) {
      return(NULL)
    }
    class(res.graph) <- c("interaction.igraph", "igraph")
  }
  return(res.graph)
}
155
156
157
#' ORA enrichment analysis
#'
#' Returns results of an ORA analysis
#'
#' The analysis is run with \code{gprofiler2::gost}, one call per query.
#' A \code{cluster} column identifies the query each row comes from:
#' the name of the list element when \code{query} is a list
#' (its position if the list is unnamed), 'All' otherwise.
#'
#' @param query a vector of character (a list of ID), or a list of such
#'    vectors to analyse each element separately
#' @param sources a character or list of character
#' @param organism a character (default = 'hsapiens')
#'
#' @return
#' a data.frame containing the enrichment result, with the columns
#' 'cluster', 'term_id', 'source', 'term_name', 'p_value', 'significant',
#' 'term_size', 'query_size', 'intersection_size', 'precision', 'recall'.
#' When \code{query} is a vector and no result is found, \code{NULL} is
#' returned. When \code{query} is a list, the elements without result are
#' skipped and the remaining results are row-bound.
#'
#' @seealso \code{\link[gprofiler2]{gost}}
#'
#' @importFrom gprofiler2 gost
get_ORA <- function(query,
                    sources = NULL,
                    organism = "hsapiens") {
  # columns kept from the gost result, in output order
  kept.columns <- c(
    "cluster",
    "term_id",
    "source",
    "term_name",
    "p_value",
    "significant",
    "term_size",
    "query_size",
    "intersection_size",
    "precision",
    "recall"
  )

  # Run gost on one vector of IDs and tag the rows with `cluster_label`.
  # Returns NULL when gost gives no result.
  run_one <- function(ids, cluster_label) {
    ORA <- gprofiler2::gost(
      query = ids,
      organism = organism,
      significant = FALSE,
      sources = sources,
      multi_query = FALSE
    )
    ORA.res <- ORA$result
    if (is.null(ORA.res)) {
      return(NULL)
    }
    ORA.res %>%
      dplyr::mutate(cluster = cluster_label) %>%
      dplyr::select(dplyr::all_of(kept.columns))
  }

  if (is(query, "list")) {
    if (is.null(names(query))) {
      # without names the loop below would silently do nothing
      names(query) <- seq_along(query)
    }
    res <- list()
    for (i in names(query)) {
      ORA.res <- run_one(query[[i]], cluster_label = i)
      if (!is.null(ORA.res)) {
        res[[i]] <- ORA.res
      }
    }
    RES <- dplyr::bind_rows(res)
  } else {
    RES <- run_one(query, cluster_label = "All")
  }
  return(RES)
}
238
239
#' Get GO info
#'
#' Look up one or several GO terms (GOID) in GO.db and return their
#' definition, ontology and term name.
#'
#' @param go a character, GO term
#'
#' @return
#' a data.frame with the following columns: 'GOID', 'DEFINITION',
#' 'ONTOLOGY', 'TERM'
#'
#' @import GO.db
#' @importFrom AnnotationDbi keytypes
get_go_info <- function(go) {
  # every key type exposed by GO.db is requested as an output column
  go.columns <- keytypes(GO.db)
  AnnotationDbi::select(
    x = GO.db,
    keys = go,
    keytype = "GOID",
    columns = go.columns
  )
}