a b/R/feature_processing.R
1
#' Wrapper to extract features (nodes) from given pathways.
2
#'
3
#' Function to extract the features (nodes) from a given list of pathways. These
4
#' pathways have to be compiled with the
5
#' \code{\link[graphite:pathways]{pathways}} function. Features can only be
6
#' extracted for \'proteins\' or \'metabolites\'. Features will by default be
7
#' mapped to gene symbols.
8
#'
9
#' @param pathway A pathway created with \code{\link[graphite:pathways]{pathways}} command.
10
#' @param which Mode to extract the features, either \'proteins\' or
11
#'   \'metabolites\'.
12
#' @param org String specifying the organism, which is necessary for featureID
13
#'   mapping. Default: human
14
#' @param returntype String that specifies the returning ID type. Default:
15
#'   SYMBOL Options (genes/proteins): SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ
16
#'   Options (metabolites): HMDB, CAS, DTXCID, DTXSID, SID, CID, ChEBI, KEGG, Drugbank
17
#'
18
#' @return Feature list with gene symbols (genes/proteins) or CHEBI IDs
19
#'   (metabolites)
20
#'
21
#' @examples
22
#' pathways <- graphite::pathways("hsapiens", "kegg")[[1]]
23
#' getFeatures(pathways)
24
#' \donttest{
25
#' pathways <- graphite::pathways("mmusculus", "kegg")[[1]]
26
#' getFeatures(pathways, which = "metabolites", org = "mmusculus", returntype = "HMDB")
27
#'
28
#' pathways <- graphite::pathways("mmusculus", "kegg")[[1]]
29
#' getFeatures(pathways, which = "proteins", org = "mmusculus", returntype = "SYMBOL")
30
#' }
31
#'
32
#' @importFrom graphite nodes
33
#'
34
#' @export
35
getFeatures <- function(pathway, which = "proteins", org = "hsapiens", returntype = "SYMBOL") {
36
  if (which != "proteins" && which != "metabolites") {
37
    stop("Only 'proteins' and 'metabolites' are supported for 'which'.",
38
      call. = FALSE
39
    )
40
  }
41
42
  org <- tolower(org)
43
44
  ## check for the correct organism
45
  if (!(org %in% getOrganisms())) {
46
    stop("Please insert a correct organism name! Use getOrganisms()
47
              to see all supported organisms.",
48
      call. = FALSE
49
    )
50
  }
51
52
  ## extract the features (genes/proteins/metabolites) from the pathway nodes.
53
  features <- graphite::nodes(pathway, which = which)
54
55
  if (length(features) == 0) {
56
    return(list())
57
  }
58
59
  if (which == "proteins") {
60
    ## extract the keytype of the ID format
61
    kt <- gsub(":.*", "", features[1])
62
    mapped <- gsub("[A-Z]+:", "", features)
63
64
    ## get the mapping from the keytype to the user-specific type
65
    mapped <- getGeneMapping(
66
      features = mapped, keytype = kt,
67
      org = org, returntype = returntype
68
    )
69
  }
70
71
  ## its special for metabolites, because sometimes there are different
72
  ## identifiers used in the same pathway
73
  if (which == "metabolites") {
74
    chebi <- mapIDType(
75
      features = features, keytype = "CHEBI",
76
      maptype = "ChEBI", returntype = returntype
77
    )
78
79
    kegg <- mapIDType(
80
      features = features, keytype = "KEGGCOMP",
81
      maptype = "KEGG", returntype = returntype
82
    )
83
84
    pubchem <- mapIDType(
85
      features = features, keytype = "PUBCHEM",
86
      maptype = "CID", returntype = returntype
87
    )
88
89
    cas <- mapIDType(
90
      features = features, keytype = "CAS",
91
      maptype = "CAS", returntype = returntype
92
    )
93
94
    hmdb <- mapIDType(
95
      features = features, keytype = "HMDB",
96
      maptype = "HMDB", returntype = returntype
97
    )
98
99
    mapped <- c(chebi, kegg, pubchem, cas, hmdb)
100
  }
101
102
  return(mapped)
103
}
104
105
106
107
#' Mapping between pathway encoded genes/proteins and different ID formats.
108
#'
109
#' Function to retrieve the gene/protein identifier mapping. Ongoing from
110
#' genes/proteins retrieved from pathway definitions, which often include two or
111
#' more ID formats or a format that is not present in your omics measurement,
112
#' this function maps those IDs to a given format. Depending on the organism,
113
#' additional packages have to be installed.
114
#'
115
#' @param features List of identifiers to be mapped.
116
#' @param keytype String specifying the ID type, e.g., "ENTREZID" or "UNIPROT".
117
#' @param org String that defines the organism. Default: hsapiens
118
#'   Options: see \code{\link[multiGSEA]{getOrganisms}}
119
#' @param returntype String that specifies the returning ID type. Default:
120
#'   SYMBOL, Options: SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ
121
#'
122
#' @return List containing mapped gene/protein IDs.
123
#'
124
#' @examples
125
#' features <- graphite::nodes(graphite::pathways("hsapiens", "kegg")[[1]])
126
#' features <- gsub("ENTREZID:", "", features)
127
#' keytype <- "ENTREZID"
128
#' getGeneMapping(features, keytype)
129
#'
130
#' getGeneMapping(features, keytype, returntype = "UNIPROT")
131
#' \donttest{
132
#' features <- graphite::nodes(graphite::pathways("rnorvegicus", "reactome")[[1]])
133
#' features <- gsub("UNIPROT:", "", features)
134
#' getGeneMapping(features, keytype = "UNIPROT", org = "rnorvegicus")
135
#'
136
#' getGeneMapping(features,
137
#'   keytype = "UNIPROT",
138
#'   org = "rnorvegicus",
139
#'   returntype = "ENSEMBL"
140
#' )
141
#' }
142
#'
143
#' @importFrom AnnotationDbi select
144
#'
145
#' @export
146
getGeneMapping <- function(features, keytype, org = "hsapiens", returntype = "SYMBOL") {
147
  org <- tolower(org)
148
149
  ## check for the correct organism
150
  if (!(org %in% getOrganisms())) {
151
    stop("Please insert a correct organism name! Use getOrganisms()
152
              to see all supported organisms.",
153
      call. = FALSE
154
    )
155
  }
156
157
  db <- getIDMappingDatabase(org)
158
159
  supportedIDs <- c("SYMBOL", "ENTREZID", "UNIPROT", "ENSEMBL", "REFSEQ")
160
  if (org != "dmelanogaster" && !returntype %in% supportedIDs) {
161
    stop("Insert one of the following IDs to be returned (returntype):
162
              SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ.",
163
      call. = FALSE
164
    )
165
  }
166
167
  supportedIDs <- c(supportedIDs, "FLYBASE", "FLYBASECG")
168
  if (org == "dmelanogaster" && !returntype %in% supportedIDs) {
169
    stop("Insert one of the following IDs to be returned (returntype):
170
              SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ, FLYBASE, FLYBASECG.",
171
      call. = FALSE
172
    )
173
  }
174
175
  ## design the columns field such that we create a triple ID mapping between
176
  ## ENTREZIDs, UNIPROT, and gene symbols
177
  if (keytype == "UNIPROT") {
178
    col <- unique(c("SYMBOL", "ENTREZID", returntype))
179
  } else {
180
    col <- unique(c("SYMBOL", "UNIPROT", returntype))
181
  }
182
183
  ## run the actual mapping of IDS and return a list of the user-given type
184
  map <- tryCatch(
185
    {
186
      map <- AnnotationDbi::select(db,
187
        keys = features,
188
        columns = col, keytype = keytype
189
      )
190
      m <- match(unique(map[[keytype]]), map[[keytype]])
191
      map <- map[m, ]
192
      map[[returntype]][!is.na(map[[returntype]])]
193
    },
194
    error = function(cond) {
195
      return(list())
196
    }
197
  )
198
199
  return(map)
200
}
201
202
203
#' Mapping between pathway encoded metabolites and different metabolite ID
204
#' formats.
205
#'
206
#' Function to retrieve the metabolite identifier mapping. Ongoing from
207
#' metabolites retrieved from pathway definitions, which often include two or
208
#' more ID formats, this function maps those IDs to a given format. The complete
209
#' mapping table based on \href{https://comptox.epa.gov/dashboard}{Comptox
210
#' Dashboard}, \href{https://pubchem.ncbi.nlm.nih.gov/}{PubChem},
211
#' \href{https://hmdb.ca/}{HMDB}, and \href{https://www.ebi.ac.uk/chebi}{ChEBI}
212
#' is provided in the AnnotationHub package metaboliteIDmapping.
213
#'
214
#' @param features List of identifiers to be mapped.
215
#' @param keytype String specifying the ID type, e.g., "ChEBI" or "KEGGCOMP".
216
#'
217
#' @param returntype String that specifies the returning ID type.
218
#'        Default: HMDB
219
#'        Options: HMDB, CAS, DTXCID, DTXSID, SID, CID, ChEBI, KEGG, Drugbank
220
#'
221
#' @return List containing mapped gene/protein IDs.
222
#'
223
#' @examples
224
#' features <- graphite::nodes(graphite::pathways("hsapiens", "kegg")[[1]], which = "metabolites")
225
#' features <- gsub("KEGGCOMP:", "", features)
226
#' keytype <- "KEGG"
227
#'
228
#' getMetaboliteMapping(features, keytype)
229
#'
230
#' getMetaboliteMapping(features, keytype = "KEGG", returntype = "CID")
231
#'
232
#' @importFrom dplyr pull filter distinct
233
#' @importFrom metaboliteIDmapping metabolitesMapping
234
#' @importFrom magrittr %>%
235
#'
236
#' @export
237
getMetaboliteMapping <- function(features, keytype, returntype = "HMDB") {
238
  ## check for the correct metabolite mapping format
239
  supportedIDs <- c(
240
    "HMDB", "ChEBI", "KEGG", "CAS", "DTXCID",
241
    "DTXSID", "SID", "CID", "Drugbank"
242
  )
243
  if (!returntype %in% supportedIDs) {
244
    stop("Insert one of the following IDs to be returned (returntype):
245
              HMDB, CAS, ChEBI, KEGG, SID, CID, DTXCID, DTXSID, Drugbank, Name",
246
      call. = FALSE
247
    )
248
  }
249
250
  ## load the mapping table which is deposited in the
251
  ## metaboliteIDmapping package.
252
  if (!requireNamespace("metaboliteIDmapping", quietly = TRUE)) {
253
    stop("The necessary package metaboliteIDmapping is not installed.",
254
      call. = FALSE
255
    )
256
  }
257
258
259
  ## run the actual mapping of IDS and return a list of the user-given type
260
  map <- tryCatch(
261
    {
262
      ## to speed up the mapping, we need to subest the whole
263
      ## metabolitesIDmapping table in the first place to contain
264
      ## only thoses entries that match the given feature list
265
      SUBmappingTable <- metaboliteIDmapping::metabolitesMapping %>%
266
        dplyr::select(!!as.name(keytype), !!as.name(returntype)) %>%
267
        dplyr::filter(!!as.name(keytype) %in% unique(features)) %>%
268
        dplyr::distinct()
269
      colnames(SUBmappingTable) <- c("Original", "Mapped")
270
271
      SUBmappingTable %>% dplyr::pull("Mapped")
272
    },
273
    error = function(cond) {
274
      return(
275
        rep("NA", length(features))
276
      )
277
    }
278
  )
279
280
  map <- map[!is.na(map)]
281
  return(map)
282
}
283
284
285
286
#' Collect feature mapping for user given databases and omics layer.
287
#'
288
#' The functions makes use of the graphite R package to collect pathways from
289
#' user specified databases. Depending on the omics layer specified, the
290
#' function extracts either annotated genes/proteins (for transcriptome,
291
#' proteome layer) or metabolites (for metabolite layer). The data structure
292
#' that is returned is mandatory to calculate the multi-omics pathway
293
#' enrichment.
294
#'
295
#' @param dbs List of databases that should be queried for pathways. Default:
296
#'   all available databases
297
#' @param layer List of omics layer that should be addressed. Default: all three
298
#'   layer (transcriptome, proteome, metabolome)
299
#' @param returnTranscriptome String specifying the returned gene ID format.
300
#'   Default: SYMBOL Options: SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ
301
#' @param returnProteome String specifying the returned protein ID format.
302
#'   Default: SYMBOL Options: SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ
303
#' @param returnMetabolome String specifying the returned metabolite ID format.
304
#'   Default: HMDB Options: HMDB, CAS, DTXCID, DTXSID, SID, CID, ChEBI, KEGG, Drugbank
305
#' @param organism String specifying the organism of interest. This has direct
306
#'   influence on the available pathway databases. Default: "hsapiens"
307
#'   Options: see \code{\link[multiGSEA]{getOrganisms}}
308
#' @param useLocal Boolean to use local pathway/feature descriptions. In case
309
#'   useLocal is set to FALSE, pathway definitions and feature extraction
310
#'   will be recalculated. This could take several minutes depending on the
311
#'   database used. Pathbank, for example, contains nearly 50000 pathway
312
#'   definition that have to be re-mapped. useLocal has no effect when
313
#'   pathway definitions are retrieved for the first time. Default: TRUE
314
#'
315
#' @return Nested list with extracted and mapped pathway features.
316
#'
317
#' @examples
318
#'
319
#' getMultiOmicsFeatures(
320
#'   dbs = c("kegg"),
321
#'   layer = c("transcriptome", "proteome"),
322
#'   organism = "hsapiens"
323
#' )
324
#' \donttest{
325
#' getMultiOmicsFeatures(
326
#'   dbs = c("kegg", "reactome"),
327
#'   layer = c("transcriptome", "metabolome"),
328
#'   organism = "mmusculus"
329
#' )
330
#'
331
#' getMultiOmicsFeatures(
332
#'   dbs = c("reactome"),
333
#'   layer = c("proteome"),
334
#'   organism = "rnorvegicus",
335
#'   returnProteome = "ENTREZID"
336
#' )
337
#' }
338
#' @importFrom graphite pathwayDatabases pathways
339
#' @importFrom magrittr %>%
340
#' @importFrom dplyr filter pull
341
#' @importFrom rlang .data
342
#'
343
#' @export
344
getMultiOmicsFeatures <- function(dbs = c("all"), layer = c("all"),
345
                                  returnTranscriptome = "SYMBOL",
346
                                  returnProteome = "SYMBOL",
347
                                  returnMetabolome = "HMDB",
348
                                  organism = "hsapiens",
349
                                  useLocal = TRUE) {
350
  layers <- c("all", "metabolome", "proteome", "transcriptome")
351
  organism <- tolower(organism)
352
353
  returnTranscriptome <- toupper(returnTranscriptome)
354
  returnProteome <- toupper(returnProteome)
355
  returnMetabolome <- toupper(returnMetabolome)
356
  if(returnMetabolome == "CHEBI") returnMetabolome <- "ChEBI"
357
  if(returnMetabolome == "DRUGBANK") returnMetabolome <- "Drugbank"
358
359
  ## check for the correct transcriptome mapping format
360
  supportedIDs <- c("SYMBOL", "ENTREZID", "UNIPROT", "ENSEMBL", "REFSEQ")
361
  if (!returnTranscriptome %in% supportedIDs) {
362
    stop("Insert one of the following IDs to be returned (returnTranscriptome):
363
              SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ",
364
      call. = FALSE
365
    )
366
  }
367
368
  ## check for the correct proteome mapping format
369
  if (!returnProteome %in% supportedIDs) {
370
    stop("Insert one of the following IDs to be returned (returnProteome):
371
              SYMBOL, ENTREZID, UNIPROT, ENSEMBL, REFSEQ",
372
      call. = FALSE
373
    )
374
  }
375
376
  ## check for the correct metabolite mapping format
377
  supportedIDs <- c(
378
    "HMDB", "ChEBI", "KEGG", "CAS", "DTXCID",
379
    "DTXSID", "SID", "CID", "Drugbank"
380
  )
381
  if (!returnMetabolome %in% supportedIDs) {
382
    stop("Insert one of the following IDs to be returned (returnMetabolome):
383
              HMDB, CAS, ChEBI, KEGG, SID, CID, DTXCID, DTXSID, Drugbank",
384
      call. = FALSE
385
    )
386
  }
387
388
  ## check if the given organism is supported
389
  if (!(organism %in% getOrganisms())) {
390
    stop("You entered an organism that is not supported!
391
              Use getOrganisms() to get a list of all suported organisms.",
392
      call. = FALSE
393
    )
394
  }
395
396
  if (sum(tolower(layer) %in% layers) != length(layer)) {
397
    stop("You entered wrong input for the omics layer specification.
398
              Options are: all, transcriptome, proteome, metabolome, or a combination thereof.",
399
      call. = FALSE
400
    )
401
  }
402
403
  pDBs <- graphite::pathwayDatabases()
404
  dbs0 <- pDBs %>%
405
    dplyr::filter(.data$species == organism) %>%
406
    dplyr::pull(.data$database)
407
  databases <- c("all", as.vector(dbs0))
408
409
  if (sum(tolower(dbs) %in% databases) != length(dbs)) {
410
    stop(paste0("You entered wrong input for the omics layer specification.
411
              Options are: ", paste(databases, collapse = " "), " or a combination thereof."),
412
      call. = FALSE
413
    )
414
  }
415
416
  if ("all" %in% dbs) dbs <- as.vector(dbs0)
417
418
  if ("all" %in% layer) {
419
    layer <- c("transcriptome", "proteome", "metabolome")
420
  }
421
422
  pathways <- lapply(dbs, function(x) {
423
    graphite::pathways(organism, x)
424
  })
425
  names(pathways) <- dbs
426
427
  features <- list()
428
  if ("transcriptome" %in% layer) {
429
    features$transcriptome <- getMappedFeatures(
430
      pathways = pathways,
431
      organism = organism,
432
      returnID = returnTranscriptome,
433
      useLocal = useLocal
434
    )
435
436
    ## adapt for duplicated pathways
437
    names(features$transcriptome) <- rename_duplicates(names(features$transcriptome))
438
  }
439
440
  if ("proteome" %in% layer) {
441
    if ("transcriptome" %in% layer && returnProteome == returnTranscriptome) {
442
      features$proteome <- features$transcriptome
443
    } else {
444
      features$proteome <- getMappedFeatures(
445
        pathways = pathways,
446
        organism = organism,
447
        returnID = returnProteome,
448
        useLocal = useLocal
449
      )
450
451
      ## adapt for duplicated pathways
452
      names(features$proteome) <- rename_duplicates(names(features$proteome))
453
    }
454
  }
455
456
457
  if ("metabolome" %in% layer) {
458
    features$metabolome <- getMappedFeatures(
459
      pathways = pathways,
460
      organism = organism,
461
      returnID = returnMetabolome,
462
      which = "metabolites",
463
      useLocal = useLocal
464
    )
465
466
    ## adapt for duplicated pathways
467
    names(features$metabolome) <- rename_duplicates(names(features$metabolome))
468
  }
469
470
  return(features)
471
}
472
473
474
475
#' Wrapper to get feature mappings.
476
#'
477
#' Feature mappings will be used from hard disk in case they have been
478
#' mapped before and `useLocal` is not set to be FALSE.
479
#' In other cases, a feature extraction will be done and the results are
480
#' stored for a following occasion.
481
#'
482
#' @param pathways List of pathway definitions.
483
#' @param returnID String specifying the returned ID format.
484
#' @param organism String defining the organism of analysis.
485
#' @param which Mode to extract the features, either \'proteins\' or
486
#'   \'metabolites\'.
487
#' @param useLocal Boolean specifying whether or not to use the local
488
#'              preprocessed mapping.
489
#'
490
#' @return List of mapped features for an omics layer.
491
getMappedFeatures <- function(pathways, returnID = "SYMBOL", organism = "hsapiens", which = "proteins", useLocal = TRUE) {
492
  feat <- unlist(lapply(names(pathways), function(db) {
493
    ap <- archivePath(paste0(organism, "_", db, "_", returnID))
494
495
    if (file.exists(ap) && useLocal) {
496
      loadLocal(ap)
497
    } else {
498
      tmp <- lapply(pathways[[db]], function(p) {
499
        getFeatures(
500
          pathway = p, org = organism,
501
          which = which,
502
          returntype = returnID
503
        )
504
      })
505
506
      header <- rep(paste0("(", toupper(db), ") "), length(pathways[[db]]))
507
      names(tmp) <- paste0(header, names(tmp))
508
      saveRDS(tmp, file = ap)
509
      tmp
510
    }
511
  }), recursive = FALSE)
512
513
  return(feat)
514
}
515
516
517
518
#' Helper function to map only a subset of metabolite IDs
519
#'
520
#' This helper function becomes necessary since there are sometimes multiple ID
521
#' formats used in a single pathway definition.
522
#'
523
#' @param features List of metabolite feature IDs of the pathway.
524
#' @param keytype String specifying the ID format in pathway definition.
525
#' @param maptype String specifying the corresponding ID format in multiGSEA.
526
#' @param returntype String identifying the ID type that should be mapped.
527
#'
528
#' @return List of mapped metabolite IDs.
529
mapIDType <- function(features, keytype = "CHEBI", maptype = "ChEBI", returntype = "HMDB") {
530
  mapped <- c()
531
  ids <- gsub(paste0(keytype, ":"), "", features[grep(keytype, features)])
532
533
  if (returntype != maptype) {
534
    mapped <- getMetaboliteMapping(
535
      features = ids,
536
      keytype = maptype,
537
      returntype = returntype
538
    )
539
  } else {
540
    mapped <- ids
541
  }
542
543
  return(mapped)
544
}
545
546
547
548
#' Get list of supported organisms
549
#'
550
#' Get a list of organisms that are covered in our workflow through a supporting
551
#' `AnnotationDBi` package. Without such a package we would not be able to map
552
#' transcript and protein identifier between different formats. All the
553
#' organisms that are listed here have at lest kegg and or reactome pathway
554
#' annotation that can be queried by `graphite`.
555
#'
556
#' @return List of supported organisms
557
#'
558
#' @examples
559
#' getOrganisms()
560
#' @export
561
getOrganisms <- function() {
562
  orglist <- c(
563
    "hsapiens", "rnorvegicus", "mmusculus", "sscrofa",
564
    "btaurus", "celegans", "dmelanogaster", "drerio",
565
    "ggallus", "xlaevis", "cfamiliaris"
566
  )
567
568
  return(orglist)
569
}
570
571
572
573
#' Get the correct ID mapping database
574
#'
575
#' Check by means of the given organism name if the required `AnnotationDbi`
576
#' package is installed. Select the ID mapping table based on the organism name
577
#' and return it.
578
#'
579
#' @param organism String that defines the organism.
580
#'
581
#' @return AnnotationDbi database for ID mapping.
582
getIDMappingDatabase <- function(organism) {
583
  map <- c(
584
    hsapiens = "org.Hs.eg.db", rnorvegicus = "org.Rn.eg.db",
585
    mmusculus = "org.Mm.eg.db", sscrofa = "org.Ss.eg.db",
586
    btaurus = "org.Bt.eg.db", celegans = "org.Ce.eg.db",
587
    dmelanogaster = "org.Dm.eg.db", drerio = "org.Dr.eg.db",
588
    ggallus = "org.Gg.eg.db", xlaevis = "org.Xl.eg.db",
589
    cfamiliaris = "org.Cf.eg.db"
590
  )
591
592
  stopifnot(organism %in% names(map))
593
  pkg <- map[[organism]]
594
595
  if (!requireNamespace(pkg, quietly = TRUE)) {
596
    stop(paste0("The necessary package ", pkg, " is not installed."),
597
      call. = FALSE
598
    )
599
  }
600
601
  return(get(pkg, envir = getNamespace(pkg)))
602
}
603
604
605
606
#' Pre-rank features prior to calculating enrichment scores.
607
#'
608
#' Rank features based on the direction of their fold change and their magnitude
609
#' implicated through their assigned p-value.
610
#'
611
#' @param logFC Vector containing the log-transformed fold changes of features.
612
#' @param pvalues Vector containing the p-values associated with those logFCs.
613
#' @param base Integer specifying the base of the logarithm. Default: 10
614
#'
615
#' @return Vector of pre-ranked features, still unsorted
616
#'
617
#' @examples
618
#' logFC <- rnorm(10)
619
#' pvalues <- runif(10)
620
#' rankFeatures(logFC, pvalues)
621
#'
622
#' @export
623
rankFeatures <- function(logFC, pvalues, base = 10) {
624
  return(sign(logFC) * -log(pvalues, base = base))
625
}
626
627
628
629
#' Create an empty data structure for measured omics features
630
#'
631
#' This function creates a data structure of nested but empty lists. One list
632
#' for each omics layer. By default all three supported omics layer are used to
633
#' create a data structures with three empty sublists: transcriptome, proteome,
634
#' and metabolome.
635
#'
636
#' @param layer List specifying the omics layer which should be created
637
#'
638
#' @return List with length(layer) empty sublists
639
#'
640
#' @examples
641
#' initOmicsDataStructure()
642
#' initOmicsDataStructure(c("transcriptome", "proteome"))
643
#' initOmicsDataStructure(c("Transcriptome", "Metabolome"))
644
#' @export
645
initOmicsDataStructure <- function(layer = c("transcriptome", "proteome", "metabolome")) {
646
  l <- c()
647
  layer <- tolower(layer)
648
  if (length(grep("trans*", layer)) > 0) l <- c(l, "transcriptome")
649
  if (length(grep("prote*", layer)) > 0) l <- c(l, "proteome")
650
  if (length(grep("metabo*", layer)) > 0) l <- c(l, "metabolome")
651
652
  if (length(l) != length(layer)) {
653
    stop("Not all your omics layer could be mapped to
654
              'transcriptome', 'proteome', or 'metabolome'. ",
655
      call. = TRUE
656
    )
657
  }
658
659
  empty_structure <- rep(list(list()), length(l))
660
  names(empty_structure) <- l
661
662
  return(empty_structure)
663
}
664
665
666
667
#' Helper function to get all different metabolite ID formats
668
#'
669
#' This helper function extracts all used ID formats in all pathways
670
#' and returns a nested list for each pathway database.
671
#'
672
#' @param pathways List of pathway databases and their pathway definition.
673
#'
674
#' @return List of metabolite ID formats.
675
#'
676
#' @importFrom graphite nodes
677
getMetaboliteIDformats <- function(pathways) {
678
  n1 <- lapply(names(pathways), function(dbs) {
679
    n2 <- lapply(pathways[[dbs]], function(p) {
680
      unique(gsub(":.*", "", graphite::nodes(p, which = "metabolites")))
681
    })
682
683
    unique(unlist(n2))
684
  })
685
686
  names(n1) <- names(pathways)
687
  return(n1)
688
}