Diff of /R/fetch_online.R [000000] .. [9abfcf]

Switch to unified view

a b/R/fetch_online.R
1
#' Fetch online genome annotations from an Ensembldb database
#'
#' Extracts gene annotations with `Signac::GetGRangesFromEnsDb()` and renames
#' the sequence levels to UCSC format: every level is prefixed with "chr" and
#' "chrMT" is then replaced by "chrM".
#'
#' @param ensdb_annotations (EnsDb object) - Ensembldb database
#'   (default: EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)
#'
#' @return gene_range (GRanges object) - Genome annotations
#' @export
#'
#' @examples
#' gene_range <- get_genome_annotations(EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)
get_genome_annotations <- function(
  ensdb_annotations = EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86
  ) {
  # Get genome annotations from the Ensembldb database
  gene_range <- Signac::GetGRangesFromEnsDb(ensdb_annotations)

  # Change chromosome names to UCSC format: add the "chr" prefix, then rename
  # the mitochondrial chromosome from "chrMT" to "chrM"
  ucsc_levels <- stringr::str_replace(
    string = paste0("chr", Signac::seqlevels(gene_range)),
    pattern = "chrMT",
    replacement = "chrM"
  )

  # NOTE(review): the original author left a reminder to check whether Signac
  # is the right package to take `seqlevels` from -- still to be confirmed
  Signac::seqlevels(gene_range) <- ucsc_levels

  gene_range
}
25
26
#' Fetch online TF motifs from JASPAR2020 and chromVARmotifs
#'
#' Combines the JASPAR2020 CORE matrices (converted to PWMs) with the
#' chromVARmotifs PWMs of the requested species, then builds a table linking
#' every motif to the transcription factor(s) it is named after. JASPAR
#' entries are assigned into the chromVARmotifs collection by name.
#'
#' @param species (character) - Species name, either "human" or "mouse"
#'   (default: "human")
#'
#' @return motifs_db (motifs_db object) - TF2motifs + motifs PWMs
#' @export
#'
#' @examples motifs_db <- get_tf2motifs(species = "human")
get_tf2motifs <- function(species = "human") {
  # Per-species settings: JASPAR2020 species label and chromVARmotifs dataset
  species_config <- list(
    human = list(jaspar_species = "Homo sapiens", dataset = "human_pwms_v2"),
    mouse = list(jaspar_species = "Mus musculus", dataset = "mouse_pwms_v2")
  )

  # Fail early with an explicit message instead of a later
  # "object 'JASPAR_PWM' not found" when the species is not supported
  if (!is.character(species) || length(species) != 1L ||
      !species %in% names(species_config)) {
    stop(
      "Unsupported species: '", paste(as.character(species), collapse = ", "),
      "'. Supported species are: ",
      paste(names(species_config), collapse = ", "), ".",
      call. = FALSE
    )
  }
  config <- species_config[[species]]

  # TF motifs using the union of databases: JASPAR and cis-BP
  # included in chromVAR

  # Load data from JASPAR2020
  opts <- list(
    collection = "CORE",
    species = config[["jaspar_species"]],
    all_versions = FALSE
  )
  jaspar_pwm <- TFBSTools::toPWM(
    TFBSTools::getMatrixSet(JASPAR2020::JASPAR2020, opts)
  )

  # Load data from chromVARmotifs
  # Original data accessible at https://github.com/GreenleafLab/chromVARmotifs
  # The dataset is loaded into this function's environment so that calling
  # the function does not create objects in the global environment
  utils::data(list = config[["dataset"]], envir = environment())
  motifs <- get(config[["dataset"]], envir = environment())

  # Combine motifs of JASPAR2020 and chromVARmotifs
  for (name in names(jaspar_pwm)) {
    motifs[name] <- jaspar_pwm[name]
  }

  # TFBSTools::name(motifs) returns names of TFs associated to each PWMatrix
  tf_names <- TFBSTools::name(motifs)

  # One character vector of TFs per motif:
  #  - TFs given as "name1::name2" are split into separate entries
  #  - only <NAME> is kept in identifiers of the form "<NAME>(var.n)"
  tfs_per_motif <- lapply(
    strsplit(tf_names, "::"),
    function(parts) {
      vapply(
        parts,
        function(tf) strsplit(tf, "(", fixed = TRUE)[[1]][1],
        character(1),
        USE.NAMES = FALSE
      )
    }
  )

  # Build the TF motif table in one step (one row per motif/TF pair) rather
  # than growing a data frame with rbind() inside a loop
  tf2motifs <- data.frame(
    motif = as.character(rep(names(motifs), times = lengths(tfs_per_motif))),
    tf = as.character(unlist(tfs_per_motif, use.names = FALSE)),
    stringsAsFactors = FALSE
  )

  # Return motifs_db <- TF2motifs + motifs PWMs
  new(
    "motifs_db",
    tf2motifs = tf2motifs,
    motifs = motifs,
    tfs = unique(tf2motifs[["tf"]])
  )
}