[9987e3]: / R / fetch_online.R

Download this file

93 lines (84 with data), 3.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#' Fetch online genome annotations from an Ensembldb database
#'
#' Gene annotations are extracted from the database with
#' `Signac::GetGRangesFromEnsDb()`. Every sequence level is then prefixed
#' with "chr" and "chrMT" is rewritten to "chrM", so that chromosome names
#' follow the UCSC format.
#'
#' @param ensdb_annotations (EnsDb object) - Ensembldb database
#'   (default: EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)
#'
#' @return gene_range (GRanges object) - Genome annotations
#' @export
#'
#' @examples
#' gene_range <- get_genome_annotations(EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)
get_genome_annotations <- function(
  ensdb_annotations = EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86
) {
  # Get genome annotations from the Ensembldb database
  gene_range <- Signac::GetGRangesFromEnsDb(ensdb_annotations)

  # Change chromosome names to UCSC format: add the "chr" prefix, then
  # rename the resulting "chrMT" level to "chrM"
  ucsc_levels <- stringr::str_replace(
    string = paste0("chr", Signac::seqlevels(gene_range)),
    pattern = "chrMT",
    replacement = "chrM"
  )
  # NOTE(review): the original author left a reminder to check that Signac
  # is the right package to call seqlevels() from -- still to be confirmed.
  Signac::seqlevels(gene_range) <- ucsc_levels

  gene_range
}
#' Fetch online TF motifs from JASPAR2020 and chromVARmotifs
#'
#' Builds the union of two motif collections for the requested species: the
#' JASPAR2020 "CORE" matrices (converted to PWMs) and the `human_pwms_v2` /
#' `mouse_pwms_v2` dataset. JASPAR2020 matrices are written into the dataset
#' collection by name, so an entry already present under the same name is
#' replaced by the JASPAR2020 one. A TF-to-motif table is then derived from
#' the motif names: names of the form "name1::name2" yield one row per TF,
#' and anything from the first "(" onwards (e.g. "(var.2)") is dropped.
#'
#' @param species (character) - Species name, either "human" or "mouse"
#'   (default: "human")
#'
#' @return motifs_db (motifs_db object) - TF2motifs + motifs PWMs
#' @export
#'
#' @examples motifs_db <- get_tf2motifs(species = "human")
get_tf2motifs <- function(species = "human") {
  # Fail early with a clear message instead of the "object not found" error
  # that an unsupported species used to trigger further down.
  if (length(species) != 1L || is.na(species) ||
        !species %in% c("human", "mouse")) {
    stop('`species` must be either "human" or "mouse".', call. = FALSE)
  }

  # TF motifs using the union of databases: JASPAR and cis-BP
  # included in chromVAR.
  # Original data accessible at https://github.com/GreenleafLab/chromVARmotifs
  if (species == "human") {
    jaspar_species <- "Homo sapiens"
    data("human_pwms_v2")
    motifs <- human_pwms_v2
  } else {
    jaspar_species <- "Mus musculus"
    data("mouse_pwms_v2")
    motifs <- mouse_pwms_v2
  }

  # Parameters for JASPAR2020
  opts <- list(
    collection = "CORE",
    species = jaspar_species,
    all_versions = FALSE
  )
  jaspar_pwm <- TFBSTools::toPWM(
    TFBSTools::getMatrixSet(JASPAR2020::JASPAR2020, opts)
  )

  # Combine motifs of JASPAR2020 and chromVARmotifs; assignment is by name,
  # so a JASPAR2020 matrix replaces an existing entry with the same name.
  for (name in names(jaspar_pwm)) {
    motifs[name] <- jaspar_pwm[name]
  }

  # TFBSTools::name(motifs) returns the TF name(s) attached to each matrix;
  # TFs given as "name1::name2" are split into separate entries.
  tfs_per_motif <- strsplit(as.character(TFBSTools::name(motifs)), "::")
  tf_labels <- as.character(unlist(tfs_per_motif, use.names = FALSE))

  # Only keep <NAME> in identifiers of the form "<NAME>(var.n)"
  tf_names <- vapply(
    strsplit(tf_labels, "(", fixed = TRUE),
    function(parts) parts[1],
    character(1)
  )

  # Build the TF motif table in one go (one row per motif/TF pair, in motif
  # order) rather than growing it with rbind() inside a loop.
  tf2motifs <- data.frame(
    motif = rep(as.character(names(motifs)), times = lengths(tfs_per_motif)),
    tf = tf_names,
    stringsAsFactors = FALSE
  )

  # Return motifs_db <- TF2motifs + motifs PWMs
  new(
    "motifs_db",
    tf2motifs = tf2motifs,
    motifs = motifs,
    tfs = unique(tf2motifs$tf)
  )
}