[fe0e8b]: / atac / archR / load_motif_annotation.R

Download this file

48 lines (31 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# opts$motif_annotation <- "Motif_cisbp" # "Motif_JASPAR2020"
motif2gene_file <- sprintf("%s/Annotations/%s_TFs.txt.gz",io$archR.directory,opts$motif_annotation)
if (file.exists(motif2gene_file)) {
motif2gene.dt <- fread(motif2gene_file)
} else {
peakAnnotation <- readRDS(sprintf("%s/Annotations/peakAnnotation.rds",io$archR.directory))
stopifnot(opts$motif_annotation%in%names(peakAnnotation))
motif2gene.dt <- peakAnnotation[[opts$motif_annotation]]$motifSummary %>%
as.data.table(keep.rownames = T) %>% setnames("rn","motif") %>% .[,strand:=NULL] %>% setnames("symbol","gene")
# Rename genes
if (grepl("cisbp",opts$motif_annotation, ignore.case = T)) {
tf2gene_rename <- c(
"Tcfe"="Tfe", "Nkx1"="Nkx1-", "Nkx2"="Nkx2-", "Nkx3"="Nkx3-", "Nkx4"="Nkx4-", "Nkx5"="Nkx5-", "Nkx6"="Nkx6-", "Foxf1a"="Foxf1",
"Hmga1rs1"="rs1", "Mycl1$"="Mycl", "Dux$"="Duxf3", "Duxbl$"="Duxbl1", "Pit1$"="Prop1",
"ENSMUSG00000079994"="Sox1", "Tcfap"="Tfap"
)
motif2gene.dt[,gene:=stringr::str_replace_all(gene,tf2gene_rename)]
} else if (grepl("JASPAR",opts$motif_annotation, ignore.case = T)) {
# conflictive motifs: fusion proteins (UN::JUNB) and versions (TFAP2A(var.2))
# stop("To-do")
tf2gene_rename <- c("TBXT"="T")
motif2gene.dt[,gene:=stringr::str_replace_all(gene,tf2gene_rename)]
# for JASPAR motifs
motif2gene.dt[,motif:=str_replace(motif,"\\.VAR\\.","\\.var\\."),]
} else {
stop("Motif annotation not recognised")
}
motif2gene.dt[,c("motif","gene"):=list(toupper(motif),toupper(gene))]
# Save
fwrite(motif2gene.dt, motif2gene_file, sep="\t", quote=F)
}