a b/data-raw/mirror.R
1
#!/usr/bin/env Rscript
2
## code to mirror all raw datasets (which can be downloaded by users) of UCSC Xena
3
4
# Make sure the most recent UCSCXenaTools release is installed first, e.g.:
5
# BiocManager::install("UCSCXenaTools")
6
7
suppressMessages(library(UCSCXenaTools))
8
library(furrr)
9
10
# ---- Command-line handling and run-time setup ----
# The script expects exactly two trailing arguments:
#   <destdir>  directory that receives the mirrored files
#   <thread>   number of parallel workers; 0 means run sequentially
args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 2) {
    message("Error: two arguments are required!")
    message()
    message("Usage: ./mirror.R <destdir> <thread>")
    message("Set thread to 0 to run sequentially.")
    quit("no", status = 1)
}

destdir <- path.expand(args[1])
# as.integer() returns NA (with a warning) for non-numeric input; the NA is
# handled explicitly below, so the coercion warning itself is suppressed.
threads <- suppressWarnings(as.integer(args[2]))

# Without this check an NA would make `if (threads != 0)` abort with
# "missing value where TRUE/FALSE needed", and a negative worker count
# would only fail later inside future::plan().
if (is.na(threads) || threads < 0) {
    message("Error: <thread> must be a non-negative integer!")
    message()
    message("Usage: ./mirror.R <destdir> <thread>")
    message("Set thread to 0 to run sequentially.")
    quit("no", status = 1)
}

# Create the destination before opening the log: sink() cannot create
# mirror.txt inside a directory that does not exist yet.
if (!dir.exists(destdir)) dir.create(destdir, recursive = TRUE)

# Divert standard output to the log file. No on.exit() handler is registered
# here: on.exit() applies to an enclosing function call, and there is none at
# script top level, so the sink stays active until it is closed explicitly or
# the process ends.
# NOTE(review): message() writes to stderr, which this sink does not capture.
sink(file = file.path(destdir, "mirror.txt"))
message("destdir: ", destdir)
message("threads: ", threads)

# Pick the mapping function once so the rest of the script is agnostic to
# whether hubs are processed in parallel or one after another.
if (threads != 0) {
    future::plan(multisession, workers = threads)
    call_fun <- furrr::future_map
} else {
    call_fun <- purrr::map
}
35
36
# Download every dataset described by `x` into `destdir`.
#
# Arguments:
#   x        dataset records handed to XenaGenerate() (this script passes a
#            per-hub subset of XenaData)
#   destdir  directory forwarded to XenaDownload()
#
# Returns the value of XenaDownload().
download_dataset <- function(x, destdir) {
    hub_object <- XenaGenerate(x)
    hub_query <- XenaQuery(hub_object)

    XenaDownload(
        hub_query,
        destdir = destdir,
        download_probeMap = TRUE,
        trans_slash = FALSE,
        force = TRUE,
        method = "curl",
        extra = "-C -" # ask curl to resume interrupted transfers
    )
}
47
48
# ---- Mirror every hub and record what was downloaded ----
# Dataset catalogue shipped with UCSCXenaTools; the commented call is the
# alternative that refreshes it instead.
access_datasets <- XenaData # XenaDataUpdate(saveTolocal = FALSE)
hubs <- unique(access_datasets$XenaHostNames)

# One call per hub; call_fun is either purrr::map or furrr::future_map,
# depending on the requested number of threads.
dataset_list <- call_fun(hubs, function(h) {
    # Append this hub's standard output to the shared log, and pop the sink
    # again when the callback returns so that sinks (and their file
    # connections) do not pile up across hubs.
    sink(file = file.path(destdir, "mirror.txt"), append = TRUE)
    on.exit(sink(), add = TRUE)

    x <- subset(access_datasets, XenaHostNames == h)
    message("Mirroring hub: ", h)
    message("================================")
    # Each hub gets its own sub-directory below destdir.
    download_dataset(x, file.path(destdir, h))
})

# Stack the per-hub results in a single rbind() call rather than pairwise.
mapping_df <- do.call(rbind, dataset_list)

message("================================")
message("The mirror work has done, please check the mirror.txt.")

# NOTE(review): quote = FALSE leaves fields unquoted, so a value containing a
# comma would break the column layout; confirm this is acceptable.
write.csv(mapping_df,
          file = file.path(destdir, "mapping_dataframe.csv"),
          row.names = FALSE, quote = FALSE)

# Close the log diversion opened during setup, if one is still active.
if (sink.number() > 0) sink()