Diff of /R/XenaDownload.R [000000] .. [0bdad5]

Switch to unified view

a b/R/XenaDownload.R
1
##' Download Datasets from UCSC Xena Hubs
2
##'
3
##' Avaliable datasets list: <https://xenabrowser.net/datapages/>
4
##'
5
##' @author Shixiang Wang <w_shixiang@163.com>
6
##' @param xquery a tibble object generated by [XenaQuery] function.
7
##' @param destdir specify a location to store download data. Default is system temp directory.
8
##' @param download_probeMap if `TRUE`, also download ProbeMap data, which used for id mapping.
9
##' @param trans_slash logical, default is `FALSE`. If `TRUE`, transform slash '/' in dataset id
10
##' to '__'. This option is for backwards compatibility.
11
##' @param max_try time limit to try downloading the data.
12
##' @param force logical. if `TRUE`, force to download data no matter whether files exist.
13
##'  Default is `FALSE`.
14
##' @param ... other argument to `download.file` function
15
##' @return a `tibble`
16
##' @export
17
##' @importFrom utils download.file
18
##' @importFrom dplyr filter
19
##' @examples
20
##' \dontrun{
21
##' xe = XenaGenerate(subset = XenaHostNames == "tcgaHub")
22
##' hosts(xe)
23
##' xe_query = XenaQuery(xe)
24
##' xe_download = XenaDownload(xe_query)
25
##' }
26
27
XenaDownload <- function(xquery,
28
                         destdir = tempdir(),
29
                         download_probeMap = FALSE,
30
                         trans_slash = FALSE,
31
                         force = FALSE,
32
                         max_try = 3L,
33
                         ...) {
34
  stopifnot(is.data.frame(xquery), c("url") %in% names(xquery), is.logical(download_probeMap))
35
36
  if (download_probeMap) {
37
    xquery_probe <- UCSCXenaTools::XenaData %>%
38
      dplyr::filter(XenaDatasets %in% xquery$datasets) %>%
39
      XenaGenerate() %>%
40
      XenaQueryProbeMap()
41
    xquery <- dplyr::bind_rows(xquery, xquery_probe)
42
  }
43
44
  if (trans_slash) {
45
    xquery$fileNames <- gsub(
46
      pattern = "/",
47
      replacement = "__",
48
      x = xquery$datasets
49
    )
50
  } else {
51
    xquery$fileNames <- xquery$datasets
52
  }
53
54
  xquery$fileNames <- ifelse(grepl("\\.gz", xquery$url),
55
    paste0(xquery$fileNames, ".gz"),
56
    xquery$fileNames
57
  )
58
  # destdir = paste0(destdir,"/")
59
  xquery$destfiles <- file.path(destdir, xquery$fileNames)
60
61
  if (!dir.exists(destdir)) {
62
    dir.create(destdir, recursive = TRUE, showWarnings = FALSE)
63
  }
64
65
  message("All downloaded files will under directory ", destdir, ".")
66
  if (!trans_slash) {
67
    dir_names <- dirname(xquery$destfiles)
68
    message("The 'trans_slash' option is FALSE, keep same directory structure as Xena.")
69
    message("Creating directories for datasets...")
70
    for (i in dir_names) {
71
      dir.create(i, recursive = TRUE, showWarnings = FALSE)
72
    }
73
  }
74
75
  # Make sure the order is right
76
  xquery <- dplyr::select(xquery, c("hosts", "datasets", "url", "fileNames", "destfiles"), dplyr::everything())
77
  download_dataset <- function(x) { # nocov start
78
    tryCatch(
79
      {
80
        if (!file.exists(x[5]) | force) {
81
          message("Downloading ", x[4])
82
          download.file2(x[3], destfile = x[5], max_try = max_try, ...)
83
        } else {
84
          message(x[5], ", the file has been download!")
85
        }
86
      },
87
      error = function(e) {
88
        message(
89
          "Can not find file",
90
          x[4],
91
          ", this file maybe not compressed."
92
        )
93
        x[3] <- gsub(pattern = "\\.gz$", "", x[3])
94
        x[4] <- gsub(pattern = "\\.gz$", "", x[4])
95
        x[5] <- gsub(pattern = "\\.gz$", "", x[5])
96
        message("Try downloading file", x[4], "...")
97
        tryCatch(
98
          {
99
            download.file2(x[3], destfile = x[5], max_try = max_try, ...)
100
          },
101
          error = function(e) {
102
            message("Your network is bad (try again) or the data source is invalid (report to the developer).")
103
            invisible(NULL)
104
          }
105
        )
106
      }
107
    )
108
  }
109
110
  apply(xquery, 1, download_dataset) # nocov end
111
112
  if (trans_slash) {
113
    message(
114
      "Note file names inherit from names in datasets column\n  and '/' all changed to '__'."
115
    )
116
  }
117
118
  invisible(xquery)
119
}
120
121
download.file2 <- function(url, destfile,
122
                           max_try = 3L,
123
                           ...) {
124
  Sys.sleep(0.01)
125
  tryCatch(
126
    {
127
      if (abs(max_try - 4L) > 1) {
128
        message("==> Trying #", abs(max_try - 4L))
129
      }
130
      download.file(url, destfile, ...)
131
    },
132
    error = function(e) {
133
      if (max_try == 1) {
134
        message("Tried 3 times but failed, please check your internet connection!")
135
        invisible(NULL)
136
      } else {
137
        download.file2(url, destfile, max_try = max_try - 1L, ...)
138
      }
139
    }
140
  )
141
142
}