Diff of /R/XenaDownload.R [000000] .. [0bdad5]

Switch to side-by-side view

--- a
+++ b/R/XenaDownload.R
@@ -0,0 +1,142 @@
+##' Download Datasets from UCSC Xena Hubs
+##'
+##' Avaliable datasets list: <https://xenabrowser.net/datapages/>
+##'
+##' @author Shixiang Wang <w_shixiang@163.com>
+##' @param xquery a tibble object generated by [XenaQuery] function.
+##' @param destdir specify a location to store download data. Default is system temp directory.
+##' @param download_probeMap if `TRUE`, also download ProbeMap data, which used for id mapping.
+##' @param trans_slash logical, default is `FALSE`. If `TRUE`, transform slash '/' in dataset id
+##' to '__'. This option is for backwards compatibility.
+##' @param max_try time limit to try downloading the data.
+##' @param force logical. if `TRUE`, force to download data no matter whether files exist.
+##'  Default is `FALSE`.
+##' @param ... other argument to `download.file` function
+##' @return a `tibble`
+##' @export
+##' @importFrom utils download.file
+##' @importFrom dplyr filter
+##' @examples
+##' \dontrun{
+##' xe = XenaGenerate(subset = XenaHostNames == "tcgaHub")
+##' hosts(xe)
+##' xe_query = XenaQuery(xe)
+##' xe_download = XenaDownload(xe_query)
+##' }
+
+XenaDownload <- function(xquery,
+                         destdir = tempdir(),
+                         download_probeMap = FALSE,
+                         trans_slash = FALSE,
+                         force = FALSE,
+                         max_try = 3L,
+                         ...) {
+  stopifnot(is.data.frame(xquery), c("url") %in% names(xquery), is.logical(download_probeMap))
+
+  if (download_probeMap) {
+    xquery_probe <- UCSCXenaTools::XenaData %>%
+      dplyr::filter(XenaDatasets %in% xquery$datasets) %>%
+      XenaGenerate() %>%
+      XenaQueryProbeMap()
+    xquery <- dplyr::bind_rows(xquery, xquery_probe)
+  }
+
+  if (trans_slash) {
+    xquery$fileNames <- gsub(
+      pattern = "/",
+      replacement = "__",
+      x = xquery$datasets
+    )
+  } else {
+    xquery$fileNames <- xquery$datasets
+  }
+
+  xquery$fileNames <- ifelse(grepl("\\.gz", xquery$url),
+    paste0(xquery$fileNames, ".gz"),
+    xquery$fileNames
+  )
+  # destdir = paste0(destdir,"/")
+  xquery$destfiles <- file.path(destdir, xquery$fileNames)
+
+  if (!dir.exists(destdir)) {
+    dir.create(destdir, recursive = TRUE, showWarnings = FALSE)
+  }
+
+  message("All downloaded files will under directory ", destdir, ".")
+  if (!trans_slash) {
+    dir_names <- dirname(xquery$destfiles)
+    message("The 'trans_slash' option is FALSE, keep same directory structure as Xena.")
+    message("Creating directories for datasets...")
+    for (i in dir_names) {
+      dir.create(i, recursive = TRUE, showWarnings = FALSE)
+    }
+  }
+
+  # Make sure the order is right
+  xquery <- dplyr::select(xquery, c("hosts", "datasets", "url", "fileNames", "destfiles"), dplyr::everything())
+  download_dataset <- function(x) { # nocov start
+    tryCatch(
+      {
+        if (!file.exists(x[5]) | force) {
+          message("Downloading ", x[4])
+          download.file2(x[3], destfile = x[5], max_try = max_try, ...)
+        } else {
+          message(x[5], ", the file has been download!")
+        }
+      },
+      error = function(e) {
+        message(
+          "Can not find file",
+          x[4],
+          ", this file maybe not compressed."
+        )
+        x[3] <- gsub(pattern = "\\.gz$", "", x[3])
+        x[4] <- gsub(pattern = "\\.gz$", "", x[4])
+        x[5] <- gsub(pattern = "\\.gz$", "", x[5])
+        message("Try downloading file", x[4], "...")
+        tryCatch(
+          {
+            download.file2(x[3], destfile = x[5], max_try = max_try, ...)
+          },
+          error = function(e) {
+            message("Your network is bad (try again) or the data source is invalid (report to the developer).")
+            invisible(NULL)
+          }
+        )
+      }
+    )
+  }
+
+  apply(xquery, 1, download_dataset) # nocov end
+
+  if (trans_slash) {
+    message(
+      "Note file names inherit from names in datasets column\n  and '/' all changed to '__'."
+    )
+  }
+
+  invisible(xquery)
+}
+
+download.file2 <- function(url, destfile,
+                           max_try = 3L,
+                           ...) {
+  Sys.sleep(0.01)
+  tryCatch(
+    {
+      if (abs(max_try - 4L) > 1) {
+        message("==> Trying #", abs(max_try - 4L))
+      }
+      download.file(url, destfile, ...)
+    },
+    error = function(e) {
+      if (max_try == 1) {
+        message("Tried 3 times but failed, please check your internet connection!")
+        invisible(NULL)
+      } else {
+        download.file2(url, destfile, max_try = max_try - 1L, ...)
+      }
+    }
+  )
+
+}