a b/R/downloadAndPrepareCELFiles.R
1
#' Download GEO .CEL files
#'
#' Function for downloading the CEL files of a GEO dataset and preparing them
#' for preprocessing.
#' The function uses \code{getGEOSuppFiles} to download the supplementary
#' files of the GEO series into \code{file.path(destdir, geo_nbr)}. It then
#' untars the \code{_RAW.tar} bundle and unzips the contained
#' \code{.CEL.gz} files. The download is skipped if the \code{_RAW.tar} file
#' is already present, and the unpacking is skipped if at least one
#' \code{.CEL} file is already present in the directory.
#'
#' @param geo_nbr The GEO accession number (a single character string).
#' @param destdir The destination dir of the downloaded files.
#' @param clean Should the strictly unnecessary files be deleted? If
#'   \code{TRUE}, all regular files in the download directory that are not
#'   \code{.CEL} files are removed. Sub-directories are left untouched.
#' @param verbose Signal the process.
#' @return Invisibly, a character vector with the normalized paths of the
#'   \code{.CEL} files found in \code{file.path(destdir, geo_nbr)}.
#' @note The function will overwrite existing files in the \code{destdir}.
#' @author
#'   Anders Ellern Bilgrau,
#'   Steffen Falgreen Larsen
#' @examples
#' \dontrun{
#' downloadAndPrepareCELFiles(geo_nbr = "GSE18376",
#'                           destdir = tempdir())
#' }
#' @importFrom GEOquery getGEOSuppFiles gunzip
#' @keywords internal
#' @export
downloadAndPrepareCELFiles <- function(geo_nbr,
                                       destdir = getwd(),
                                       clean = FALSE,
                                       verbose = TRUE) {
  stopifnot(is.character(geo_nbr), length(geo_nbr) == 1L, !is.na(geo_nbr))

  # Increase the download timeout (in seconds) for the duration of the call.
  # Never lower a larger user-set timeout, and restore the caller's previous
  # value on exit instead of assuming the R default.
  old_opts <- options(timeout = max(240, getOption("timeout", 60)))
  on.exit(options(old_opts), add = TRUE)

  if (verbose) message("Preparing GEO ", geo_nbr, " data")
  dl_dir <- file.path(destdir, geo_nbr)

  # Download data if necessary
  raw_file <- paste0(file.path(dl_dir, geo_nbr), "_RAW.tar")
  if (!file.exists(raw_file)) {
    if (verbose) message("Downloading .CEL files...\n")
    # recursive = TRUE so that a not-yet-existing destdir is created as well
    dir.create(dl_dir, showWarnings = FALSE, recursive = TRUE)
    getGEOSuppFiles(GEO = geo_nbr, makeDirectory = FALSE, baseDir = dl_dir)
  } else {
    if (verbose) message("Compressed .CEL files already downloaded...")
  }

  # List celfiles (if any)
  cel_files <- list.files(dl_dir, pattern = "\\.cel$",
                          ignore.case = TRUE, full.names = TRUE)

  if (length(cel_files) == 0L) {

    # Untar the file bundle(s)
    if (verbose) message("Untaring the RAW file...\n")
    tar_files <- list.files(path = dl_dir, pattern = "RAW\\.tar$",
                            full.names = TRUE)
    if (length(tar_files) == 0L) {
      # Fail with a clear message rather than a cryptic untar() error
      stop("No 'RAW.tar' archive found in '", dl_dir, "' for ", geo_nbr, ".",
           call. = FALSE)
    }
    for (tar_file in tar_files) {
      utils::untar(tarfile = tar_file, exdir = dl_dir)
    }

    # Unzip the files
    if (verbose) message("Unzipping the .CEL files...\n")
    gz_files <- list.files(dl_dir, pattern = "\\.cel\\.gz$",
                           ignore.case = TRUE, full.names = TRUE)
    for (gz_file in gz_files) {
      gunzip(gz_file, overwrite = TRUE, remove = TRUE)
    }

    # List celfiles
    cel_files <- list.files(dl_dir, pattern = "\\.cel$",
                            ignore.case = TRUE, full.names = TRUE)
  } else {
    if (verbose) message("(Some) .CEL files already unpacked...\n")
  }

  # Clean-up if wanted
  if (clean) {
    if (verbose) message("Removing all non CEL files...\n")
    leftovers <- setdiff(list.files(dl_dir, full.names = TRUE), cel_files)
    # file.remove() cannot delete directories (it warns and fails), so only
    # regular files are removed.
    leftovers <- leftovers[!dir.exists(leftovers)]
    file.remove(leftovers)
  }

  cel_files <- normalizePath(cel_files)
  if (verbose) message("Download and preparation of CEL files done.\n")
  invisible(cel_files)
}