--- a +++ b/R/downloadAndPrepareCELFiles.R @@ -0,0 +1,79 @@ +#' Download GEO .CEL files +#' +#' Function for downloading the CEL files of a GEO dataset and prepare it for +#' preprocessing. +#' The function uses \code{getGEOSuppFiles} which downloads the data. The +#' function then unzips the data files in the GEO series. +#' +#' @param geo_nbr The GEO ascession number. +#' @param destdir The destination dir of the downloaded files. +#' @param clean Should the strictly unnessesary files be deleted? +#' @param verbose Signal the process. +#' @note The function will overwrite existing files in the \code{destdir}. +#' @author +#' Anders Ellern Bilgrau, +#' Steffen Falgreen Larsen +#' @examples +#' \dontrun{ +#' downloadAndPrepareCELFiles(geo_nbr = "GSE18376", +#' destdir = tempdir()) +#' } +#' @importFrom GEOquery getGEOSuppFiles gunzip +#' @keywords internal +#' @export +downloadAndPrepareCELFiles <- function(geo_nbr, + destdir = getwd(), + clean = FALSE, + verbose = TRUE) { + options("timeout" = 240) # Increase timeout (in seconds) + on.exit(options("timeout" = 60)) # Reset to standard timeout on exit + + if (verbose) message("Preparing GEO ", geo_nbr, " data") + dl_dir <- file.path(destdir, geo_nbr) + + # Download data if nessesary + raw_file <- paste0(file.path(dl_dir, geo_nbr), "_RAW.tar") + if (!file.exists(raw_file)) { + if (verbose) message("Downloading .CEL files...\n") + dir.create(dl_dir, showWarnings = FALSE) + getGEOSuppFiles(GEO = geo_nbr, makeDirectory = FALSE, baseDir = dl_dir) + } else { + if (verbose) message("Compressed .CEL files already downloaded...") + } + + # List celfiles (if any) + cel_files <- list.files(dl_dir, pattern = "\\.cel$", + ignore.case = TRUE, full.names = TRUE) + + if (identical(cel_files, character(0))) { + + # Untar the file bundle + if (verbose) message("Untaring the RAW file...\n") + tar_file <- list.files(path = dl_dir, pattern = "RAW.tar$", full.names = TRUE) + untar(tarfile = tar_file, exdir = dl_dir) + + # Unzip the files + if (verbose) message("Unzipping the .CEL files...\n") + gz_files <- list.files(dl_dir, pattern = "\\.cel.gz$", ignore.case = TRUE, + full.names = TRUE) + for (file in gz_files) { + gunzip(file, overwrite = TRUE, remove = TRUE) + } + + # List celfiles + cel_files <- list.files(dl_dir, pattern = "\\.cel$", + ignore.case = TRUE, full.names = TRUE) + } else { + message("(Some) .CEL files already unpacked...\n") + } + + # Clean-up if wanted + if (clean) { + if (verbose) message("Removing all non CEL files...\n") + file.remove(setdiff(list.files(dl_dir, full.names = TRUE), cel_files)) + } + + cel_files <- normalizePath(cel_files) + if (verbose) message("Download and preparation of CEL files done.\n") + return(invisible(cel_files)) +}