[28aa3b]: / R / downloadAndPrepareCELFiles.R

Download this file

80 lines (70 with data), 2.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#' Download GEO .CEL files
#'
#' Function for downloading the CEL files of a GEO dataset and preparing them
#' for preprocessing.
#' The function uses \code{getGEOSuppFiles} to download the supplementary
#' files into \code{file.path(destdir, geo_nbr)}, untars the
#' \code{*_RAW.tar} bundle and gunzips the contained \code{.CEL.gz} files.
#' The download is skipped if \code{<geo_nbr>_RAW.tar} is already present, and
#' unpacking is skipped if any \code{.CEL} files are already present.
#'
#' @param geo_nbr The GEO accession number.
#' @param destdir The destination dir of the downloaded files.
#' @param clean Should the strictly unnecessary files be deleted?
#' @param verbose Signal the process.
#' @return Invisibly, a character vector with the normalized paths of the
#'   \code{.CEL} files found in \code{file.path(destdir, geo_nbr)}.
#' @note The function will overwrite existing files in the \code{destdir}.
#'   While the function runs, the \code{timeout} option is raised to at least
#'   240 seconds; the previous value is restored on exit.
#' @author
#' Anders Ellern Bilgrau,
#' Steffen Falgreen Larsen
#' @examples
#' \dontrun{
#' downloadAndPrepareCELFiles(geo_nbr = "GSE18376",
#'                            destdir = tempdir())
#' }
#' @importFrom GEOquery getGEOSuppFiles gunzip
#' @keywords internal
#' @export
downloadAndPrepareCELFiles <- function(geo_nbr,
                                       destdir = getwd(),
                                       clean = FALSE,
                                       verbose = TRUE) {
  # Raise the timeout (in seconds) without ever lowering a larger user value,
  # and restore whatever the caller had configured when we leave.
  old_opts <- options(timeout = max(240, getOption("timeout", 60)))
  on.exit(options(old_opts), add = TRUE)

  if (verbose) message("Preparing GEO ", geo_nbr, " data")

  dl_dir <- file.path(destdir, geo_nbr)
  cel_pattern <- "\\.cel$"

  # Download data if necessary
  raw_file <- paste0(file.path(dl_dir, geo_nbr), "_RAW.tar")
  if (!file.exists(raw_file)) {
    if (verbose) message("Downloading .CEL files...\n")
    # recursive = TRUE so that a not-yet-existing destdir is created as well
    dir.create(dl_dir, showWarnings = FALSE, recursive = TRUE)
    getGEOSuppFiles(GEO = geo_nbr, makeDirectory = FALSE, baseDir = dl_dir)
  } else {
    if (verbose) message("Compressed .CEL files already downloaded...")
  }

  # List celfiles (if any)
  cel_files <- list.files(dl_dir, pattern = cel_pattern,
                          ignore.case = TRUE, full.names = TRUE)

  if (length(cel_files) == 0L) {
    # Untar the file bundle
    if (verbose) message("Untaring the RAW file...\n")
    tar_files <- list.files(path = dl_dir, pattern = "RAW\\.tar$",
                            full.names = TRUE)
    if (length(tar_files) == 0L) {
      # Fail with a clear message instead of an obscure untar() error
      stop("No RAW.tar file found in '", dl_dir, "' for ", geo_nbr,
           "; the download may have failed or the series has no raw bundle.",
           call. = FALSE)
    }
    for (tar_file in tar_files) {
      utils::untar(tarfile = tar_file, exdir = dl_dir)
    }

    # Unzip the files
    if (verbose) message("Unzipping the .CEL files...\n")
    gz_files <- list.files(dl_dir, pattern = "\\.cel\\.gz$",
                           ignore.case = TRUE, full.names = TRUE)
    for (gz_file in gz_files) {
      gunzip(gz_file, overwrite = TRUE, remove = TRUE)
    }

    # List celfiles
    cel_files <- list.files(dl_dir, pattern = cel_pattern,
                            ignore.case = TRUE, full.names = TRUE)
  } else {
    if (verbose) message("(Some) .CEL files already unpacked...\n")
  }

  # Clean-up if wanted
  if (clean) {
    if (verbose) message("Removing all non CEL files...\n")
    file.remove(setdiff(list.files(dl_dir, full.names = TRUE), cel_files))
  }

  cel_files <- normalizePath(cel_files)
  if (verbose) message("Download and preparation of CEL files done.\n")
  invisible(cel_files)
}