Diff of /R/downloadAndProcessGEO.R [000000] .. [28aa3b]

Switch to unified view

a b/R/downloadAndProcessGEO.R
1
#' Download GEO dataset and preprocess it
#'
#' Functionality for automatically downloading, RMA preprocessing the array
#' data and formatting the phenotype data, and saving the results in a
#' \code{.Rds} file.
#'
#' @param geo_nbr The GEO accession number.
#' @param destdir The destination dir of the downloaded files.
#' @param ... Arguments passed to \code{\link{preprocessCELFiles}}
#' @param clean Should the strictly unnecessary files be deleted?
#'   If \code{TRUE}, the downloaded CEL files are removed after the
#'   \code{.Rds} file has been written.
#' @param verbose Signal the process.
#' @return
#'   Creates a folder with the downloaded and processed files.
#'   The processed files are saved as an \code{.Rds} object in
#'   \code{file.path(destdir, geo_nbr)}, named after the used parameters
#'   (\code{<geo_nbr>_<cdf>[_<target>][_<version>].Rds}).
#'   Invisibly returns the saved object; a list with the elements
#'   \code{es} (the preprocessed data, or a list hereof with one element per
#'   batch if the metadata contains a \code{Batch} column), \code{metadata}
#'   (the cleaned metadata with an added \code{file} column), and \code{call}
#'   (the matched call).
#' @note The function will overwrite existing files in the \code{destdir}.
#'
#'   Arguments after \code{\dots} must be named.
#' @author
#'   Anders Ellern Bilgrau,
#'   Steffen Falgreen Larsen
#' @examples
#' \dontrun{
#' print(DLBCL_overview)
#' geo_nbr <- DLBCL_overview[6,1]
#' res <- downloadAndProcessGEO(geo_nbr = geo_nbr, cdf = "brainarray",
#'                              target = "ENSG", clean = FALSE)
#' head(exprs(res$es$Batch1))
#' }
#' @export
downloadAndProcessGEO <- function(geo_nbr,
                                  destdir = getwd(),
                                  ...,
                                  clean = FALSE,
                                  verbose = TRUE) {

  # Download metadata
  meta_data <- downloadAndPrepareMetadata(geo_nbr = geo_nbr, destdir = destdir,
                                          clean = clean, verbose = verbose)

  # Process metadata
  clean_meta_data <- cleanMetadata(meta_data)
  GSM_met <- basenameSansCEL(rownames(clean_meta_data))
  # Sanity check: the row names must agree with the GSM column
  stopifnot(all(GSM_met == clean_meta_data$GSM))

  # Download array data
  cel_files <- downloadAndPrepareCELFiles(geo_nbr = geo_nbr, destdir = destdir,
                                          clean = clean, verbose = verbose)
  # Keep only the leading GSM identifier of each CEL file name
  GSM_cel <- gsub("(GSM[0-9]+).*$", "\\1", basenameSansCEL(cel_files))

  # Add local filenames to cleaned data.
  # Exact matching is required here: partial matching (pmatch) could pair a
  # GSM number with the file of a longer GSM number sharing the same prefix.
  # Samples without a CEL file get NA.
  clean_meta_data$file <- cel_files[match(GSM_met, GSM_cel)]

  # Checks and warnings
  if (!all(GSM_cel %in% GSM_met)) {
    warning("Not all downloaded CEL files are in the metadata")
  }
  if (!all(GSM_met %in% GSM_cel)) {
    warning("Not all GSM numbers in the metadata have CEL files")
  }

  # Preprocess array data by RMA for each batch
  # ([[ is used to avoid partial matching of column names)
  batch <- clean_meta_data[["Batch"]]
  if (is.null(batch)) {
    es <- preprocessCELFiles(cel_files, ...)
  } else {
    if (verbose) {
      message("Batches detected. RMA normalizing each of the batches: ",
              paste0(unique(batch), collapse = ", "))
    }
    batch_list <- split(clean_meta_data$file, batch)
    es <- lapply(batch_list, preprocessCELFiles, ...)
  }

  # Save Rds file
  # The file name is built from the attributes of the preprocessed data;
  # finfo() collapses these to a single string each.
  a         <- list()
  a$cdf     <- tolower(finfo(es, "cdf"))
  a$target  <- finfo(es, "target")
  a$version <- finfo(es, "version")
  target_sep  <- if (a$target != "") "_" else ""
  version_tag <- if (a$cdf == "affy") "" else paste0("_", a$version)
  file_name <-
    paste0(geo_nbr, "_", a$cdf,
           target_sep,
           tolower(a$target),
           version_tag,
           ".Rds")
  output <- list(es = es, metadata = clean_meta_data, call = match.call())
  saveRDS(output, file = file.path(destdir, geo_nbr, file_name))

  # Clean if wanted
  if (clean) file.remove(cel_files)
  if (verbose) message("done.\n")

  return(invisible(output))
}
95
96
# Internal helper: collect the attribute named `y` from `x` (or, if `x` is a
# list, from each of its elements) and collapse the unique values into a
# single string separated by "-". Gives "" if no element has the attribute.
finfo <- function(x, y) {
  objects <- if (is.list(x)) x else list(x)
  values <- lapply(objects, function(obj) attributes(obj)[[y]])
  paste(unique(unlist(values)), collapse = "-")
}