[28aa3b]: / R / downloadAndProcessGEO.R

Download this file

104 lines (93 with data), 3.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
#' Download GEO dataset and preprocess it
#'
#' Functionality for automatically downloading, RMA preprocessing the array
#' data and formatting the phenotype data, and saving the results in a
#' \code{.Rds} file.
#'
#' @param geo_nbr The GEO ascession number.
#' @param destdir The destination dir of the downloaded files.
#' @param ... Arguments passed to \code{\link{preprocessCELFiles}}
#' @param clean Should the strictly unnessesary files be deleted?
#' @param verbose Signal the process.
#' @return
#' Creates a folder with the downloaded and processed files.
#' The processed files are saved as an \code{.Rds} object named
#' after the used parameters.
#' Invisibly returns the saved object.
#' @note The function will overwrite existing files in the \code{destdir}.
#'
#' Arguments after \code{\dots} must be named.
#' @author
#' Anders Ellern Bilgrau,
#' Steffen Falgreen Larsen
#' @examples
#' \dontrun{
#' print(DLBCL_overview)
#' geo_nbr <- DLBCL_overview[6,1]
#' res <- downloadAndProcessGEO(geo_nbr = geo_nbr, cdf = "brainarray",
#' target = "ENSG", clean = FALSE)
#' head(exprs(res$es$Batch1))
#' }
#' @export
downloadAndProcessGEO <- function(geo_nbr,
destdir = getwd(),
...,
clean = FALSE,
verbose = TRUE) {
# Download metadata
meta_data <- downloadAndPrepareMetadata(geo_nbr = geo_nbr, destdir = destdir,
clean = clean, verbose = verbose)
# Process metadata
clean_meta_data <- cleanMetadata(meta_data)
GSM_met <- basenameSansCEL(rownames(clean_meta_data))
stopifnot(all(GSM_met == clean_meta_data$GSM))
# Download array data
cel_files <- downloadAndPrepareCELFiles(geo_nbr = geo_nbr, destdir = destdir,
clean = clean, verbose = verbose)
GSM_cel <- gsub("(GSM[0-9]+).*$", "\\1", basenameSansCEL(cel_files))
# Add local filenames to cleaned data
clean_meta_data$file <- cel_files[pmatch(GSM_met, GSM_cel)]
# Checks and warnings
if (!all(GSM_cel %in% GSM_met)) {
warning("Not all downloaded CEL files are in the metadata")
}
if (!all(GSM_met %in% GSM_cel)) {
warning("Not all GSM numbers in the metadata have CEL files")
}
# Preprocess array data by RMA for each batch
if (is.null(clean_meta_data$Batch)) {
es <- preprocessCELFiles(cel_files, ...)
} else {
if (verbose) {
message("Batches detected. RMA normalizing each of the batches: ",
paste0(unique(clean_meta_data$Batch), collapse = ", "))
}
batch_list <- with(clean_meta_data, split(file, Batch))
es <- lapply(batch_list, preprocessCELFiles, ...)
}
# Save Rds file
a <- list()
a$cdf <- tolower(finfo(es, "cdf"))
a$target <- finfo(es, "target")
a$version <- finfo(es, "version")
file_name <-
paste0(geo_nbr, "_", a$cdf,
ifelse(a$target != "", "_", ""),
tolower(a$target),
ifelse(a$cdf == "affy", "", paste0("_", a$version)),
".Rds")
output <- list(es = es, metadata = clean_meta_data, call = match.call())
saveRDS(output, file = file.path(destdir, geo_nbr, file_name))
# Clean if wanted
if (clean) file.remove(cel_files)
if (verbose) message("done.\n")
return(invisible(output))
}
finfo <- function(x, y) {
if (!is.list(x)) {
x <- list(x)
}
info <- unique(unlist(lapply(x, function(e) attributes(e)[[y]])))
ans <- paste(info, collapse = "-")
return(ans)
}