[28aa3b]: / R / preprocessCELFiles.R

Download this file

86 lines (77 with data), 3.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#' RMA preprocess CEL files
#'
#' General function for RMA processing microarray data. Automatically downloads
#' custom Brainarray chip definition files (cdf) if wanted.
#'
#' @param cel_files A character vector of .CEL files.
#' @param cdf A character specifying the CDF type to use. Should be either
#' \code{"brainarray"} or \code{"affy"}.
#' @param target Specifies the "target" CDF to use depending on \code{cdf}.
#' If \code{cdf} is \code{"affy"} then \code{target} is e.g. \code{"core"}.
#' If \code{cdf} is \code{"brainarray"} then \code{target} is
#' e.g. \code{"ensg"}, \code{"entrez"}.
#' See \code{listTargets()} to list the available targets.
#' @param version A character giving the brainarray version to be used.
#' Only used if \code{cdf} is \code{"brainarray"}. See \code{listVersions()}.
#' @param background boolean. Should RMA background correction be performed?
#' @param normalize boolean. Should RMA normalization be performed?
#' @return An expression set object.
#' @importFrom affy read.affybatch just.rma
#' @importFrom affyio read.celfile.header
#' @importFrom oligo rma
#' @export
preprocessCELFiles <- function(cel_files,
cdf = c("affy", "brainarray"),
target,
version = getLatestVersion(),
background = TRUE,
normalize = TRUE) {
getCDF <- function(file) {
return(read.celfile.header(file)$cdfName)
}
array_type <- sapply(cel_files, getCDF)
array_types <- unique(array_type)
if (length(array_types) != 1) {
stop("The CEL files are from more that one array platform. The unique ",
"platforms are: ", paste(array_types, collapse = ", "),
". The arrays should be preprocessed in batches of array platforms.")
}
array_type <- array_type[1]
cel_files <- normalizePath(cel_files)
cdf <- match.arg(cdf)
if (tolower(cdf) == "affy") {
# Load expression set
es <- oligo::read.celfiles(cel_files)
# RMA normalize
if (class(es) %in% c("ExonFeatureSet","HTAFeatureSet","GeneFeatureSet")) {
if (missing(target)) stop("No target provided.")
es_rma <- oligo::rma(es, background = background,
normalize = normalize, target = target)
attr(es_rma, "target") <- target
} else {
es_rma <- oligo::rma(es, background = background,
normalize = normalize)
attr(es_rma, "target") <- NULL
}
} else if (tolower(cdf) == "brainarray") {
if (missing(target)) stop("No target provided.")
req <- requireBrainarray(array_type = array_type,
custom_cdf = target,
version = version)
suppressWarnings({
es_rma <- just.rma(filenames = cel_files,
verbose = TRUE,
cdfname = getCustomCDFName(req$brain_dat, array_type))
})
attr(es_rma, "target") <- target
} else {
stop("cdf == '", cdf,
"' not supported. Should be either 'affy' or 'brainarray'")
}
# Add attributes
attr(es_rma, "cdf") <- cdf
attr(es_rma, "version") <- version
attr(es_rma, "background") <- background
attr(es_rma, "normalize") <- normalize
return(es_rma)
}