|
a |
|
b/R/DIscBIO-generic-Normalizedata.R |
|
|
1 |
#' @title Normalizing and filtering
#' @description Filters the genes and cells that will be used in the
#' downstream analysis and normalizes (or downsamples) the transcript counts.
#' @param object \code{DISCBIO} class object.
#' @param mintotal minimum total transcript number required. Cells with less
#' than \code{mintotal} transcripts are filtered out. Default is 1000.
#' @param minexpr minimum required transcript count of a gene in at least
#' \code{minnumber} cells. All other genes are filtered out. Default is 0.
#' @param minnumber minimum number of cells that are expressing each gene at
#' \code{minexpr} transcripts. Default is 0.
#' @param maxexpr maximum allowed transcript count of a gene in at least a
#' single cell after normalization or downsampling. All other genes are
#' filtered out. Default is Inf.
#' @param downsample A logical value. Default is FALSE. If \code{downsample}
#' is set to TRUE, then transcript counts are downsampled to \code{mintotal}
#' transcripts per cell, instead of being normalized. Downsampled versions of
#' the transcript count data are averaged across \code{dsn} samples.
#' @param dsn A numeric value of the number of samples to be used to average the
#' downsampled versions of the transcript count data. Default is 1 which means
#' that sampling noise should be comparable across cells. For high numbers of
#' dsn the data will become similar to the median normalization.
#' @param rseed Random integer used as seed to enforce reproducible results
#' when downsampling. Default is NULL.
#' @include DIscBIO-classes.R
#' @return The DISCBIO-class object input with the ndata and fdata slots filled.
#' @examples
#' sc <- DISCBIO(valuesG1msTest) # changes signature of data
#'
#' # In this case this function is used to normalize the reads
#' sc_normal <- Normalizedata(
#'   sc,
#'   mintotal = 1000, minexpr = 0, minnumber = 0, maxexpr = Inf,
#'   downsample = FALSE, dsn = 1, rseed = 17000
#' )
#' summary(sc_normal@fdata)
#'
setGeneric(
  name = "Normalizedata",
  def = function(object,
                 mintotal = 1000,
                 minexpr = 0,
                 minnumber = 0,
                 maxexpr = Inf,
                 downsample = FALSE,
                 dsn = 1,
                 rseed = NULL) {
    # Dispatch to the method registered for the class of `object`.
    standardGeneric("Normalizedata")
  }
)
|
|
44 |
|
|
|
45 |
#' @export
#' @rdname Normalizedata
setMethod(
  "Normalizedata",
  signature = "DISCBIO",
  definition = function(
    object, mintotal, minexpr, minnumber, maxexpr, downsample, dsn, rseed
  ) {
    # Normalizes (or downsamples) the expression table stored in
    # `object@expdata`, then filters genes by expression thresholds.
    # Fills the `filterpar`, `ndata` and `fdata` slots and returns `object`.

    # ---- Argument validation ------------------------------------------------
    # `||` is used because every condition here is a scalar test; it also
    # short-circuits so the numeric comparison never runs on non-numeric input.
    if (!is.numeric(mintotal) || mintotal <= 0) {
      stop("mintotal has to be a positive number")
    }
    if (!is.numeric(minexpr) || minexpr < 0) {
      stop("minexpr has to be a non-negative number")
    }
    if (!is.numeric(minnumber) ||
      round(minnumber) != minnumber || minnumber < 0) {
      stop("minnumber has to be a non-negative integer number")
    }
    # maxexpr is compared numerically against per-gene maxima below, so a
    # non-numeric value would silently give a meaningless filter.
    if (!is.numeric(maxexpr)) {
      stop("maxexpr has to be a number")
    }
    # Numeric flags (e.g. 0/1) are accepted as well as logicals.
    if (!(is.numeric(downsample) || is.logical(downsample))) {
      stop("downsample has to be logical (TRUE or FALSE)")
    }
    if (!is.numeric(dsn) || round(dsn) != dsn || dsn <= 0) {
      stop("dsn has to be a positive integer number")
    }

    # Keep a record of the parameters used for this run.
    object@filterpar <- list(
      mintotal = mintotal,
      minexpr = minexpr,
      minnumber = minnumber,
      maxexpr = maxexpr,
      downsample = downsample,
      dsn = dsn
    )

    # ---- Cell filtering and normalization / downsampling --------------------
    if (downsample) {
      set.seed(rseed)
      # In call position R looks up a *function* named `downsample`, so this
      # resolves to the helper function rather than the flag argument.
      # NOTE(review): the helper is assumed to be defined elsewhere in the
      # package and to do its own cell filtering by `n` -- confirm.
      object@ndata <- downsample(object@expdata, n = mintotal, dsn = dsn)
    } else {
      # Cells (columns) need at least `mintotal` transcripts in total.
      # drop = FALSE keeps a data frame even if a single cell survives.
      enough_reads <- colSums(object@expdata, na.rm = TRUE) >= mintotal
      counts <- object@expdata[, enough_reads, drop = FALSE]

      # Scale every cell to the median library size and add a pseudocount of
      # 0.1. Totals ignore NA consistently for both divisor and median.
      totals <- colSums(counts, na.rm = TRUE)
      scaled <- t(t(counts) / totals) * median(totals) + .1
      object@ndata <- as.data.frame(scaled)
    }

    # ---- Gene filtering ------------------------------------------------------
    # Genes (rows) must reach `minexpr` in at least `minnumber` cells ...
    x <- object@ndata
    expressed <- rowSums(x >= minexpr, na.rm = TRUE) >= minnumber
    x <- x[expressed, , drop = FALSE]

    # ... and must stay strictly below `maxexpr` in every cell.
    below_max <- apply(x, 1, max, na.rm = TRUE) < maxexpr
    object@fdata <- x[below_max, , drop = FALSE]

    return(object)
  }
)