Switch to unified view

a b/R/DIscBIO-generic-Normalizedata.R
1
#' @title Normalizing and filtering
#' @description Filters the genes and cells of a dataset so that only the
#'   retained ones are used in the downstream analysis.
#' @param object \code{DISCBIO} class object.
#' @param mintotal minimum total number of transcripts a cell must have. Cells
#'   with fewer than \code{mintotal} transcripts are filtered out. Default is
#'   1000.
#' @param minexpr minimum transcript count a gene must reach in at least
#'   \code{minnumber} cells. Genes that do not are filtered out. Default is 0.
#' @param minnumber minimum number of cells in which a gene has to be expressed
#'   at \code{minexpr} transcripts or more. Default is 0.
#' @param maxexpr upper bound on the transcript count of a gene after
#'   normalization or downsampling. Genes whose count reaches \code{maxexpr} in
#'   at least a single cell are filtered out. Default is Inf.
#' @param downsample A logical value. Default is FALSE. If set to TRUE, the
#'   transcript counts are downsampled to \code{mintotal} transcripts per cell
#'   instead of being normalized. Downsampled versions of the transcript count
#'   data are averaged across \code{dsn} samples.
#' @param dsn A numeric value giving the number of samples used to average the
#'   downsampled versions of the transcript count data. Default is 1, which
#'   means that sampling noise should be comparable across cells. For high
#'   values of \code{dsn} the data become similar to the median normalization.
#' @param rseed Integer seed handed to \code{set.seed} before downsampling, to
#'   enforce reproducible results. Default is NULL.
#' @include DIscBIO-classes.R
#' @return The DISCBIO-class object input with the ndata and fdata slots filled.
#' @examples
#' sc <- DISCBIO(valuesG1msTest) # changes signature of data
#'
#' # In this case this function is used to normalize the reads
#' sc_normal <- Normalizedata(
#'   sc,
#'   mintotal = 1000, minexpr = 0, minnumber = 0, maxexpr = Inf,
#'   downsample = FALSE, dsn = 1, rseed = 17000
#' )
#' summary(sc_normal@fdata)
#'
setGeneric(
  name = "Normalizedata",
  def = function(object, mintotal = 1000, minexpr = 0, minnumber = 0,
                 maxexpr = Inf, downsample = FALSE, dsn = 1, rseed = NULL) {
    standardGeneric("Normalizedata")
  }
)
44
45
#' @export
#' @rdname Normalizedata
setMethod(
  "Normalizedata",
  signature = "DISCBIO",
  definition = function(
    object, mintotal, minexpr, minnumber, maxexpr, downsample, dsn, rseed
  ) {
    # Validation. Each argument must be a single, non-missing value so that the
    # checks below fail with the intended message instead of R's generic
    # "condition has length > 1" / "missing value" errors.
    is_single_number <- function(value) {
      is.numeric(value) && length(value) == 1L && !is.na(value)
    }
    if (!is_single_number(mintotal) || mintotal <= 0) {
      stop("mintotal has to be a positive number")
    }
    if (!is_single_number(minexpr) || minexpr < 0) {
      stop("minexpr has to be a non-negative number")
    }
    if (!is_single_number(minnumber) ||
      round(minnumber) != minnumber || minnumber < 0) {
      stop("minnumber has to be a non-negative integer number")
    }
    # Numeric flags (e.g. 0/1) are accepted as well as logicals.
    if (!(is.numeric(downsample) || is.logical(downsample)) ||
      length(downsample) != 1L || is.na(downsample)) {
      stop("downsample has to be logical (TRUE or FALSE)")
    }
    if (!is_single_number(dsn) || round(dsn) != dsn || dsn <= 0) {
      stop("dsn has to be a positive integer number")
    }

    # Record the filtering parameters on the object.
    object@filterpar <- list(
      mintotal = mintotal,
      minexpr = minexpr,
      minnumber = minnumber,
      maxexpr = maxexpr,
      downsample = downsample,
      dsn = dsn
    )

    # Keep the cells (columns) whose total transcript count reaches mintotal.
    # drop = FALSE preserves the two-dimensional shape when only one cell
    # passes the filter.
    keep_cells <- colSums(object@expdata, na.rm = TRUE) >= mintotal
    object@ndata <- object@expdata[, keep_cells, drop = FALSE]

    if (downsample) {
      set.seed(rseed)
      # In call position R skips the non-function `downsample` argument and
      # resolves the downsample() helper defined elsewhere.
      # NOTE(review): the helper receives the unfiltered expdata, so it
      # presumably applies the mintotal cut itself -- confirm.
      object@ndata <- downsample(object@expdata, n = mintotal, dsn = dsn)
    } else {
      # Scale every cell to the median cell total, then add a pseudocount of
      # 0.1. The totals ignore NAs, consistently with the cell filter above.
      counts <- object@ndata
      cell_totals <- colSums(counts, na.rm = TRUE)
      object@ndata <- as.data.frame(
        t(t(counts) / cell_totals) * median(cell_totals) + .1
      )
    }

    # Keep the genes (rows) with at least minexpr transcripts in at least
    # minnumber cells ...
    normalized <- object@ndata
    expressed <- rowSums(normalized >= minexpr, na.rm = TRUE) >= minnumber
    candidates <- normalized[expressed, , drop = FALSE]

    # ... and whose maximum count across cells stays below maxexpr.
    below_max <- apply(candidates, 1, max, na.rm = TRUE) < maxexpr
    object@fdata <- candidates[below_max, , drop = FALSE]

    object
  }
)