Diff of /R/utils.R [000000] .. [9abfcf]

Switch to unified view

a b/R/utils.R
1
`%||%` <- rlang::`%||%`
2
3
#' @title Extract TF names from scRNA data and tf2motifs
4
#'
5
#' @param species (character) - Species name. Default: "human".
6
#' @param genes (vector(character)) - List of expressed genes.
7
#' @param output_file (character) - Path to output file.
8
#' @param tf2motifs (data.frame) - TF to motifs names mapping.
9
#' Columns: motif, tf.
10
#' @param verbose (integer) - Verbosity level. Default: 1.
11
#'
12
#' @return TFs (vector(character)) - List of TFs expressed with motifs.
13
#' @export
14
#'
15
get_tfs <- function(
16
    hummus,
17
    assay = NULL,
18
    store_tfs = TRUE,
19
    output_file = NULL,
20
    verbose = 0
21
    ) {
22
  # Check if the hummus object has motifs_db slot
23
  if (is.null(hummus@motifs_db)) {
24
    stop("The hummus object does not have a motifs_db slot")
25
  }
26
  
27
  # Check if the assay is present in the seurat object
28
  if (! is.null(assay)) {
29
    if (!assay %in% names(hummus@assays)) {
30
        stop("The gene assay is not present in the seurat object")
31
    }
32
    # Get the expressed genes
33
    expr_genes <- rownames(hummus@assays[[assay]])
34
    tfs <- intersect(unique(as.character(hummus@motifs_db@tf2motifs$tf)),
35
                                        expr_genes)
36
    if (verbose > 0) {
37
        cat("\t", length(tfs), "TFs expressed\n")
38
        }
39
  } else { # If no assay is provided, get all TFs with motifs
40
    tfs <- unique(as.character(hummus@motifs_db@tf2motifs$tf))
41
    if (verbose > 0) {
42
      cat("\t", length(tfs), "TFs with motif. No check if expressed or not.\n")
43
    }
44
  }
45
  # Store TFs in a file if specified
46
  if (store_tfs) {
47
    if (is.null(output_file)) {
48
      stop("Please provide an output file name")
49
    }
50
    write.table(tfs, output_file, # Store TFs
51
              col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
52
  }
53
54
  return(tfs)
55
}
56
57
# Code from Pando github.com/quadbiolab/Pando
58
#' @import sparseMatrixStats
59
summary_fun <- list(
60
    "mean" = sparseMatrixStats::colMeans2,
61
    "median" = sparseMatrixStats::colMedians,
62
    "max" = sparseMatrixStats::colMaxs,
63
    "min" = sparseMatrixStats::colMins,
64
    "count" = sparseMatrixStats::colCounts,
65
    "any" = sparseMatrixStats::colAnys,
66
    "all" = sparseMatrixStats::colAlls,
67
    "sd" = sparseMatrixStats::colSds,
68
    "mad" = sparseMatrixStats::colMads
69
)
70
71
#' Copy of the aggregate.Matrix function from the Matrix.utils package,
72
#' since this is off CRAN and does not seem to be maintained anymore
73
#' internally
74
#'
75
fast_aggregate <- function(
76
    x,
77
    groupings = NULL,
78
    form = NULL,
79
    fun = "sum",
80
    ...
81
) {
82
    if (!is(x, "Matrix")) {
83
        x <- Matrix(as.matrix(x), sparse = TRUE)
84
    }
85
    if (fun == "count") {
86
        x <- x != 0
87
    }
88
    groupings2 <- groupings
89
    if (!is(groupings2, "data.frame")) {
90
        groupings2 <- as.data.frame(groupings2)
91
    }
92
    groupings2 <- data.frame(lapply(groupings2, as.factor))
93
    groupings2 <- data.frame(interaction(groupings2, sep = "_"))
94
    colnames(groupings2) <- "A"
95
    if (is.null(form)) {
96
        form <- as.formula("~0+.")
97
    }
98
    form <- as.formula(form)
99
    mapping <- dMcast(groupings2, form)
100
    colnames(mapping) <- substring(colnames(mapping), 2)
101
    result <- Matrix::t(mapping) %*% x
102
    if (fun == "mean") {
103
        result@x <- result@x / (fast_aggregate(x, groupings2, fun = "count"))@x
104
    }
105
    attr(result, "crosswalk") <- grr::extract(groupings, match(rownames(result),
106
                                             groupings2$A))
107
    return(result)
108
}
109
110
#' Copy of the dMcast function from the Matrix.utils package,
111
#' since this is off CRAN and does not seem to be maintained anymore
112
#'  internally
113
#'
114
dMcast <- function(
115
    data,
116
    formula,
117
    fun.aggregate = "sum",
118
    value.var = NULL,
119
    as.factors = FALSE,
120
    factor.nas = TRUE,
121
    drop.unused.levels = TRUE
122
) {
123
   values <- 1
124
    if (!is.null(value.var)) {
125
        values <- data[,value.var]
126
    }
127
    alltms <- terms(formula, data=data)
128
    response <- rownames(attr(alltms, "factors"))[attr(alltms, "response")]
129
    tm <- attr(alltms, "term.labels")
130
    interactionsIndex <- grep(":", tm)
131
    interactions <- tm[interactionsIndex]
132
    simple <- setdiff(tm, interactions)
133
    i2 <- strsplit(interactions, ":")
134
    newterms <- unlist(lapply(i2, function(x){
135
        paste("paste(", paste(x, collapse = ","), ",", "sep='_'", ")")
136
        }))
137
    newterms <- c(simple, newterms)
138
    newformula <- as.formula(paste("~0+", paste(newterms, collapse = "+")))
139
    allvars <- all.vars(alltms)
140
    data <- data[, c(allvars), drop = FALSE]
141
    if (as.factors)
142
        data <- data.frame(lapply(data, as.factor))
143
    characters <- unlist(lapply(data, is.character))
144
    data[, characters] <- lapply(data[, characters, drop = FALSE], as.factor)
145
    factors <- unlist(lapply(data, is.factor))
146
    # Prevents errors with 1 or fewer distinct levels
147
    data[, factors] <- lapply(data[, factors, drop = FALSE], function(x) {
148
        if (factor.nas) {
149
            if (any(is.na(x))) {
150
                levels(x) <- c(levels(x), "NA")
151
                x[is.na(x)] <- "NA"
152
            }
153
        }
154
        if (drop.unused.levels){
155
            if (nlevels(x)!=length(na.omit(unique(x)))){
156
                x <- factor(as.character(x))
157
            }
158
        }
159
        y <- contrasts(x, contrasts=FALSE, sparse=TRUE)
160
        attr(x, 'contrasts') <- y
161
        return(x)
162
    })
163
    # Allows NAs to pass
164
    attr(data,'na.action') <- na.pass
165
    result <- Matrix::sparse.model.matrix(newformula,
166
                                          data, .unused.levels = FALSE,
167
                                          row.names = FALSE)
168
    brokenNames <- grep("paste(", colnames(result), fixed = TRUE)
169
    colnames(result)[brokenNames] <- lapply(colnames(result)[brokenNames],
170
                                            function(x) {
171
        x <- gsub("paste(", replacement = "", x = x, fixed = TRUE)
172
        x <- gsub(pattern = ", ", replacement = "_", x = x, fixed = TRUE)
173
        x <- gsub(pattern = '_sep = \"_\")',
174
                  replacement = "",
175
                  x = x,
176
                  fixed = TRUE)
177
        return(x)
178
    })
179
180
    result <- result * values
181
    if (isTRUE(response > 0)) {
182
        responses = all.vars(terms(as.formula(paste(response, "~0"))))
183
        result <- fast_aggregate(result,
184
                                 data[, responses, drop = FALSE],
185
                                 fun = fun.aggregate)
186
    }
187
    return(result)
188
}
189
190
191
#' Aggregate matrix over groups
192
#'
193
#' @import sparseMatrixStats
194
#'
195
#' @param groups A character vector with the groups to aggregate over.
196
#' @param fun The summary function to be applied to each group.
197
#'
198
#' @return A summary matrix.
199
#'
200
#' @export
201
aggregate_matrix <- function(
202
    x,
203
    groups = NULL,
204
    fun = "mean"
205
) {
206
    if (length(groups) == nrow(x) & "character" %in% class(fun)) {
207
        if (fun %in% c("count", "sum")) {
208
            agg_mat <- fast_aggregate(x = x, groupings = groups, fun = fun)
209
            return(agg_mat)
210
        }
211
212
        if (fun == "mean") {
213
            group_counts <- as.numeric(table(groups))
214
            agg_mat <- fast_aggregate(x = x, groupings = groups, fun = "sum")
215
            agg_mat <- agg_mat / group_counts
216
            return(agg_mat)
217
        }
218
    }
219
220
    if ("character" %in% class(fun)) {
221
        fun <- summary_fun[[fun]]
222
    }
223
224
    if (length(groups) == nrow(x)) {
225
        agg_mat <- sapply(levels(factor(groups)), function(g) {
226
            chunk <- x[which(groups == g), ]
227
            if (is.null(dim(chunk))) {
228
                return(chunk)
229
            } else {
230
                return(fun(chunk))
231
            }
232
        })
233
        agg_mat <- Matrix::Matrix(agg_mat, sparse = TRUE)
234
    } else if (length(groups) <= 1) {
235
        agg_mat <- fun(x)
236
        agg_mat <- Matrix::Matrix(agg_mat, sparse = TRUE)
237
        colnames(agg_mat) <- groups
238
        rownames(agg_mat) <- colnames(x)
239
    } else {
240
        stop("Length of groups must be either nrow(x) or 1.")
241
    }
242
    return(Matrix::t(agg_mat))
243
}