--- a
+++ b/R/utils.R
@@ -0,0 +1,243 @@
+`%||%` <- rlang::`%||%`
+
+#' @title Extract TF names from scRNA data and tf2motifs
+#'
+#' @param species (character) - Species name. Default: "human".
+#' @param genes (vector(character)) - List of expressed genes.
+#' @param output_file (character) - Path to output file.
+#' @param tf2motifs (data.frame) - TF to motifs names mapping.
+#' Columns: motif, tf.
+#' @param verbose (integer) - Verbosity level. Default: 1.
+#'
+#' @return TFs (vector(character)) - List of TFs expressed with motifs.
+#' @export
+#'
+get_tfs <- function(
+    hummus,
+    assay = NULL,
+    store_tfs = TRUE,
+    output_file = NULL,
+    verbose = 0
+    ) {
+  # Check if the hummus object has motifs_db slot
+  if (is.null(hummus@motifs_db)) {
+    stop("The hummus object does not have a motifs_db slot")
+  }
+  
+  # Check if the assay is present in the seurat object
+  if (! is.null(assay)) {
+    if (!assay %in% names(hummus@assays)) {
+        stop("The gene assay is not present in the seurat object")
+    }
+    # Get the expressed genes
+    expr_genes <- rownames(hummus@assays[[assay]])
+    tfs <- intersect(unique(as.character(hummus@motifs_db@tf2motifs$tf)),
+                                        expr_genes)
+    if (verbose > 0) {
+        cat("\t", length(tfs), "TFs expressed\n")
+        }
+  } else { # If no assay is provided, get all TFs with motifs
+    tfs <- unique(as.character(hummus@motifs_db@tf2motifs$tf))
+    if (verbose > 0) {
+      cat("\t", length(tfs), "TFs with motif. No check if expressed or not.\n")
+    }
+  }
+  # Store TFs in a file if specified
+  if (store_tfs) {
+    if (is.null(output_file)) {
+      stop("Please provide an output file name")
+    }
+    write.table(tfs, output_file, # Store TFs
+              col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
+  }
+
+  return(tfs)
+}
+
+# Code from Pando github.com/quadbiolab/Pando
+#' @import sparseMatrixStats
+summary_fun <- list(
+    "mean" = sparseMatrixStats::colMeans2,
+    "median" = sparseMatrixStats::colMedians,
+    "max" = sparseMatrixStats::colMaxs,
+    "min" = sparseMatrixStats::colMins,
+    "count" = sparseMatrixStats::colCounts,
+    "any" = sparseMatrixStats::colAnys,
+    "all" = sparseMatrixStats::colAlls,
+    "sd" = sparseMatrixStats::colSds,
+    "mad" = sparseMatrixStats::colMads
+)
+
+#' Copy of the aggregate.Matrix function from the Matrix.utils package,
+#' since this is off CRAN and does not seem to be maintained anymore
+#' internally
+#'
+fast_aggregate <- function(
+    x,
+    groupings = NULL,
+    form = NULL,
+    fun = "sum",
+    ...
+) {
+    if (!is(x, "Matrix")) {
+        x <- Matrix(as.matrix(x), sparse = TRUE)
+    }
+    if (fun == "count") {
+        x <- x != 0
+    }
+    groupings2 <- groupings
+    if (!is(groupings2, "data.frame")) {
+        groupings2 <- as.data.frame(groupings2)
+    }
+    groupings2 <- data.frame(lapply(groupings2, as.factor))
+    groupings2 <- data.frame(interaction(groupings2, sep = "_"))
+    colnames(groupings2) <- "A"
+    if (is.null(form)) {
+        form <- as.formula("~0+.")
+    }
+    form <- as.formula(form)
+    mapping <- dMcast(groupings2, form)
+    colnames(mapping) <- substring(colnames(mapping), 2)
+    result <- Matrix::t(mapping) %*% x
+    if (fun == "mean") {
+        result@x <- result@x / (fast_aggregate(x, groupings2, fun = "count"))@x
+    }
+    attr(result, "crosswalk") <- grr::extract(groupings, match(rownames(result),
+                                             groupings2$A))
+    return(result)
+}
+
+#' Copy of the dMcast function from the Matrix.utils package,
+#' since this is off CRAN and does not seem to be maintained anymore
+#'  internally
+#'
+dMcast <- function(
+    data,
+    formula,
+    fun.aggregate = "sum",
+    value.var = NULL,
+    as.factors = FALSE,
+    factor.nas = TRUE,
+    drop.unused.levels = TRUE
+) {
+   values <- 1
+    if (!is.null(value.var)) {
+        values <- data[,value.var]
+    }
+    alltms <- terms(formula, data=data)
+    response <- rownames(attr(alltms, "factors"))[attr(alltms, "response")]
+    tm <- attr(alltms, "term.labels")
+    interactionsIndex <- grep(":", tm)
+    interactions <- tm[interactionsIndex]
+    simple <- setdiff(tm, interactions)
+    i2 <- strsplit(interactions, ":")
+    newterms <- unlist(lapply(i2, function(x){
+        paste("paste(", paste(x, collapse = ","), ",", "sep='_'", ")")
+        }))
+    newterms <- c(simple, newterms)
+    newformula <- as.formula(paste("~0+", paste(newterms, collapse = "+")))
+    allvars <- all.vars(alltms)
+    data <- data[, c(allvars), drop = FALSE]
+    if (as.factors)
+        data <- data.frame(lapply(data, as.factor))
+    characters <- unlist(lapply(data, is.character))
+    data[, characters] <- lapply(data[, characters, drop = FALSE], as.factor)
+    factors <- unlist(lapply(data, is.factor))
+    # Prevents errors with 1 or fewer distinct levels
+    data[, factors] <- lapply(data[, factors, drop = FALSE], function(x) {
+        if (factor.nas) {
+            if (any(is.na(x))) {
+                levels(x) <- c(levels(x), "NA")
+                x[is.na(x)] <- "NA"
+            }
+        }
+        if (drop.unused.levels){
+            if (nlevels(x)!=length(na.omit(unique(x)))){
+                x <- factor(as.character(x))
+            }
+        }
+        y <- contrasts(x, contrasts=FALSE, sparse=TRUE)
+        attr(x, 'contrasts') <- y
+        return(x)
+    })
+    # Allows NAs to pass
+    attr(data,'na.action') <- na.pass
+    result <- Matrix::sparse.model.matrix(newformula,
+                                          data, .unused.levels = FALSE,
+                                          row.names = FALSE)
+    brokenNames <- grep("paste(", colnames(result), fixed = TRUE)
+    colnames(result)[brokenNames] <- lapply(colnames(result)[brokenNames],
+                                            function(x) {
+        x <- gsub("paste(", replacement = "", x = x, fixed = TRUE)
+        x <- gsub(pattern = ", ", replacement = "_", x = x, fixed = TRUE)
+        x <- gsub(pattern = '_sep = \"_\")',
+                  replacement = "",
+                  x = x,
+                  fixed = TRUE)
+        return(x)
+    })
+
+    result <- result * values
+    if (isTRUE(response > 0)) {
+        responses = all.vars(terms(as.formula(paste(response, "~0"))))
+        result <- fast_aggregate(result,
+                                 data[, responses, drop = FALSE],
+                                 fun = fun.aggregate)
+    }
+    return(result)
+}
+
+
+#' Aggregate matrix over groups
+#'
+#' @import sparseMatrixStats
+#'
+#' @param groups A character vector with the groups to aggregate over.
+#' @param fun The summary function to be applied to each group.
+#'
+#' @return A summary matrix.
+#'
+#' @export
+aggregate_matrix <- function(
+    x,
+    groups = NULL,
+    fun = "mean"
+) {
+    if (length(groups) == nrow(x) & "character" %in% class(fun)) {
+        if (fun %in% c("count", "sum")) {
+            agg_mat <- fast_aggregate(x = x, groupings = groups, fun = fun)
+            return(agg_mat)
+        }
+
+        if (fun == "mean") {
+            group_counts <- as.numeric(table(groups))
+            agg_mat <- fast_aggregate(x = x, groupings = groups, fun = "sum")
+            agg_mat <- agg_mat / group_counts
+            return(agg_mat)
+        }
+    }
+
+    if ("character" %in% class(fun)) {
+        fun <- summary_fun[[fun]]
+    }
+
+    if (length(groups) == nrow(x)) {
+        agg_mat <- sapply(levels(factor(groups)), function(g) {
+            chunk <- x[which(groups == g), ]
+            if (is.null(dim(chunk))) {
+                return(chunk)
+            } else {
+                return(fun(chunk))
+            }
+        })
+        agg_mat <- Matrix::Matrix(agg_mat, sparse = TRUE)
+    } else if (length(groups) <= 1) {
+        agg_mat <- fun(x)
+        agg_mat <- Matrix::Matrix(agg_mat, sparse = TRUE)
+        colnames(agg_mat) <- groups
+        rownames(agg_mat) <- colnames(x)
+    } else {
+        stop("Length of groups must be either nrow(x) or 1.")
+    }
+    return(Matrix::t(agg_mat))
+}
\ No newline at end of file