HuMMuS / Git / Diff of /R/method

Models:

AlyssaS/

HuMMuS

Downloads: 1

Diff of /R/method_wrappers.R [000000] .. [9abfcf]

Switch to unified view

 b/R/method_wrappers.R
+# Description: This file contains the wrapper functions for the methods that
+# are used to compute the different layers of the multilayer network. The
+# functions are called from the compute_*_network functions in layers.R
+# For now, only the compute_atac_peak_network function has wrapper functions
+# for the different methods. The other methods are still directly implemented
+# in the compute_*_network functions in layers.R
+#' @title Cicero wrapper function for the compute_atac_peak_network function
+#'
+#' @description This function is a wrapper for the compute_atac_peak_network
+#' function in layers.R. It computes the peak network from scATAC-seq data
+#' using Cicero. It returns a data frame with the peak network. The data frame
+#' also contains the coaccess score for each edge. The coaccess score is the
+#' probability that two peaks are accessible in the same cell. The coaccess
+#' score is computed by Cicero. Edges are filtered based on the coaccess score.
+#' Only edges with a coaccess score > threshold are kept.
+#'
+#' @param hummus A hummus object
+#' @param atac_assay The name of the assay containing the scATAC-seq data
+#' @param genome The genome object
+#' @param window The window size used by Cicero to compute the coaccess score
+#' @param number_cells_per_clusters The number of cells per cluster used by
+#' Cicero to compute the coaccess score
+#' @param sample_num The number of samples used by Cicero to compute the
+#' coaccess score
+#' @param seed The seed used by Cicero to compute the coaccess score
+#' @param verbose The verbosity level
+#' @param threshold The threshold used to filter edges based on the coaccess
+#' score
+#' @param reduction_method The method used by monocle3 to reduce the dimension
+#' of the scATAC-seq data before defining the pseudocells. The default is UMAP.
+#'
+#' @return A data frame containing the peak network
+#' @export
+#'
+run_cicero_wrapper <- function(
+    hummus,
+    atac_assay,
+    genome,
+    window,
+    number_cells_per_clusters,
+    sample_num,
+    seed,
+    verbose,
+    threshold,
+    reduction_method = "UMAP"
+    ) {
+    # functions that need to be renamed
+    int_elementMetadata <- SingleCellExperiment::int_elementMetadata
+    counts <- SingleCellExperiment::counts
+    # obtain chromosome sizes
+    chromosome_sizes <- data.frame(V1 = genome@seqinfo@seqnames,
+                                   V2 = genome@seqinfo@seqlengths)
+    # Get scATAC-seq data
+    scATAC <- as.matrix(hummus@assays[[atac_assay]]@counts)
+    # Matrix to edgelist
+    acc <- reshape2::melt(scATAC)
+    colnames(acc) <- c("V1", "V2", "V3")
+    # Prepare cicero input
+    input_cds <- cicero::make_atac_cds(acc, binarize = TRUE) # Create CDS object
+    set.seed(seed)
+    # It is required that there is no empty cell
+    if (length(which(colSums(as.matrix(monocle3::exprs(input_cds))) == 0)) == 0
+    ) {
+    # Calculating size factors using default method = mean-geometric-mean-total
+      input_cds <- monocle3::estimate_size_factors(input_cds)
+      # Preprocessing using LSI
+      input_cds <- monocle3::preprocess_cds(input_cds, method = "LSI")
+      # Dimensionality reduction using UMAP
+      input_cds <- monocle3::reduce_dimension(
+                                    input_cds,
+                                    reduction_method = reduction_method,
+                                    preprocess_method = "LSI")
+    } else {
+      print("Error: there is at least one cell with no signal.")
+    }
+    # Get reduced (UMAP) coordinates
+    umap_coords <- SingleCellExperiment::reducedDims(input_cds)$UMAP
+    # Compute pseudocells
+    cicero_cds <- cicero::make_cicero_cds(
+      input_cds,  # Create a Cicero CDS object
+      reduced_coordinates = umap_coords,
+      k = number_cells_per_clusters,  #number neighbors/ Default = 50
+      summary_stats = NULL,         # Default
+      size_factor_normalize = TRUE, # Default
+      silent = FALSE)               # Default
+    cicero <- cicero::run_cicero(
+      cds = cicero_cds, # Infer peak-links
+      genomic_coords = chromosome_sizes,
+      window = window,             # Default = 5e+05
+      silent = FALSE,             # Default = FALSE
+      sample_num = sample_num) # Default = 100
+    # Remove NAs, double edges, and edges with coaccess score <=0
+    # Check for coaccess = NA
+    if (length(which(is.na(cicero$coaccess))) > threshold) {
+      cicero <- cicero[which(!is.na(cicero$coaccess)), ]  # Remove NAs
+    }
+    cicero$temp <- NA  # Helper column to check and remove double edges
+    my_cols <- which(as.character(cicero$Peak1) <= as.character(cicero$Peak2))
+    cicero$temp[my_cols] <- paste(cicero$Peak1[my_cols],
+                                  cicero$Peak2[my_cols],
+                                  sep = ";")
+    my_cols <- which(as.character(cicero$Peak1) > as.character(cicero$Peak2))
+    cicero$temp[my_cols] <- paste(cicero$Peak2[my_cols],
+                                  cicero$Peak1[my_cols],
+                                  sep = ";")
+    # Sort table according to temp-column (each entry appears double)
+    cicero <- cicero[with(cicero, order(temp, decreasing = TRUE)), ]
+    rownames(cicero) <- c(1:dim(cicero)[1])
+    A <- as.character(cicero$Peak1[seq(1, dim(cicero)[1], 2)])
+    Anum <- round(cicero$coaccess[seq(1, dim(cicero)[1], 2)], 10)
+    B <- as.character(cicero$Peak2[seq(2, dim(cicero)[1], 2)])
+    Bnum <- round(cicero$coaccess[seq(2, dim(cicero)[1], 2)], 10)
+    # length(which(A==B & Anum==Bnum))
+    # Each edge appears twice with same coaccess score (rounded to 10 digits)
+    cicero <- cicero[seq(1, dim(cicero)[1], 2), ] # Remove double edges
+    cicero$temp <- NULL # Remove helper column
+    cicero <- cicero[with(cicero, order(cicero$coaccess,
+                                        decreasing = TRUE)), ]  # Sort
+    rownames(cicero) <- c(1:dim(cicero)[1])
+    cicero$Peak1 <- gsub("_", "-", cicero$Peak1)
+    # Peak names 2x"-" to match bipartites
+    cicero$Peak2 <- gsub("_", "-", cicero$Peak2)
+    # Peak names 2x"-" to match bipartites ? 2x"-" or 2x"_"
+    peak_network <- cicero[which(cicero$coaccess > threshold), ]
+    # Remove edges with coaccess score <= threshold
+    if (verbose > 0) {
+      cat("\n", dim(peak_network)[1], "peak edges with a coaccess score >",
+          threshold, "were found.\n")
+    }
+    # Return peak network including edges with positive coaccess score
+    return(peak_network)
+}
+#' @title Omnipath wrapper function for the compute_tf_network function
+#' @description This function is a wrapper for the compute_tf_network function
+#' in layers.R. It computes the TF network from using Omnipath database.
+#' It returns a data frame with the TF network. The data frame is not weighted
+#' and does not contain scores for the edges.
+#' @param hummus A hummus object
+  # Get TF-TF interactions from Omnipath
+run_omnipath_wrapper <- function(
+  hummus = hummus,
+  organism = organism,
+  tfs = tfs,
+  gene_assay = gene_assay,
+  source_target = source_target,
+  verbose = 1) {
+  TF_PPI <- OmnipathR::import_post_translational_interactions(
+    organism = organism, partners = tfs, source_target = source_target
+  )
+  if (verbose > 0) {
+    cat("\tNumber of edges from Omnipath:", nrow(TF_PPI),
+    "\nWill now be filtered to only those corresponding to specified tfs")
+  }
+  if (is.na(tfs)) {
+    # Get tfs list
+    tfs <- get_tfs(hummus = hummus,
+              assay = gene_assay,
+              store_tfs = FALSE,
+              output_file = NULL,
+              verbose = verbose)
+  } else if (typeof(tfs) != "character") {
+      stop("'tfs' argument needs to be a vector of characters
+      (e.g.: c('MYC', 'JAK1')).")
+  }
+  # add filtering if element is not a TF expressed in the dataset
+  if (source_target == "AND") {
+    TF_PPI <- TF_PPI[which(TF_PPI$source_genesymbol %in% tfs &
+                           TF_PPI$target_genesymbol %in% tfs), ]
+  } else if (source_target == "OR") {
+    TF_PPI <- TF_PPI[which(TF_PPI$source_genesymbol %in% tfs |
+                           TF_PPI$target_genesymbol %in% tfs), ]
+  }
+  # Get only source and target columns
+  tf_network <- TF_PPI[, c(3, 4)]
+  # Convert to data.frame from tibble
+  tf_network <- as.data.frame(tf_network)
+  # Check if there is any TF-TF edges otherwise add a fake node
+  # and connect all TFs to it (to allow HuMMuS to run without impacting result)
+  if (nrow(tf_network) == 0) {
+    cat("No TF-TF edges from Omnipath for the given parameters.
+        You can try to change the source_target parameter to 'OR' to get
+        TF-other protein interactions. Or try to import a network
+        computed externally. Right now, a network with all TFs connected
+        to a fake node is created, for HuMMuS analysis.\n It has no biological
+        meaning but will allow to run the pipeline as if no edges were present.
+        \n")
+    tf_network <- run_tf_null_wrapper(
+      hummus = hummus,
+      organism = organism,
+      tfs = tfs,
+      gene_assay = gene_assay,
+      verbose = )
+  }
+  return(tf_network)
+}
+#' @title tf_null wrapper function for the tf_network function
+#' @description This function is a wrapper for the tf_network function
+#'
+#' @param hummus A hummus object
+#'
+#'
+run_tf_null_wrapper <- function(
+  hummus = hummus,
+  organism = organism,
+  tfs = tfs,
+  gene_assay = gene_assay,
+  verbose = 1) {
+  if (verbose > 0) {
+    cat("Creating a fake TF network with all TFs connected to a fake node.\n")
+  }
+  if (is.na(tfs)) {
+    # Get tfs list
+    tfs <- get_tfs(hummus = hummus,
+              assay = gene_assay,
+              store_tfs = FALSE,
+              output_file = NULL,
+              verbose = verbose)
+  } else if (typeof(tfs) != "character") {
+      stop("'tfs' argument needs to be a vector of characters
+      (e.g.: c('MYC', 'JAK1')).")
+  }
+  FAKE_NODE <- "fake_node"
+  tf_network <- data.frame(source = c(), target = c())
+  for (tf in tfs) {
+    tf_network <- rbind(tf_network, data.frame(source = tf, target = FAKE_NODE))
+  }
+  return(tf_network)
+}