VoltRon / Git / Diff of /R/processing.R

Models:
AlyssaS/
VoltRon
Downloads: 1
Diff of /R/processing.R [000000] .. [413088]
Switch to side-by-side view

--- a
+++ b/R/processing.R
@@ -0,0 +1,444 @@
+#' @include allgenerics.R
+#'
+NULL
+
+####
+# Normalization ####
+####
+
+normalizeDataVoltRon <- function(object, assay = NULL, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
+  
+  # get assay names
+  assay_names <- vrAssayNames(object, assay = assay)
+  
+  # normalize assays
+  for(assy in assay_names){
+    cur_assay <- object[[assy]]
+    object[[assy]] <- normalizeData(cur_assay, method = method, desiredQuantile = desiredQuantile, scale = scale, sizefactor = sizefactor, feat_type = feat_type)
+  }
+  
+  # return
+  return(object)
+}
+
+#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
+#' if NULL, the default assay will be used, see \link{vrMainAssay}.
+#' @param method the normalization method: "LogNorm", "Q3Norm", "LogQ3Norm" or "CLR"
+#' @param desiredQuantile the quantile of the data if "QuanNorm" or "LogQuanNorm" is selected as \code{method}.
+#' @param scale the scale parameter for the hyperbolic arcsine transformation
+#' @param sizefactor size factor if \code{method} is selected as \code{LogNorm}
+#' @param feat_type the feature set type
+#' 
+#' @rdname normalizeData
+#' @method normalizeData VoltRon
+#'
+#' @export
+setMethod("normalizeData", "VoltRon", normalizeDataVoltRon)
+
+normalizeDatavrAssay <- function(object, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
+  
+  # size factor
+  rawdata <- vrData(object, feat_type = feat_type, norm = FALSE)
+  
+  if(!is.numeric(desiredQuantile)){
+    stop("desiredQuantile should be numeric")
+  } else {
+    if(!findInterval(desiredQuantile, c(0,1)) == 1L){
+      stop("desiredQuantile should be between [0,1]")
+    }
+  }
+  
+  # normalization method
+  if(method == "LogNorm"){
+    normdata <- LogNorm(rawdata, colSums(rawdata), sizefactor)
+  } else if(method == "Q3Norm") {
+    # rawdata[rawdata==0] <- 1
+    qs <- getColQuantiles(rawdata, desiredQuantile)
+    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
+  } else if(method == "LogQ3Norm") {
+    # rawdata[rawdata==0] <- 1
+    qs <- getColQuantiles(rawdata, desiredQuantile)
+    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
+    normdata <- log(normdata + 1)
+  } else if(method == "CLR") {
+    normdata <- getDivideSweep(rawdata, colSums(rawdata))
+    normdata <- apply(normdata, 2, function(x) {
+      log1p(x = x / (exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE) / length(x = x))))
+    })
+  } else if(method == "hyper.arcsine") {
+    normdata <- asinh(rawdata/scale)
+  } else {
+    stop('Please select one of these methods: "LogNorm", "Q3Norm", "LogQ3Norm" or "CLR"')
+  }
+  
+  # get normalized data
+  catch_connect1 <- try(slot(object, name = "data"), silent = TRUE)
+  catch_connect2 <- try(slot(object, name = "rawdata"), silent = TRUE)
+  if(!is(catch_connect1, 'try-error') && !methods::is(catch_connect1,'error')){
+    if(is.null(feat_type))
+      feat_type <- vrMainFeatureType(object)
+    object@data[[paste0(feat_type, "_norm")]] <- normdata
+  } else if(!is(catch_connect2, 'try-error') && !methods::is(catch_connect2,'error')){
+    object@normdata <- normdata
+  }
+  
+  # return
+  return(object)
+}
+
+#' @rdname normalizeData
+#' @method normalizeData vrAssay
+#'
+#' @importFrom stats quantile
+#'
+#' @export
+setMethod("normalizeData", "vrAssay", normalizeDatavrAssay)
+
+#' @rdname normalizeData
+#' @method normalizeData vrAssayV2
+#'
+#' @importFrom stats quantile
+#'
+#' @export
+setMethod("normalizeData", "vrAssayV2", normalizeDatavrAssay)
+
+LogNorm <- function(rawdata, coldepth, sizefactor){
+  if(inherits(rawdata, "IterableMatrix")){
+    if(!requireNamespace("BPCells"))
+      stop("You have to install BPCells!")
+    normdata <- BPCells::t(BPCells::t(rawdata)/coldepth)
+    normdata <- BPCells::log1p_slow(normdata*sizefactor)
+  } else {
+    normdata <- sweep(rawdata, 2L, coldepth, FUN = "/")
+    normdata <- log(normdata*sizefactor + 1)
+  }
+  return(normdata)
+}
+
+getDivideSweep <- function(rawdata, divisor){
+  if(inherits(rawdata, "IterableMatrix")){
+    if(!requireNamespace("BPCells"))
+      stop("You have to install BPCells!")
+    return(BPCells::t(BPCells::t(rawdata)/divisor))
+  } else {
+    return(sweep(rawdata, 2L, divisor, FUN = "/"))
+  }
+  return(rawdata)
+}
+
+####
+# Features ####
+####
+
+getFeaturesVoltRon <- function(object, assay = NULL, max.count = 1, n = 3000){
+  
+  # get assay names
+  assay_names <- vrAssayNames(object, assay = assay)
+  
+  # get features for all coordinates
+  for(assy in assay_names){
+    object[[assy]] <- getFeatures(object[[assy]], max.count = max.count, n = n)
+  }
+  
+  # return
+  return(object)
+}
+
+#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
+#' if NULL, the default assay will be used, see \link{vrMainAssay}.
+#' @param max.count maximum count (across spatial points) for low count filtering
+#' @param n the top number of variable features 
+#' 
+#' @rdname getFeatures
+#'
+#' @export
+setMethod("getFeatures", "VoltRon", getFeaturesVoltRon)
+
+getFeaturesvrAssay <- function(object, max.count = 1, n = 3000){
+  
+  # get data and coordinates
+  rawdata <- vrData(object, norm = FALSE)
+  coords <- vrCoordinates(object)
+  features <- vrFeatures(object)
+  
+  # eliminate genes with low counts
+  # keep.genes <- which(apply(rawdata,1,max) > max.count)
+  keep.genes <- getMaxCount(rawdata, max.count)
+  
+  # vst estimation
+  # vst_data <- data.frame(mean = Matrix::rowMeans(rawdata), var = apply(rawdata, 1, stats::var))
+  vst_data <- getVstData(rawdata)
+  loess_data <- vst_data[keep.genes,]
+  loess_results <- stats::loess(var~mean, loess_data, span = 0.3)
+  vst_data$adj_var <- 0
+  vst_data$rank <- 0
+  vst_data[keep.genes,]$adj_var <- stats::predict(loess_results)
+  vst_data[keep.genes,]$rank <- order(order(vst_data$adj_var[keep.genes], decreasing = TRUE))
+  
+  # set feature data
+  vrFeatureData(object) <- vst_data
+  
+  # return
+  return(object)
+}
+
+#' @rdname getFeatures
+#'
+#' @importFrom stats loess predict var
+#' @importFrom Matrix rowMeans
+#'
+#' @export
+setMethod("getFeatures", "vrAssay", getFeaturesvrAssay)
+
+#' @rdname getFeatures
+#'
+#' @importFrom stats loess predict var
+#' @importFrom Matrix rowMeans
+#'
+#' @export
+setMethod("getFeatures", "vrAssayV2", getFeaturesvrAssay)
+
+getVstData <- function(rawdata){
+  if(inherits(rawdata, "IterableMatrix")){
+    if(!requireNamespace("BPCells"))
+      stop("You have to install BPCells!")
+    mean_data <- BPCells::rowMeans(rawdata)
+    var_data <- BPCells::rowSums(rawdata^2)
+    var_data <- (var_data - mean_data^2/nrow(rawdata))/(nrow(rawdata)-1)
+    # var_data <- BPCells::matrix_stats(rawdata, row_stats="variance")
+  } else {
+    mean_data <- Matrix::rowMeans(rawdata)
+    var_data <- apply(rawdata, 1, stats::var)
+  }
+  vst_data <- data.frame(mean = mean_data, var = var_data)
+  return(vst_data)
+}
+
+getMaxCount <- function(rawdata, max.count){
+  if(inherits(rawdata, "IterableMatrix")){
+    if(!requireNamespace("BPCells"))
+      stop("You have to install BPCells!")
+    rawdata <- rawdata > max.count
+    keep.genes <- which(BPCells::rowSums(rawdata) > 0)
+  } else {
+    keep.genes <- which(apply(rawdata,1,max) > max.count)
+  }
+  return(keep.genes)
+}
+
+#' getVariableFeatures
+#'
+#' get shared variable features across multiple assays
+#'
+#' @param object a Voltron Object
+#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
+#' if NULL, the default assay will be used, see \link{vrMainAssay}.
+#' @param n the number of features
+#' @param ... additional arguements passed to \link{vrFeatureData}
+#'
+#' @importFrom dplyr full_join
+#' @importFrom utils head
+#'
+#' @export
+getVariableFeatures <- function(object, assay = NULL, n = 3000, ...){
+
+  # get assay names
+  assay_names <- vrAssayNames(object, assay = assay)
+
+  # get features for all coordinates
+  ranks <- NULL
+  for(assy in assay_names){
+    feature_data <- vrFeatureData(object[[assy]], ...)
+    # if(nrow(feature_data) > 0){
+    if(!is.null(feature_data)) {
+      if(nrow(feature_data) > 0){
+        feature_data$gene <- rownames(feature_data)
+      }
+    } else {
+      feature_data <- data.frame(gene = vrFeatures(object[[assy]]), rank = NA)
+    }
+    if(is.null(ranks)){
+      ranks <- feature_data[,c("gene", "rank")]
+    } else {
+      ranks <- ranks %>% full_join(feature_data[,c("gene", "rank")], by = c("gene" = "gene"))
+    }
+  }
+
+  # get geometric mean of ranks, i.e. rank product statistic
+  rownames_ranks <- ranks$gene
+  ranks <- ranks[,!colnames(ranks) %in% "gene", drop = FALSE]
+  ranks <- apply(ranks, 1, function(x) exp(mean(log(x))))
+  # names(ranks) <- rownames(feature_data)
+  names(ranks) <- rownames_ranks
+  ranks <- ranks[ranks != 0]
+
+  # get selected features
+  if(length(ranks[!is.na(ranks)]) > 0){
+    selected_features <- names(utils::head(sort(ranks, decreasing = FALSE), n))
+  } else {
+    selected_features <- vrFeatures(object, assay = assay)
+  }
+
+  # return
+  return(selected_features)
+}
+
+####
+# vrEmbeddings ####
+####
+
+#' getPCA
+#'
+#' calculate PCA of the VoltRon objects
+#'
+#' @param object a VoltRon object
+#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
+#' if NULL, the default assay will be used, see \link{vrMainAssay}.
+#' @param features the selected features for PCA reduction
+#' @param dims the number of dimensions extracted from PCA
+#' @param type the key name for the embedding, default: pca
+#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
+#' @param seed seed
+#'
+#' @importFrom irlba irlba
+#'
+#' @export
+getPCA <- function(object, assay = NULL, features = NULL, dims = 30, type = "pca", overwrite = FALSE, seed = 1){
+
+  # get assay names
+  assay_names <- vrAssayNames(object, assay = assay)
+
+  # get shared features and subset
+  assay_features <- vrFeatures(object, assay = assay)
+
+  # if there are features of a VoltRon object, then get variable features too
+  if(length(assay_features) > 0) {
+    if(is.null(features))
+      features <- getVariableFeatures(object, assay = assay)
+    object_subset <- subsetVoltRon(object, features = features)
+    vrMainAssay(object_subset) <- vrMainAssay(object)
+
+    # adjust extraction features length
+    if(dims > length(features)){
+      message("Requested more PC dimensions than existing features: dims = length(features) now!")
+      dims <- length(features)
+    }
+
+  # if there are no features in VoltRon object, return the assay as itself
+  } else {
+    object_subset <- object
+  }
+
+  # get data
+  normdata <- vrData(object_subset, assay = assay, norm = TRUE)
+
+  # get PCA embedding
+  set.seed(seed)
+  if(inherits(normdata, "IterableMatrix")){
+    if(!requireNamespace("BPCells"))
+      stop("You have to install BPCells!")
+    svd <- BPCells::svds(normdata, k=dims)
+    pr.data <- BPCells::multiply_cols(svd$v, svd$d)
+  } else {
+    scale.data <- apply(normdata, 1, scale)
+    pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
+    pr.data <- pr.data$x 
+  }
+  
+  # change colnames
+  colnames(pr.data) <- paste0("PC", seq_len(dims))
+  rownames(pr.data) <- colnames(normdata)
+
+  # set Embeddings
+  vrEmbeddings(object, assay = assay, type = type, overwrite = overwrite) <- pr.data
+
+  # return
+  return(object)
+}
+
+#' getUMAP
+#'
+#' calculate UMAP of the VoltRon objects
+#'
+#' @param object a VoltRon object
+#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
+#' if NULL, the default assay will be used, see \link{vrMainAssay}.
+#' @param data.type the type of data used to calculate UMAP from: "pca" (default), "raw" or "norm"
+#' @param dims the number of dimensions extracted from PCA
+#' @param umap.key the name of the umap embedding, default: umap
+#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
+#' @param seed seed
+#'
+#' @importFrom uwot umap
+#' @importFrom Matrix t
+#'
+#' @export
+#'
+getUMAP <- function(object, assay = NULL, data.type = "pca", dims = seq_len(30), umap.key = "umap", overwrite = FALSE, seed = 1){
+
+  # get data
+  if(data.type %in% c("raw", "norm")){
+    data <- vrData(object, assay = assay, norm = (data.type == "norm"))
+    data <- as.matrix(as(Matrix::t(data),"dgCMatrix"))
+  } else{
+    embedding_names <- vrEmbeddingNames(object)
+    if(data.type %in% vrEmbeddingNames(object)) {
+      data <- vrEmbeddings(object, assay = assay, type = data.type, dims = dims)
+    } else {
+      stop("Please provide a data type from one of three choices: raw, norm and pca")
+    }
+  }
+
+  # get umap
+  set.seed(seed)
+  umap_data <- uwot::umap(data)
+  colnames(umap_data) <- c("x", "y")
+  vrEmbeddings(object, assay = assay, type = umap.key, overwrite = overwrite) <- umap_data
+
+  # return
+  return(object)
+}
+
+####
+# Image Processing ####
+####
+
+#' split_into_tiles
+#'
+#' split image raster data into tiles
+#'
+#' @param image_data image raster data
+#' @param tile_size tile size
+#'
+#' @noRd
+split_into_tiles <- function(image_data, tile_size = 10) {
+  n_rows <- nrow(image_data)
+  n_cols <- ncol(image_data)
+
+  # Calculate the number of tiles in rows and columns
+  n_row_tiles <- n_rows %/% tile_size
+  n_col_tiles <- n_cols %/% tile_size
+
+  # Initialize an empty list to store tiles
+  tiles <- list()
+
+  # Loop through the image data matrix to extract tiles
+  for (i in seq_len(n_row_tiles)) {
+    for (j in seq_len(n_col_tiles)) {
+      # Calculate the indices for the current tile
+      start_row <- (i - 1) * tile_size + 1
+      end_row <- i * tile_size
+      start_col <- (j - 1) * tile_size + 1
+      end_col <- j * tile_size
+
+      # Extract the current tile from the image data matrix
+      tile <- image_data[start_row:end_row, start_col:end_col]
+
+      # Store the tile in the list
+      tiles[[length(tiles) + 1]] <- tile
+    }
+  }
+
+  # Return the list of tiles
+  return(tiles)
+}