VoltRon / Git / [413088] /R/processing.R

Models:
AlyssaS/
VoltRon
Downloads: 1
[413088]: / R / processing.R
History
Download this file
445 lines (382 with data), 13.9 kB

#' @include allgenerics.R
#'
NULL

####
# Normalization ####
####

normalizeDataVoltRon <- function(object, assay = NULL, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
  
  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)
  
  # normalize assays
  for(assy in assay_names){
    cur_assay <- object[[assy]]
    object[[assy]] <- normalizeData(cur_assay, method = method, desiredQuantile = desiredQuantile, scale = scale, sizefactor = sizefactor, feat_type = feat_type)
  }
  
  # return
  return(object)
}

#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param method the normalization method: "LogNorm", "Q3Norm", "LogQ3Norm" or "CLR"
#' @param desiredQuantile the quantile of the data if "QuanNorm" or "LogQuanNorm" is selected as \code{method}.
#' @param scale the scale parameter for the hyperbolic arcsine transformation
#' @param sizefactor size factor if \code{method} is selected as \code{LogNorm}
#' @param feat_type the feature set type
#' 
#' @rdname normalizeData
#' @method normalizeData VoltRon
#'
#' @export
setMethod("normalizeData", "VoltRon", normalizeDataVoltRon)

normalizeDatavrAssay <- function(object, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
  
  # size factor
  rawdata <- vrData(object, feat_type = feat_type, norm = FALSE)
  
  if(!is.numeric(desiredQuantile)){
    stop("desiredQuantile should be numeric")
  } else {
    if(!findInterval(desiredQuantile, c(0,1)) == 1L){
      stop("desiredQuantile should be between [0,1]")
    }
  }
  
  # normalization method
  if(method == "LogNorm"){
    normdata <- LogNorm(rawdata, colSums(rawdata), sizefactor)
  } else if(method == "Q3Norm") {
    # rawdata[rawdata==0] <- 1
    qs <- getColQuantiles(rawdata, desiredQuantile)
    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
  } else if(method == "LogQ3Norm") {
    # rawdata[rawdata==0] <- 1
    qs <- getColQuantiles(rawdata, desiredQuantile)
    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
    normdata <- log(normdata + 1)
  } else if(method == "CLR") {
    normdata <- getDivideSweep(rawdata, colSums(rawdata))
    normdata <- apply(normdata, 2, function(x) {
      log1p(x = x / (exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE) / length(x = x))))
    })
  } else if(method == "hyper.arcsine") {
    normdata <- asinh(rawdata/scale)
  } else {
    stop('Please select one of these methods: "LogNorm", "Q3Norm", "LogQ3Norm" or "CLR"')
  }
  
  # get normalized data
  catch_connect1 <- try(slot(object, name = "data"), silent = TRUE)
  catch_connect2 <- try(slot(object, name = "rawdata"), silent = TRUE)
  if(!is(catch_connect1, 'try-error') && !methods::is(catch_connect1,'error')){
    if(is.null(feat_type))
      feat_type <- vrMainFeatureType(object)
    object@data[[paste0(feat_type, "_norm")]] <- normdata
  } else if(!is(catch_connect2, 'try-error') && !methods::is(catch_connect2,'error')){
    object@normdata <- normdata
  }
  
  # return
  return(object)
}

#' @rdname normalizeData
#' @method normalizeData vrAssay
#'
#' @importFrom stats quantile
#'
#' @export
setMethod("normalizeData", "vrAssay", normalizeDatavrAssay)

#' @rdname normalizeData
#' @method normalizeData vrAssayV2
#'
#' @importFrom stats quantile
#'
#' @export
setMethod("normalizeData", "vrAssayV2", normalizeDatavrAssay)

LogNorm <- function(rawdata, coldepth, sizefactor){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    normdata <- BPCells::t(BPCells::t(rawdata)/coldepth)
    normdata <- BPCells::log1p_slow(normdata*sizefactor)
  } else {
    normdata <- sweep(rawdata, 2L, coldepth, FUN = "/")
    normdata <- log(normdata*sizefactor + 1)
  }
  return(normdata)
}

getDivideSweep <- function(rawdata, divisor){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    return(BPCells::t(BPCells::t(rawdata)/divisor))
  } else {
    return(sweep(rawdata, 2L, divisor, FUN = "/"))
  }
  return(rawdata)
}

####
# Features ####
####

getFeaturesVoltRon <- function(object, assay = NULL, max.count = 1, n = 3000){
  
  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)
  
  # get features for all coordinates
  for(assy in assay_names){
    object[[assy]] <- getFeatures(object[[assy]], max.count = max.count, n = n)
  }
  
  # return
  return(object)
}

#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param max.count maximum count (across spatial points) for low count filtering
#' @param n the top number of variable features 
#' 
#' @rdname getFeatures
#'
#' @export
setMethod("getFeatures", "VoltRon", getFeaturesVoltRon)

getFeaturesvrAssay <- function(object, max.count = 1, n = 3000){
  
  # get data and coordinates
  rawdata <- vrData(object, norm = FALSE)
  coords <- vrCoordinates(object)
  features <- vrFeatures(object)
  
  # eliminate genes with low counts
  # keep.genes <- which(apply(rawdata,1,max) > max.count)
  keep.genes <- getMaxCount(rawdata, max.count)
  
  # vst estimation
  # vst_data <- data.frame(mean = Matrix::rowMeans(rawdata), var = apply(rawdata, 1, stats::var))
  vst_data <- getVstData(rawdata)
  loess_data <- vst_data[keep.genes,]
  loess_results <- stats::loess(var~mean, loess_data, span = 0.3)
  vst_data$adj_var <- 0
  vst_data$rank <- 0
  vst_data[keep.genes,]$adj_var <- stats::predict(loess_results)
  vst_data[keep.genes,]$rank <- order(order(vst_data$adj_var[keep.genes], decreasing = TRUE))
  
  # set feature data
  vrFeatureData(object) <- vst_data
  
  # return
  return(object)
}

#' @rdname getFeatures
#'
#' @importFrom stats loess predict var
#' @importFrom Matrix rowMeans
#'
#' @export
setMethod("getFeatures", "vrAssay", getFeaturesvrAssay)

#' @rdname getFeatures
#'
#' @importFrom stats loess predict var
#' @importFrom Matrix rowMeans
#'
#' @export
setMethod("getFeatures", "vrAssayV2", getFeaturesvrAssay)

getVstData <- function(rawdata){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    mean_data <- BPCells::rowMeans(rawdata)
    var_data <- BPCells::rowSums(rawdata^2)
    var_data <- (var_data - mean_data^2/nrow(rawdata))/(nrow(rawdata)-1)
    # var_data <- BPCells::matrix_stats(rawdata, row_stats="variance")
  } else {
    mean_data <- Matrix::rowMeans(rawdata)
    var_data <- apply(rawdata, 1, stats::var)
  }
  vst_data <- data.frame(mean = mean_data, var = var_data)
  return(vst_data)
}

getMaxCount <- function(rawdata, max.count){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    rawdata <- rawdata > max.count
    keep.genes <- which(BPCells::rowSums(rawdata) > 0)
  } else {
    keep.genes <- which(apply(rawdata,1,max) > max.count)
  }
  return(keep.genes)
}

#' getVariableFeatures
#'
#' get shared variable features across multiple assays
#'
#' @param object a Voltron Object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param n the number of features
#' @param ... additional arguements passed to \link{vrFeatureData}
#'
#' @importFrom dplyr full_join
#' @importFrom utils head
#'
#' @export
getVariableFeatures <- function(object, assay = NULL, n = 3000, ...){

  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)

  # get features for all coordinates
  ranks <- NULL
  for(assy in assay_names){
    feature_data <- vrFeatureData(object[[assy]], ...)
    # if(nrow(feature_data) > 0){
    if(!is.null(feature_data)) {
      if(nrow(feature_data) > 0){
        feature_data$gene <- rownames(feature_data)
      }
    } else {
      feature_data <- data.frame(gene = vrFeatures(object[[assy]]), rank = NA)
    }
    if(is.null(ranks)){
      ranks <- feature_data[,c("gene", "rank")]
    } else {
      ranks <- ranks %>% full_join(feature_data[,c("gene", "rank")], by = c("gene" = "gene"))
    }
  }

  # get geometric mean of ranks, i.e. rank product statistic
  rownames_ranks <- ranks$gene
  ranks <- ranks[,!colnames(ranks) %in% "gene", drop = FALSE]
  ranks <- apply(ranks, 1, function(x) exp(mean(log(x))))
  # names(ranks) <- rownames(feature_data)
  names(ranks) <- rownames_ranks
  ranks <- ranks[ranks != 0]

  # get selected features
  if(length(ranks[!is.na(ranks)]) > 0){
    selected_features <- names(utils::head(sort(ranks, decreasing = FALSE), n))
  } else {
    selected_features <- vrFeatures(object, assay = assay)
  }

  # return
  return(selected_features)
}

####
# vrEmbeddings ####
####

#' getPCA
#'
#' calculate PCA of the VoltRon objects
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param features the selected features for PCA reduction
#' @param dims the number of dimensions extracted from PCA
#' @param type the key name for the embedding, default: pca
#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
#' @param seed seed
#'
#' @importFrom irlba irlba
#'
#' @export
getPCA <- function(object, assay = NULL, features = NULL, dims = 30, type = "pca", overwrite = FALSE, seed = 1){

  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)

  # get shared features and subset
  assay_features <- vrFeatures(object, assay = assay)

  # if there are features of a VoltRon object, then get variable features too
  if(length(assay_features) > 0) {
    if(is.null(features))
      features <- getVariableFeatures(object, assay = assay)
    object_subset <- subsetVoltRon(object, features = features)
    vrMainAssay(object_subset) <- vrMainAssay(object)

    # adjust extraction features length
    if(dims > length(features)){
      message("Requested more PC dimensions than existing features: dims = length(features) now!")
      dims <- length(features)
    }

  # if there are no features in VoltRon object, return the assay as itself
  } else {
    object_subset <- object
  }

  # get data
  normdata <- vrData(object_subset, assay = assay, norm = TRUE)

  # get PCA embedding
  set.seed(seed)
  if(inherits(normdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    svd <- BPCells::svds(normdata, k=dims)
    pr.data <- BPCells::multiply_cols(svd$v, svd$d)
  } else {
    scale.data <- apply(normdata, 1, scale)
    pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
    pr.data <- pr.data$x 
  }
  
  # change colnames
  colnames(pr.data) <- paste0("PC", seq_len(dims))
  rownames(pr.data) <- colnames(normdata)

  # set Embeddings
  vrEmbeddings(object, assay = assay, type = type, overwrite = overwrite) <- pr.data

  # return
  return(object)
}

#' getUMAP
#'
#' calculate UMAP of the VoltRon objects
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}. 
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param data.type the type of data used to calculate UMAP from: "pca" (default), "raw" or "norm"
#' @param dims the number of dimensions extracted from PCA
#' @param umap.key the name of the umap embedding, default: umap
#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
#' @param seed seed
#'
#' @importFrom uwot umap
#' @importFrom Matrix t
#'
#' @export
#'
getUMAP <- function(object, assay = NULL, data.type = "pca", dims = seq_len(30), umap.key = "umap", overwrite = FALSE, seed = 1){

  # get data
  if(data.type %in% c("raw", "norm")){
    data <- vrData(object, assay = assay, norm = (data.type == "norm"))
    data <- as.matrix(as(Matrix::t(data),"dgCMatrix"))
  } else{
    embedding_names <- vrEmbeddingNames(object)
    if(data.type %in% vrEmbeddingNames(object)) {
      data <- vrEmbeddings(object, assay = assay, type = data.type, dims = dims)
    } else {
      stop("Please provide a data type from one of three choices: raw, norm and pca")
    }
  }

  # get umap
  set.seed(seed)
  umap_data <- uwot::umap(data)
  colnames(umap_data) <- c("x", "y")
  vrEmbeddings(object, assay = assay, type = umap.key, overwrite = overwrite) <- umap_data

  # return
  return(object)
}

####
# Image Processing ####
####

#' split_into_tiles
#'
#' split image raster data into tiles
#'
#' @param image_data image raster data
#' @param tile_size tile size
#'
#' @noRd
split_into_tiles <- function(image_data, tile_size = 10) {
  n_rows <- nrow(image_data)
  n_cols <- ncol(image_data)

  # Calculate the number of tiles in rows and columns
  n_row_tiles <- n_rows %/% tile_size
  n_col_tiles <- n_cols %/% tile_size

  # Initialize an empty list to store tiles
  tiles <- list()

  # Loop through the image data matrix to extract tiles
  for (i in seq_len(n_row_tiles)) {
    for (j in seq_len(n_col_tiles)) {
      # Calculate the indices for the current tile
      start_row <- (i - 1) * tile_size + 1
      end_row <- i * tile_size
      start_col <- (j - 1) * tile_size + 1
      end_col <- j * tile_size

      # Extract the current tile from the image data matrix
      tile <- image_data[start_row:end_row, start_col:end_col]

      # Store the tile in the list
      tiles[[length(tiles) + 1]] <- tile
    }
  }

  # Return the list of tiles
  return(tiles)
}