#' @include allgenerics.R
#'
NULL
####
# Normalization ####
####
normalizeDataVoltRon <- function(object, assay = NULL, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
  # Normalize every requested assay of a VoltRon object by dispatching
  # normalizeData() on each underlying vrAssay object, then return the
  # updated VoltRon object.
  for (assay_name in vrAssayNames(object, assay = assay)) {
    object[[assay_name]] <- normalizeData(
      object[[assay_name]],
      method = method,
      desiredQuantile = desiredQuantile,
      scale = scale,
      sizefactor = sizefactor,
      feat_type = feat_type
    )
  }
  return(object)
}
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param method the normalization method: "LogNorm", "Q3Norm", "LogQ3Norm", "CLR" or "hyper.arcsine"
#' @param desiredQuantile the quantile of the data if "Q3Norm" or "LogQ3Norm" is selected as \code{method}.
#' @param scale the scale parameter for the hyperbolic arcsine transformation
#' @param sizefactor size factor if \code{method} is selected as \code{LogNorm}
#' @param feat_type the feature set type
#'
#' @rdname normalizeData
#' @method normalizeData VoltRon
#'
#' @export
setMethod("normalizeData", "VoltRon", normalizeDataVoltRon)
normalizeDatavrAssay <- function(object, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
  # Normalize the raw feature matrix of a vrAssay/vrAssayV2 object and store
  # the result in the object's normalized-data slot.
  #
  # method: one of "LogNorm", "Q3Norm", "LogQ3Norm", "CLR" or "hyper.arcsine"
  # desiredQuantile: quantile used by the Q3-style normalizations, in [0,1]
  # scale: scale parameter for the hyperbolic arcsine transformation
  # sizefactor: size factor used by "LogNorm"
  # feat_type: feature set type passed through to vrData
  rawdata <- vrData(object, feat_type = feat_type, norm = FALSE)
  # validate the quantile up front so every method fails fast on bad input
  if(!is.numeric(desiredQuantile)){
    stop("desiredQuantile should be numeric")
  } else if(desiredQuantile < 0 || desiredQuantile > 1){
    # direct range check; findInterval(x, c(0,1)) == 1L wrongly rejected
    # desiredQuantile == 1 even though the message promises [0,1] inclusive
    stop("desiredQuantile should be between [0,1]")
  }
  # normalization method
  if(method == "LogNorm"){
    normdata <- LogNorm(rawdata, colSums(rawdata), sizefactor)
  } else if(method == "Q3Norm") {
    # divide each column by its quantile, rescaled by the geometric mean of
    # the quantiles so values stay on a comparable overall scale
    qs <- getColQuantiles(rawdata, desiredQuantile)
    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
  } else if(method == "LogQ3Norm") {
    qs <- getColQuantiles(rawdata, desiredQuantile)
    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
    normdata <- log(normdata + 1)
  } else if(method == "CLR") {
    # centered log-ratio on column proportions
    normdata <- getDivideSweep(rawdata, colSums(rawdata))
    normdata <- apply(normdata, 2, function(x) {
      log1p(x = x / (exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE) / length(x = x))))
    })
  } else if(method == "hyper.arcsine") {
    normdata <- asinh(rawdata/scale)
  } else {
    # list every supported method (the old message omitted "hyper.arcsine")
    stop('Please select one of these methods: "LogNorm", "Q3Norm", "LogQ3Norm", "CLR" or "hyper.arcsine"')
  }
  # Store the normalized data. vrAssayV2 keeps per-feature-type matrices in
  # the "data" slot; legacy vrAssay uses a "normdata" slot. Probe which slot
  # exists via try() since only one class has each slot.
  catch_connect1 <- try(slot(object, name = "data"), silent = TRUE)
  catch_connect2 <- try(slot(object, name = "rawdata"), silent = TRUE)
  if(!is(catch_connect1, 'try-error') && !methods::is(catch_connect1,'error')){
    if(is.null(feat_type))
      feat_type <- vrMainFeatureType(object)
    object@data[[paste0(feat_type, "_norm")]] <- normdata
  } else if(!is(catch_connect2, 'try-error') && !methods::is(catch_connect2,'error')){
    object@normdata <- normdata
  }
  # return the updated assay object
  return(object)
}
# S4 method registration: normalizeData on legacy vrAssay objects
#' @rdname normalizeData
#' @method normalizeData vrAssay
#'
#' @importFrom stats quantile
#'
#' @export
setMethod("normalizeData", "vrAssay", normalizeDatavrAssay)

# S4 method registration: normalizeData on vrAssayV2 objects (shares the
# same underlying implementation as vrAssay)
#' @rdname normalizeData
#' @method normalizeData vrAssayV2
#'
#' @importFrom stats quantile
#'
#' @export
setMethod("normalizeData", "vrAssayV2", normalizeDatavrAssay)
LogNorm <- function(rawdata, coldepth, sizefactor){
  # Library-size normalization followed by a log transform:
  #   log( (x / column_depth) * sizefactor + 1 )
  # In-memory matrices take the sweep() path; BPCells IterableMatrix input
  # is handled with BPCells operations so data stays out of memory.
  if(!inherits(rawdata, "IterableMatrix")){
    depth_scaled <- sweep(rawdata, 2L, coldepth, FUN = "/")
    return(log(depth_scaled*sizefactor + 1))
  }
  if(!requireNamespace("BPCells"))
    stop("You have to install BPCells!")
  depth_scaled <- BPCells::t(BPCells::t(rawdata)/coldepth)
  BPCells::log1p_slow(depth_scaled*sizefactor)
}
getDivideSweep <- function(rawdata, divisor){
  # Divide each column of `rawdata` by the corresponding element of
  # `divisor` (column-wise sweep). BPCells IterableMatrix input is handled
  # with BPCells operations so data stays out of memory.
  #
  # Fix: removed the unreachable `return(rawdata)` that followed the
  # exhaustive if/else (both branches already return).
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    return(BPCells::t(BPCells::t(rawdata)/divisor))
  }
  # dense / sparse in-memory matrices
  sweep(rawdata, 2L, divisor, FUN = "/")
}
####
# Features ####
####
getFeaturesVoltRon <- function(object, assay = NULL, max.count = 1, n = 3000){
  # Compute variable-feature statistics for each requested assay of a
  # VoltRon object by dispatching getFeatures() on the underlying assays.
  assay_names <- vrAssayNames(object, assay = assay)
  for (assay_name in assay_names) {
    updated <- getFeatures(object[[assay_name]], max.count = max.count, n = n)
    object[[assay_name]] <- updated
  }
  return(object)
}
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param max.count maximum count (across spatial points) for low count filtering
#' @param n the top number of variable features
#'
#' @rdname getFeatures
#' @method getFeatures VoltRon
#'
#' @export
setMethod("getFeatures", "VoltRon", getFeaturesVoltRon)
getFeaturesvrAssay <- function(object, max.count = 1, n = 3000){
  # Estimate per-feature mean/variance statistics (vst), fit a loess trend
  # of variance on mean, and rank features by loess-adjusted variance
  # (rank 1 = most variable). Results are stored via vrFeatureData().
  #
  # max.count: features whose maximum count across spatial points is
  #   <= max.count are excluded from the loess fit and ranking
  # n: NOTE(review) accepted for interface compatibility but not used in
  #   this method — selection by n happens in getVariableFeatures()
  #
  # Fix: removed unused locals `coords` (vrCoordinates) and `features`
  # (vrFeatures), which were computed but never read.
  rawdata <- vrData(object, norm = FALSE)
  # eliminate genes with low counts
  keep.genes <- getMaxCount(rawdata, max.count)
  # mean/variance table for all features
  vst_data <- getVstData(rawdata)
  # fit variance ~ mean on the retained features only
  loess_data <- vst_data[keep.genes,]
  loess_results <- stats::loess(var~mean, loess_data, span = 0.3)
  # features filtered out keep adj_var = 0 and rank = 0
  vst_data$adj_var <- 0
  vst_data$rank <- 0
  vst_data[keep.genes,]$adj_var <- stats::predict(loess_results)
  # order(order(x)) yields each element's rank; decreasing so that the
  # feature with the largest adjusted variance gets rank 1
  vst_data[keep.genes,]$rank <- order(order(vst_data$adj_var[keep.genes], decreasing = TRUE))
  # set feature data
  vrFeatureData(object) <- vst_data
  # return the updated assay object
  return(object)
}
# S4 method registration: getFeatures on legacy vrAssay objects
#' @rdname getFeatures
#' @method getFeatures vrAssay
#'
#' @importFrom stats loess predict var
#' @importFrom Matrix rowMeans
#'
#' @export
setMethod("getFeatures", "vrAssay", getFeaturesvrAssay)

# S4 method registration: getFeatures on vrAssayV2 objects (shares the
# same underlying implementation as vrAssay)
#' @rdname getFeatures
#' @method getFeatures vrAssayV2
#'
#' @importFrom stats loess predict var
#' @importFrom Matrix rowMeans
#'
#' @export
setMethod("getFeatures", "vrAssayV2", getFeaturesvrAssay)
getVstData <- function(rawdata){
  # Per-feature (row) mean and sample variance of a features x points
  # matrix, returned as a data.frame with columns `mean` and `var`.
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    # observations for a row statistic are the columns (spatial points)
    n_obs <- ncol(rawdata)
    mean_data <- BPCells::rowMeans(rawdata)
    # sample variance from sums of squares:
    #   var = (sum(x^2) - n * mean^2) / (n - 1)
    # Fix: the previous formula divided by nrow() (the wrong dimension) and
    # divided mean^2 by n instead of multiplying, giving incorrect variances.
    var_data <- BPCells::rowSums(rawdata^2)
    var_data <- (var_data - n_obs * mean_data^2) / (n_obs - 1)
  } else {
    mean_data <- Matrix::rowMeans(rawdata)
    var_data <- apply(rawdata, 1, stats::var)
  }
  vst_data <- data.frame(mean = mean_data, var = var_data)
  return(vst_data)
}
getMaxCount <- function(rawdata, max.count){
  # Return the (row) indices of features whose maximum value across all
  # spatial points exceeds `max.count`. The BPCells path avoids a dense
  # row-max by testing elementwise exceedance and summing per row.
  if(!inherits(rawdata, "IterableMatrix")){
    row_max <- apply(rawdata, 1, max)
    return(which(row_max > max.count))
  }
  if(!requireNamespace("BPCells"))
    stop("You have to install BPCells!")
  exceeds <- rawdata > max.count
  which(BPCells::rowSums(exceeds) > 0)
}
#' getVariableFeatures
#'
#' get shared variable features across multiple assays
#'
#' @param object a Voltron Object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param n the number of features
#' @param ... additional arguments passed to \link{vrFeatureData}
#'
#' @importFrom dplyr full_join
#' @importFrom utils head
#'
#' @export
getVariableFeatures <- function(object, assay = NULL, n = 3000, ...){
# get assay names
assay_names <- vrAssayNames(object, assay = assay)
# collect per-assay feature ranks; an assay with no feature data
# contributes NA ranks for all of its features
ranks <- NULL
for(assy in assay_names){
feature_data <- vrFeatureData(object[[assy]], ...)
# if(nrow(feature_data) > 0){
if(!is.null(feature_data)) {
if(nrow(feature_data) > 0){
feature_data$gene <- rownames(feature_data)
}
} else {
feature_data <- data.frame(gene = vrFeatures(object[[assy]]), rank = NA)
}
# full join by gene so features missing from some assays keep NA ranks
if(is.null(ranks)){
ranks <- feature_data[,c("gene", "rank")]
} else {
ranks <- ranks %>% full_join(feature_data[,c("gene", "rank")], by = c("gene" = "gene"))
}
}
# get geometric mean of ranks, i.e. rank product statistic
rownames_ranks <- ranks$gene
ranks <- ranks[,!colnames(ranks) %in% "gene", drop = FALSE]
ranks <- apply(ranks, 1, function(x) exp(mean(log(x))))
# names(ranks) <- rownames(feature_data)
names(ranks) <- rownames_ranks
# drop features whose rank product is exactly zero (never ranked)
ranks <- ranks[ranks != 0]
# pick the n features with the smallest (best) rank product; if no assay
# provided usable (non-NA) ranks, fall back to all features
if(length(ranks[!is.na(ranks)]) > 0){
selected_features <- names(utils::head(sort(ranks, decreasing = FALSE), n))
} else {
selected_features <- vrFeatures(object, assay = assay)
}
# return
return(selected_features)
}
####
# vrEmbeddings ####
####
#' getPCA
#'
#' calculate PCA of the VoltRon objects
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param features the selected features for PCA reduction
#' @param dims the number of dimensions extracted from PCA
#' @param type the key name for the embedding, default: pca
#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
#' @param seed seed
#'
#' @importFrom irlba irlba
#'
#' @export
getPCA <- function(object, assay = NULL, features = NULL, dims = 30, type = "pca", overwrite = FALSE, seed = 1){
# get assay names
assay_names <- vrAssayNames(object, assay = assay)
# get shared features and subset
assay_features <- vrFeatures(object, assay = assay)
# if there are features of a VoltRon object, then get variable features too
if(length(assay_features) > 0) {
if(is.null(features))
features <- getVariableFeatures(object, assay = assay)
object_subset <- subsetVoltRon(object, features = features)
vrMainAssay(object_subset) <- vrMainAssay(object)
# cap the requested number of PCs at the number of available features
if(dims > length(features)){
message("Requested more PC dimensions than existing features: dims = length(features) now!")
dims <- length(features)
}
# if there are no features in VoltRon object, return the assay as itself
} else {
object_subset <- object
}
# get normalized data (features x points)
normdata <- vrData(object_subset, assay = assay, norm = TRUE)
# get PCA embedding (seeded for reproducibility of the iterative solvers)
set.seed(seed)
if(inherits(normdata, "IterableMatrix")){
if(!requireNamespace("BPCells"))
stop("You have to install BPCells!")
# out-of-memory path: truncated SVD; embedding scores = V * d
svd <- BPCells::svds(normdata, k=dims)
pr.data <- BPCells::multiply_cols(svd$v, svd$d)
} else {
# apply(normdata, 1, scale) z-scores each feature AND transposes the
# result to points x features, as expected by prcomp_irlba
scale.data <- apply(normdata, 1, scale)
# NOTE(review): center = colMeans(scale.data) is ~0 after scaling, so it
# is presumably redundant — confirm before removing
pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
pr.data <- pr.data$x
}
# label dimensions and points (rows of pr.data correspond to columns of normdata)
colnames(pr.data) <- paste0("PC", seq_len(dims))
rownames(pr.data) <- colnames(normdata)
# set Embeddings
vrEmbeddings(object, assay = assay, type = type, overwrite = overwrite) <- pr.data
# return
return(object)
}
#' getUMAP
#'
#' calculate UMAP of the VoltRon objects
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param data.type the type of data used to calculate UMAP from: "pca" (default), "raw" or "norm"
#' @param dims the number of dimensions extracted from PCA
#' @param umap.key the name of the umap embedding, default: umap
#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
#' @param seed seed
#'
#' @importFrom uwot umap
#' @importFrom Matrix t
#'
#' @export
#'
getUMAP <- function(object, assay = NULL, data.type = "pca", dims = seq_len(30), umap.key = "umap", overwrite = FALSE, seed = 1){
  # Compute a 2D UMAP embedding from raw data, normalized data, or an
  # existing embedding (e.g. "pca"), and store it under `umap.key`.
  if(data.type %in% c("raw", "norm")){
    data <- vrData(object, assay = assay, norm = (data.type == "norm"))
    # transpose to points x features and densify for uwot::umap
    data <- as.matrix(as(Matrix::t(data),"dgCMatrix"))
  } else{
    # Fix: reuse the fetched embedding names; the original assigned
    # embedding_names and then redundantly re-called vrEmbeddingNames()
    embedding_names <- vrEmbeddingNames(object)
    if(data.type %in% embedding_names) {
      data <- vrEmbeddings(object, assay = assay, type = data.type, dims = dims)
    } else {
      stop("Please provide a data type from one of three choices: raw, norm and pca")
    }
  }
  # get umap (seeded for reproducibility)
  set.seed(seed)
  umap_data <- uwot::umap(data)
  colnames(umap_data) <- c("x", "y")
  vrEmbeddings(object, assay = assay, type = umap.key, overwrite = overwrite) <- umap_data
  # return the updated VoltRon object
  return(object)
}
####
# Image Processing ####
####
#' split_into_tiles
#'
#' split image raster data into non-overlapping square tiles; trailing
#' rows/columns that do not fill a complete tile are dropped
#'
#' @param image_data image raster data (a matrix)
#' @param tile_size tile edge length in pixels
#'
#' @return a list of tile_size x tile_size matrices in row-major tile order
#'
#' @noRd
split_into_tiles <- function(image_data, tile_size = 10) {
  # number of complete tiles that fit in each dimension
  n_row_tiles <- nrow(image_data) %/% tile_size
  n_col_tiles <- ncol(image_data) %/% tile_size
  # Fix: preallocate the result instead of growing the list one element at
  # a time inside the nested loop (accidental O(n^2) append pattern)
  tiles <- vector("list", n_row_tiles * n_col_tiles)
  tile_idx <- 0L
  for (i in seq_len(n_row_tiles)) {
    row_range <- ((i - 1) * tile_size + 1):(i * tile_size)
    for (j in seq_len(n_col_tiles)) {
      col_range <- ((j - 1) * tile_size + 1):(j * tile_size)
      tile_idx <- tile_idx + 1L
      # extract the current tile from the image data matrix
      tiles[[tile_idx]] <- image_data[row_range, col_range]
    }
  }
  # return the list of tiles
  return(tiles)
}