#' @include allgenerics.R
#'
NULL
####
# Nearest Neighbor graphs ####
####
#' Get profile specific neighbors
#'
#' Get neighbors of spatial points
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param method the method used for graph construction, SNN or kNN
#' @param k number of neighbors for kNN
#' @param data.type the type of embedding used for neighborhood calculation, e.g. raw counts (raw), normalized counts (norm), PCA embeddings (pca), UMAP embeddings (umap) etc.
#' @param dims the set of dimensions of the embedding data
#' @param graph.key the name of the graph
#'
#' @importFrom igraph add_edges simplify make_empty_graph vertices E<- E
#'
#' @export
getProfileNeighbors <- function(object, assay = NULL, method = "kNN", k = 10, data.type = "pca", dims = seq_len(30), graph.key = method){
# get data
if(data.type %in% c("raw", "norm")){
nndata <- vrData(object, assay = assay, norm = (data.type == "norm"))
nndata <- t(nndata)
} else {
embedding_names <- vrEmbeddingNames(object)
if(data.type %in% vrEmbeddingNames(object)) {
nndata <- vrEmbeddings(object, assay = assay, type = data.type, dims = dims)
} else {
stop("Please provide a data type from one of three choices: raw, norm and pca")
}
}
# find profile neighbors
# if(knn.method == "FNN"){
# nnedges <- FNN::get.knn(nndata, k = k + 1)
nnedges <- knn_annoy(nndata, k = k + 1)
names(nnedges) <- c("nn.index", "nn.dist")
weights <- NULL
nnedges <-
switch(method,
SNN = {
g.out <- build_snn_number(nnedges$nn.index)
nnedges <- g.out[[1]]
weights <- g.out[[2]]
weights <- weights/(2 * (k+2) - weights)
nnedges
},
kNN = {
nnedges <- nnedges$nn.index
nnedges <- cbind(seq_len(nrow(nndata)), nnedges)
nnedges <- apply(nnedges, 1, function(x){
do.call(c,lapply(x[-1], function(y) return(c(x[1],y))))
})
nnedges
})
nnedges <- rownames(nndata)[nnedges]
# make graph and add edges
graph <- make_empty_graph(directed = FALSE) + vertices(rownames(nndata))
graph <- add_edges(graph, edges = nnedges)
if(!is.null(weights))
igraph::E(graph)$weight <- weights
graph <- simplify(graph, remove.multiple = TRUE, remove.loops = FALSE)
vrGraph(object, graph.type = graph.key) <- graph
# return
return(object)
}
#' knn_annoy
#'
#' knn engine employed by RcppAnnoy package, adapted from \code{BPCells} package.
#'
#' @rdname knn
#'
#' @details **knn_annoy**: Use RcppAnnoy as knn engine
#'
#' @param data data
#' @param query query data (Default: data)
#' @param k number of neighbors for kNN
#' @param n_trees Number of trees during index build time. More trees gives higher accuracy
#' @param search_k Number of nodes to inspect during the query, or -1 for default value. Higher number gives higher accuracy
#'
#' @importFrom RcppAnnoy AnnoyEuclidean
knn_annoy <- function(data, query = data, k = 10, n_trees = 50, search_k = -1) {
annoy <- new(RcppAnnoy::AnnoyEuclidean, ncol(data))
for (i in seq_len(nrow(data))) {
annoy$addItem(i - 1, data[i, ])
}
annoy$build(n_trees)
idx <- matrix(nrow = nrow(query), ncol = k)
dist <- matrix(nrow = nrow(query), ncol = k)
rownames(idx) <- rownames(query)
rownames(dist) <- rownames(query)
for (i in seq_len(nrow(query))) {
res <- annoy$getNNsByVectorList(query[i, ], k, search_k, include_distances = TRUE)
idx[i, ] <- res$item + 1
dist[i, ] <- res$dist
}
list(idx = idx, dist = dist)
}
####
# Clustering ####
####
#' getClusters
#'
#' Get clustering of the VoltRon object
#'
#' @param object a VoltRon object
#' @param resolution the resolution parameter for leiden clustering.
#' @param nclus The number of cluster centers for K-means clustering.
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param method The method of clustering. Use 'leiden' to perform graph clustering and 'kmeans' for K-means based clustering
#' @param label the name for the newly created clustering column in the metadata
#' @param graph the graph type to be used
#' @param seed seed
#' @param abundance_limit the minimum number of points for a cluster, hence clusters with abundance lower than this limit will be appointed to other nearby clusters
#'
#' @importFrom igraph cluster_leiden
#' @importFrom stats kmeans
#'
#' @export
getClusters <- function(object, resolution = 1, nclus = integer(0), assay = NULL, method = "leiden", label = "clusters", graph = "kNN", seed = 1, abundance_limit = 2){
# sample metadata
sample.metadata <- SampleMetadata(object)
# get assay names
assay_names <- vrAssayNames(object, assay = assay)
# get assays
object_subset <- subsetVoltRon(object, assays = assay_names)
# check clustering parameters
.check_clustering_params(method, resolution, nclus, abundance_limit)
# clustering
set.seed(seed)
if(method == "leiden"){
object_graph <- vrGraph(object_subset, assay = assay, graph.type = graph)
clusters <- igraph::cluster_leiden(object_graph, objective_function = "modularity", resolution = resolution)
} else if(method == "kmeans"){
vrdata <- vrData(object_subset, norm = TRUE)
clusters <- stats::kmeans(t(vrdata), centers = nclus)
clusters <- list(names = names(clusters$cluster), membership = clusters$cluster)
} else {
stop("Unrecognized clustering method! Use either 'leiden' for graph clustering or 'kmeans' for K-means clustering")
}
# correct clustering
clusters <- .correct_low_abundant_clusters(object_graph, clusters, abundance_limit)
# update metadata
spatialpoints <- vrSpatialPoints(object, assay = assay)
membership <- setNames(rep(NA,length(spatialpoints)), spatialpoints)
membership[clusters$names] <- clusters$membership
object <- addMetadata(object, assay = assay, value = membership, label = label)
# return
return(object)
}
#' @noRd
.correct_low_abundant_clusters <- function(object_graph, clusters, abundance_limit){
# cluster abundances
cluster_abundance <- table(clusters$membership)
# check if some clusters are low in abundance
ind <- cluster_abundance < abundance_limit
if(any(ind)){
low_abundant_clusters <- names(cluster_abundance)[ind]
clusters$membership[clusters$membership %in% low_abundant_clusters] <- NA
}
return(clusters)
}
#' @noRd
.check_clustering_params <- function(method, resolution, nclus, abundance_limit){
# method related params
if(method == "leiden"){
msg <- "Resolution must be a single numeric and above 0"
if(!is.numeric(resolution))
stop(msg)
if(length(resolution) > 1)
stop(msg)
if(resolution == 0 | resolution < 0)
stop(msg)
} else if(method == "kmeans"){
msg <- "Number of cluster centres (nclus) must be a single integer and should be above 1"
if(!is.numeric(nclus))
stop(msg)
if(length(nclus) > 1)
stop(msg)
if(nclus %% 1 != 0)
stop(msg)
if(nclus == 0)
stop(msg)
}
# low abundant clusters
msg <- "Low abundance limit must be a single integer and should be above 0"
if(!is.numeric(abundance_limit))
stop(msg)
if(length(abundance_limit) > 1)
stop(msg)
if(abundance_limit %% 1 != 0)
stop(msg)
if(abundance_limit == 0 || abundance_limit < 0)
stop(msg)
}