--- a +++ b/R/integration.R @@ -0,0 +1,545 @@ +#### +# Data Transfer #### +#### + +#' transferData +#' +#' transfer data across assays +#' +#' @param object a VoltRon object +#' @param from the name or class of assay whose data transfered to the second assay +#' @param to the name or class of target assay where data is transfered to +#' @param features the set of features from \link{vrFeatures} or metadata columns from \link{Metadata} that are transferred. +#' Only one metadata feature can be transfered at a time. +#' @param new_feature_name the name of the new feature set created from the source assay defined in \code{from} argument. +#' +#' @export +transferData <- function(object, from = NULL, to = NULL, features = NULL, new_feature_name = NULL){ + + # assay list + assaytypes <- c("ROI", "spot", "cell", "molecule", "tile") + + # get Assay IDs from Names and IDs + from <- vrAssayNames(object, assay = from) + to <- vrAssayNames(object, assay = to) + + # check assay names + if(length(from) > 1 | length(to) > 1){ + stop("For now, label transfer can only be accomplished across two assays") + } + + # check if assays are in the same block + sample.metadata <- SampleMetadata(object) + from_assayclass <- sample.metadata[from, "Assay"] + to_assayclass <- sample.metadata[to, "Assay"] + samples <- sample.metadata[c(from, to), "Sample"] + + if(length(unique(samples)) > 1) + stop("Selected assays have to be within the same sample block!") + + # get assay types + to_object_type <- vrAssayTypes(object[[to]]) + from_object_type <- vrAssayTypes(object[[from]]) + + if(which(assaytypes == to_object_type) > which(assaytypes == from_object_type) && from_object_type == "ROI"){ + return(transferLabels(object = object, from = from, to = to, features = features)) + } else { + return(transferFeatureData(object = object, from = from, to = to, features = features, new_feature_name = new_feature_name)) + } +} + +#' transferFeatureData +#' +#' transfer feature data across assays +#' +#' @param object a VoltRon object +#' @param from The ID of assay whose data transfer to the second assay +#' @param to The ID of the target assay where data is transfered to +#' @param features The set of data or metadata features that are transfered. Only one metadata feature can be transfered at a time. +#' @param new_feature_name the name of the new feature set created from the source assay defined in \code{from}. +#' +#' @noRd +transferFeatureData <- function(object, from = NULL, to = NULL, features = NULL, new_feature_name = NULL){ + + # get assays and metadata + from_object <- object[[from]] + from_metadata <- Metadata(object, assay = from) + to_object <- object[[to]] + to_metadata <- Metadata(object, assay = to) + + # get assay types + to_object_type <- vrAssayTypes(to_object) + from_object_type <- vrAssayTypes(from_object) + + # get transfer data type + if(to_object_type == "spot"){ + if(from_object_type == "cell"){ + new_assay <- getSpotsFromCells(from_object, from_metadata, to_object, features = features) + } + } else if(to_object_type == "cell"){ + if(from_object_type == "tile"){ + new_assay <- getCellsFromTiles(from_object, from_metadata, to_object, features = features) + } else if(from_object_type == "spot"){ + new_assay <- getCellsFromSpots(from_object, from_metadata, to_object, features = features) + } + } else if(to_object_type == "ROI"){ + if(from_object_type == "cell"){ + new_assay <- getROIsFromCells(from_object, from_metadata, to_object, features = features) + } + } + + # add new feature set + if(is.null(new_feature_name)){ + new_feature_name <- paste(vrMainFeatureType(object, assay = from)$Feature, "pseudo", sep = "_") + } + object <- addFeature(object, assay = to, data = new_assay, feature_name = new_feature_name) + + # return + object +} + +#' transferLabels +#' +#' transfer labels across assays +#' +#' @param object a VoltRon object +#' @param from The ID of assay whose data transfer to the second assay +#' @param to The ID of the target assay where data is transfered to +#' @param features The set of data or metadata features that are transferred. Only one metadata feature can be transferred at a time. +#' +#' @noRd +transferLabels <- function(object, from = NULL, to = NULL, features = NULL){ + + # get assays and metadata + from_object <- object[[from]] + from_metadata <- Metadata(object, assay = from) + to_object <- object[[to]] + to_metadata <- Metadata(object, assay = to) + + # get assay types + to_object_type <- vrAssayTypes(to_object) + from_object_type <- vrAssayTypes(from_object) + + # get transfer data type + if(from_object_type == "ROI" & to_object_type != "ROI"){ + + # transfer labels + transferedLabelsMetadata <- transferLabelsFromROI(from_object, from_metadata, to_object, to_metadata, features = features) + + # update metadata + # metadata <- Metadata(object, assay = to) + # entities <- vrSpatialPoints(to_object) + # if(is.numeric(value)){ + # metadata[[features]] <- as.numeric(NA) + # } else { + # metadata[[features]] <- "" + # } + # if(is.null(rownames(metadata))){ + # metadata[[features]][match(entities, as.vector(metadata$id))] <- value + # } else { + # metadata[entities,][[features]] <- value + # } + # + # # transfer labels + # Metadata(object, assay = to) <- metadata + object <- addMetadata(object, assay = to, value = transferedLabelsMetadata[[features]], label = features) + } + + # return + object +} + + +#' getSpotsFromCells +#' +#' Generate Psuedo counts per spots and insert to as a separate image assay to the Visium object +#' +#' @param from_object The vrAssay object whose data transfer to the second assay +#' @param from_metadata the metadata associated with \code{from_object} +#' @param to_object The ID of the target vrAssay object where data is transfered to +#' @param features the name of the metadata feature to transfer, if NULL, the rawdata will be transfered +#' +#' @importFrom dplyr %>% right_join +#' @importFrom stats aggregate +#' @importFrom magick image_data +#' +#' @noRd +#' +getSpotsFromCells <- function(from_object, from_metadata = NULL, to_object, features = NULL) { + + # get the spot radius of Visium spots + Vis_spotradius <- vrAssayParams(to_object, param = "spot.radius") + + # get cell and spot coordinates + message("Cell to Spot Distances \n") + coords_spots <- vrCoordinates(to_object) + coords_cells <- vrCoordinates(from_object) + + # get distances from cells to spots + # alldist <- flexclust::dist2(coords_cells, coords_spots) + # cell_to_spot <- FNN::get.knnx(coords_spots, coords_cells, k = 1) + cell_to_spot <- knn_annoy(coords_spots, coords_cells, k = 1) + names(cell_to_spot) <- c("nn.index", "nn.dist") + cell_to_spot_nnid <- vrSpatialPoints(to_object)[cell_to_spot$nn.index[,1]] + names(cell_to_spot_nnid) <- rownames(coords_cells) + cell_to_spot_nndist <- cell_to_spot$nn.dist[,1] + names(cell_to_spot_nndist) <- rownames(coords_cells) + cell_to_spot_nnid <- cell_to_spot_nnid[cell_to_spot_nndist < Vis_spotradius] + + # find associated spot for each cell + message("Find associated spots for each cell \n") + cell_to_spot_id <- names(cell_to_spot_nnid) + + # get data + if(is.null(features)){ + raw_counts <- vrData(from_object, norm = FALSE) + } else { + data_features <- features[features %in% vrFeatures(from_object)] + metadata_features <- features[features %in% colnames(from_metadata)] + if(length(data_features) > 0){ + if(length(metadata_features) > 0){ + stop("Data and metadata features cannot be transfered in the same time!") + } else { + raw_counts <- vrData(from_object, norm = FALSE) + raw_counts <- raw_counts[features,] + message("There are ", length(setdiff(features, data_features)), " unknown features!") + } + } else { + if(length(metadata_features) > 1){ + stop("Only one metadata column can be transfered at a time") + } else if(length(metadata_features) == 1) { + raw_counts <- from_metadata[,metadata_features, drop = FALSE] + rownames_raw_counts <- rownames(raw_counts) + raw_counts <- dummy_cols(raw_counts, remove_first_dummy = FALSE) + raw_counts <- raw_counts[,-1] + raw_counts <- t(raw_counts) + colnames(raw_counts) <- rownames_raw_counts + rownames(raw_counts) <- gsub(paste0("^", metadata_features, "_"), "", rownames(raw_counts)) + } else { + stop("Features cannot be found in data and metadata!") + } + } + } + raw_counts <- raw_counts[,cell_to_spot_id, drop = FALSE] + + # pool cell counts to Spots + message("Aggregating cell profiles in spots \n") + aggregate_raw_counts <- stats::aggregate(t(as.matrix(raw_counts)), list(cell_to_spot_nnid), sum) + aggregate_raw_counts <- data.frame(barcodes = vrSpatialPoints(to_object)) %>% dplyr::right_join(aggregate_raw_counts, by = c("barcodes" = "Group.1")) + rownames(aggregate_raw_counts) <- aggregate_raw_counts$barcodes + aggregate_raw_counts <- t(aggregate_raw_counts[,-1]) + aggregate_raw_counts[is.na(aggregate_raw_counts)] <- 0 + + # return + return(aggregate_raw_counts) +} + +#' getSpotsFromCells +#' +#' Generate Psuedo counts per spots and insert to as a separate image assay to the Visium object +#' +#' @param from_object The vrAssay object whose data transfer to the second assay +#' @param from_metadata the metadata associated with \code{from_object} +#' @param to_object The ID of the target vrAssay object where data is transfered to +#' @param features the name of the metadata feature to transfer, if NULL, the rawdata will be transfered +#' +#' @importFrom dplyr %>% right_join +#' @importFrom stats aggregate +#' @importFrom magick image_data +#' +#' @noRd +#' +getCellsFromSpots <- function(from_object, from_metadata = NULL, to_object, features = NULL) { + + # get the spot radius of Visium spots + radius <- vrAssayParams(from_object, param = "nearestpost.distance")/2 + + # get cell and spot coordinates + message("Spot to Cell Distances \n") + coords_spots <- vrCoordinates(from_object) + coords_cells <- vrCoordinates(to_object) + + # get distances from cells to spots + spot_to_cell <- knn_annoy(coords_spots, coords_cells, k = 1) + names(spot_to_cell) <- c("nn.index", "nn.dist") + nnindex <- spot_to_cell$nn.index[,1] + names_nnindex <- names(nnindex) + nnindex <- vrSpatialPoints(from_object)[nnindex] + names(nnindex) <- names_nnindex + nndist <- spot_to_cell$nn.dist[,1] + nnindex <- nnindex[nndist < radius] + + # find associated spot for each cell + message("Find associated spot for each cell \n") + + # get data + if(is.null(features)){ + raw_counts <- vrData(from_object, norm = FALSE) + } else { + data_features <- features[features %in% vrFeatures(from_object)] + metadata_features <- features[features %in% colnames(from_metadata)] + if(length(data_features) > 0){ + if(length(metadata_features) > 0){ + stop("Data and metadata features cannot be transfered in the same time!") + } else { + raw_counts <- vrData(from_object, norm = FALSE) + raw_counts <- raw_counts[features,] + message("There are ", length(setdiff(features, data_features)), " unknown features!") + } + } else { + if(length(metadata_features) > 1){ + stop("Only one metadata column can be transfered at a time") + } else if(length(metadata_features) == 1) { + raw_counts <- from_metadata[,metadata_features, drop = FALSE] + rownames_raw_counts <- rownames(raw_counts) + raw_counts <- dummy_cols(raw_counts, remove_first_dummy = FALSE) + raw_counts <- raw_counts[,-1] + raw_counts <- t(raw_counts) + colnames(raw_counts) <- rownames_raw_counts + rownames(raw_counts) <- gsub(paste0("^", metadata_features, "_"), "", rownames(raw_counts)) + } else { + stop("Features cannot be found in data and metadata!") + } + } + } + raw_counts <- raw_counts[,nnindex, drop = FALSE] + colnames(raw_counts) <- names(nnindex) + + # return + return(raw_counts) +} + +#' getROIsFromCells +#' +#' Generate Psuedo counts per ROIs and insert to as a separate image assay to the Visium object +#' +#' @param from_object The vrAssay object whose data transfer to the second assay +#' @param from_metadata the metadata associated with \code{from_object} +#' @param to_object The ID of the target vrAssay object where data is transfered to +#' @param features the name of the metadata feature to transfer, if NULL, the rawdata will be transfered +#' +#' @importFrom dplyr %>% right_join +#' @importFrom stats aggregate +#' @importFrom magick image_data +#' +#' @noRd +#' +getROIsFromCells <- function(from_object, from_metadata = NULL, to_object, features = NULL) { + + # get cell and ROIs coordinates + message("Cell to ROI Distances \n") + segments_rois <- vrSegments(to_object) + coords_cells <- vrCoordinates(from_object) + + # find associated spot for each cell + message("Find associated ROIs for each cell \n") + cell_to_roi_id <- NULL + cell_to_roi_labelid <- NULL + names_segments_rois <- names(segments_rois) + for(i in seq_len(length(segments_rois))){ + cur_segt <- segments_rois[[i]] + if(ncol(cur_segt) > 3){ + in.list <- point.in.circle(coords_cells[,1], coords_cells[,2], cur_segt$x, cur_segt$y, cur_segt$rx) + } else { + in.list <- sp::point.in.polygon(coords_cells[,1], coords_cells[,2], cur_segt$x, cur_segt$y) + } + in.list.cells <- rownames(coords_cells)[!!in.list] + cell_to_roi_id <- c(cell_to_roi_id, in.list.cells) + cell_to_roi_labelid <- c(cell_to_roi_labelid, rep(names_segments_rois[i], length(in.list.cells))) + } + + # get data + if(is.null(features)){ + raw_counts <- vrData(from_object, norm = FALSE) + } else { + data_features <- features[features %in% vrFeatures(from_object)] + metadata_features <- features[features %in% colnames(from_metadata)] + if(length(data_features) > 0){ + if(length(metadata_features) > 0){ + stop("Data and metadata features cannot be transfered in the same time!") + } else { + raw_counts <- vrData(from_object, norm = FALSE) + raw_counts <- raw_counts[features,] + message("There are ", length(setdiff(features, data_features)), " unknown features!") + } + } else { + if(length(metadata_features) > 1){ + stop("Only one metadata column can be transfered at a time") + } else if(length(metadata_features) == 1) { + raw_counts <- from_metadata[,metadata_features, drop = FALSE] + rownames_raw_counts <- rownames(raw_counts) + raw_counts <- dummy_cols(raw_counts, remove_first_dummy = FALSE) + raw_counts <- raw_counts[,-1] + raw_counts <- t(raw_counts) + colnames(raw_counts) <- rownames_raw_counts + rownames(raw_counts) <- gsub(paste0("^", metadata_features, "_"), "", rownames(raw_counts)) + } else { + stop("Features cannot be found in data and metadata!") + } + } + } + raw_counts <- raw_counts[,cell_to_roi_id, drop = FALSE] + + # pool cell counts to Spots + message("Aggregating cell profiles in spots \n") + aggregate_raw_counts <- stats::aggregate(t(as.matrix(raw_counts)), list(cell_to_roi_labelid), sum) + aggregate_raw_counts <- data.frame(barcodes = vrSpatialPoints(to_object)) %>% dplyr::right_join(aggregate_raw_counts, by = c("barcodes" = "Group.1")) + rownames(aggregate_raw_counts) <- aggregate_raw_counts$barcodes + aggregate_raw_counts <- t(aggregate_raw_counts[,-1]) + aggregate_raw_counts[is.na(aggregate_raw_counts)] <- 0 + + # return + return(aggregate_raw_counts) +} + +getCellsFromTiles <- function(from_object, from_metadata = NULL, to_object, features = NULL, k = 5) { + + # get cell and spot coordinates + message("Tile to Cell Distances \n") + coords_cells <- vrCoordinates(to_object) + coords_tiles <- vrCoordinates(from_object) + + # get distances from cells to spots + # tile_to_cell <- FNN::get.knnx(coords_tiles, coords_cells, k = k) + tile_to_cell <- knn_annoy(coords_tiles, coords_cells, k = k) + names(tile_to_cell) <- c("nn.index", "nn.dist") + tile_to_cell_nnid <- data.frame(id = rownames(coords_cells), tile_to_cell$nn.index) + # tile_to_cell_nnid <- reshape2::melt(tile_to_cell_nnid, id.vars = "id") + tile_to_cell_nnid <- data.table::melt(tile_to_cell_nnid, id.vars = "id") + tile_id <- vrSpatialPoints(from_object)[tile_to_cell_nnid$value] + tile_to_cell_nnid <- tile_to_cell_nnid$id + + # get data + raw_counts <- vrData(from_object, norm = FALSE) + raw_counts <- raw_counts[,tile_id] + + # pool cell counts to Spots + message("Aggregating tile profiles in cells \n") + aggregate_raw_counts <- stats::aggregate(t(as.matrix(raw_counts)), list(tile_to_cell_nnid), mean) + aggregate_raw_counts <- data.frame(barcodes = vrSpatialPoints(to_object)) %>% dplyr::right_join(aggregate_raw_counts, by = c("barcodes" = "Group.1")) + rownames(aggregate_raw_counts) <- aggregate_raw_counts$barcodes + aggregate_raw_counts <- t(aggregate_raw_counts[,-1]) + aggregate_raw_counts[is.na(aggregate_raw_counts)] <- 0 + + # return + return(aggregate_raw_counts) +} + +transferLabelsFromROI <- function(from_object, from_metadata = NULL, to_object, to_metadata = NULL, features = NULL) { + + # get ROI and other coordinates + segments_roi <- vrSegments(from_object) + coords <- vrCoordinates(to_object) + spatialpoints <- rownames(coords) + + # check if all features are in from_metadata + if(!all(features %in% colnames(from_metadata))){ + stop("Some features are not found in the ROI metadata!") + } + + # annotate points in the to object + for(feat in features){ + + # get from metadata labels + feat_labels <- from_metadata[,feat] + + # get to metadata + new_label <- rep("undefined", length(spatialpoints)) + names(new_label) <- spatialpoints + + for(i in seq_len(length(segments_roi))){ + cur_poly <- segments_roi[[i]][,c("x","y")] + in.list <- sp::point.in.polygon(coords[,1], coords[,2], cur_poly[,1], cur_poly[,2]) + new_label[rownames(coords)[!!in.list]] <- feat_labels[i] + } + + to_metadata[[feat]] <- new_label + } + + + # return label + return(to_metadata) +} + +#### +# Embedding #### +#### + +getJointPCA <- function(object, assay.1 = NULL, assay.2 = NULL, dims = 30, seed = 1, ...){ + + # get assay names + assay <- assay.1 + assay_names <- vrAssayNames(object, assay = assay) + + # if there are features of a VoltRon object, then get variable features too + assay_features <- vrFeatures(object, assay = assay) + if(length(assay_features) > 0) { + features <- getVariableFeatures(object, assay = assay) + vrMainAssay(object) <- object@sample.metadata[assay, "Assay"] + object_subset <- subsetVoltRon(object, features = features) + vrMainAssay(object_subset) <- vrMainAssay(object) + if(dims > length(features)){ + message("Requested more PC dimensions than existing features: dims = length(features) now!") + dims <- length(features) + } + } else { + object_subset <- object + } + + # set seed + set.seed(seed) + + # get PCA embedding + normdata.1 <- vrData(object_subset, assay = assay, norm = TRUE) + scale.data <- apply(normdata.1, 1, scale) + pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data)) + pr.data.1 <- pr.data$x + colnames(pr.data.1) <- paste0("PC", seq_len(dims)) + rownames(pr.data.1) <- colnames(normdata.1) + normdata.1 <- normdata.1/(pr.data$sdev[1]^2) + + # get assay names + assay <- assay.2 + assay_names <- vrAssayNames(object, assay = assay) + + # if there are features of a VoltRon object, then get variable features too + assay_features <- vrFeatures(object, assay = assay) + if(length(assay_features) > 0) { + features <- getVariableFeatures(object, assay = assay) + vrMainAssay(object) <- object@sample.metadata[assay, "Assay"] + object_subset <- subsetVoltRon(object, features = features) + vrMainAssay(object_subset) <- vrMainAssay(object) + if(dims > length(features)){ + message("Requested more PC dimensions than existing features: dims = length(features) now!") + dims <- length(features) + } + } else { + object_subset <- object + } + + # get PCA embedding + normdata.2 <- vrData(object_subset, assay = assay, norm = TRUE) + scale.data <- apply(normdata.2, 1, scale) + pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data)) + pr.data.2 <- pr.data$x + colnames(pr.data.2) <- paste0("PC", seq_len(dims)) + rownames(pr.data.2) <- colnames(normdata.2) + normdata.2 <- normdata.2/(pr.data$sdev[1]^2) + + # get joint PCA + normdata <- rbind(normdata.1, normdata.2) + scale.data <- apply(normdata, 1, scale) + pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data)) + prsdev <- pr.data$sdev + pr.data <- pr.data$x + colnames(pr.data) <- paste0("PC", seq_len(dims)) + rownames(pr.data) <- colnames(normdata) + normdata <- normdata/(prsdev[1]^2) + + # set Embeddings + vrEmbeddings(object, type = "pca_joint", assay = assay.1, ...) <- pr.data + vrEmbeddings(object, type = "pca_joint", assay = assay.2, ...) <- pr.data + + # return + return(object) +}