--- a +++ b/R/import.R @@ -0,0 +1,2174 @@ +#### +# 10X Genomics #### +#### + +#### +## Xenium #### +#### + +#' importXenium +#' +#' Importing Xenium data +#' +#' @param dir.path path to Xenium output folder +#' @param selected_assay selected assay from Xenium +#' @param assay_name the assay name of the SR object +#' @param sample_name the name of the sample +#' @param use_image if TRUE, the DAPI image will be used. +#' @param morphology_image the name of the lowred morphology image. Default: morphology_lowres.tif +#' @param resolution_level the level of resolution within Xenium OME-TIFF image, see \link{generateXeniumImage}. Default: 7 (553x402) +#' @param overwrite_resolution if TRUE, the image "file.name" will be generated again although it exists at "dir.path" +#' @param image_name the image name of the Xenium assay, Default: main +#' @param channel_name the channel name of the image of the Xenium assay, Default: DAPI +#' @param import_molecules if TRUE, molecule assay will be created along with cell assay. +#' @param verbose verbose +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom magick image_read image_info +#' @importFrom utils read.csv +#' @importFrom data.table fread +#' @importFrom ids random_id +#' +#' @export +#' +importXenium <- function (dir.path, selected_assay = "Gene Expression", assay_name = "Xenium", sample_name = NULL, use_image = TRUE, + morphology_image = "morphology_lowres.tif", resolution_level = 7, overwrite_resolution = FALSE, + image_name = "main", channel_name = "DAPI", import_molecules = FALSE, verbose = TRUE, ...) +{ + # cell assay + if(verbose) + message("Creating cell level assay ...") + + # raw counts + datafile <- paste0(dir.path, "/cell_feature_matrix.h5") + if(file.exists(datafile)){ + rawdata <- import10Xh5(filename = datafile) + if(any(names(rawdata) %in% selected_assay)){ + rawdata <- as.matrix(rawdata[[selected_assay]]) + } else { + stop("There are no assays called ", selected_assay, " in the h5 file!") + } + } else { + stop("There are no files named 'cell_feature_matrix.h5' in the path") + } + + # image + if(use_image){ + generateXeniumImage(dir.path, file.name = morphology_image, resolution_level = resolution_level, overwrite_resolution = overwrite_resolution, + verbose = verbose) + image_file <- paste0(dir.path, "/", morphology_image) + if(file.exists(image_file)){ + image <- image_read(image_file) + } else { + stop("There are no spatial image files in the path") + } + # scale the xenium image instructed by 10x Genomics help page + scaleparam <- 0.2125*(2^(resolution_level-1)) + } else { + image <- NULL + } + + # coordinates + coord_file <- paste0(dir.path, "/cells.csv.gz") + if(file.exists(coord_file)){ + Xenium_coords <- utils::read.csv(file = coord_file) + coords <- as.matrix(Xenium_coords[,c("x_centroid", "y_centroid")]) + colnames(coords) <- c("x","y") + if(use_image) { + coords <- coords/scaleparam + imageinfo <- unlist(magick::image_info(image)[c("height")]) + range_coords <- c(0,imageinfo) + } else { + range_coords <- range(coords[,2]) + } + coords[,2] <- range_coords[2] - coords[,2] + range_coords[1] + } else { + stop("There are no file named 'cells.csv.gz' in the path") + } + + # segments + segments_file <- paste0(dir.path, "/cell_boundaries.csv.gz") + if(file.exists(segments_file)){ + segments <- as.data.frame(data.table::fread(segments_file)) + segments <- segments[,c("cell_id", "vertex_x", "vertex_y")] + colnames(segments) <- c("cell_id", "x", "y") + if(use_image){ + segments[,c("x","y")] <- segments[,c("x","y")]/scaleparam + } + segments[,"y"] <- range_coords[2] - segments[,"y"] + range_coords[1] + segments <- segments %>% dplyr::group_split(cell_id) + segments <- as.list(segments) + names(segments) <- rownames(coords) + } else { + stop("There are no file named 'cell_boundaries.csv.gz' in the path") + } + + # create VoltRon object for cells + cell_object <- formVoltRon(rawdata, metadata = NULL, image = image, coords, segments = segments, main.assay = assay_name, + assay.type = "cell", image_name = image_name, main_channel = channel_name, sample_name = sample_name, + feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...) + + # molecule assay + if(!import_molecules){ + return(cell_object) + } else { + if(verbose) + message("Creating molecule level assay ...") + # transcripts + transcripts_file <- paste0(dir.path, "/transcripts.csv.gz") + if(!file.exists(transcripts_file)){ + stop("There are no file named 'transcripts.csv.gz' in the path") + } else { + # get subcellur data components + subcellular_data <- data.table::fread(transcripts_file) + subcellular_data <- subcellular_data[,c("transcript_id", colnames(subcellular_data)[!colnames(subcellular_data) %in% "transcript_id"]), with = FALSE] + colnames(subcellular_data)[colnames(subcellular_data)=="transcript_id"] <- "id" + colnames(subcellular_data)[colnames(subcellular_data)=="feature_name"] <- "gene" + subcellular_data <- subcellular_data[subcellular_data$qv >= 20, ] + + # coordinates + coords <- as.matrix(subcellular_data[,c("x_location", "y_location", "z_location")]) + colnames(coords) <- c("x", "y", "z") + if(use_image){ + coords[,c("x", "y")] <- coords[,c("x", "y")]/scaleparam + } + coords[,"y"] <- range_coords[2] - coords[,"y"] + range_coords[1] + + # metadata + # mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell_id", "transcript_id", "x_location", "y_location")], with = FALSE] + mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell_id", "transcript_id", "x_location", "y_location", "z_location")], with = FALSE] + set.seed(nrow(mol_metadata$id)) + mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE)) + mol_metadata$assay_id <- "Assay1" + mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")] + + # coord names + rownames(coords) <- mol_metadata$id + + # create VoltRon assay for molecules + mol_assay <- formAssay(coords = coords, image = image, type = "molecule", main_image = image_name, main_channel = channel_name) + + # merge assays in one section + if(verbose) + message("Merging assays ...") + sample.metadata <- SampleMetadata(cell_object) + object <- addAssay(cell_object, + assay = mol_assay, + metadata = mol_metadata, + assay_name = paste0(assay_name, "_mol"), + sample = sample.metadata["Assay1", "Sample"], + layer = sample.metadata["Assay1", "Layer"]) + + # connectivity + connectivity <- data.table::data.table(transcript_id = mol_metadata$id, cell_id = subcellular_data[["cell_id"]]) + if(is.numeric(connectivity$cell_id)){ + connectivity <- subset(connectivity, cell_id != -1) + connectivity[["cell_id"]] <- vrSpatialPoints(cell_object)[connectivity[["cell_id"]]] + } else { + connectivity <- subset(connectivity, cell_id != "UNASSIGNED") + connectivity$cell_assay_id <- "_Assay1" + connectivity[, cell_id:=do.call(paste0,.SD), .SDcols=c("cell_id", "cell_assay_id")] + connectivity$cell_assay_id <- NULL + } + + # add connectivity of spatial points across assays + object <- addLayerConnectivity(object, + connectivity = connectivity, + sample = sample.metadata["Assay1", "Sample"], + layer = sample.metadata["Assay1", "Layer"]) + + # return + return(object) + } + } +} + +#' generateXeniumImage +#' +#' Generate a low resolution DAPI image of the Xenium experiment +#' +#' @param dir.path Xenium output folder +#' @param increase.contrast increase the contrast of the image before writing +#' @param resolution_level the level of resolution within Xenium OME-TIFF image. Default: 7 (553x402) +#' @param overwrite_resolution if TRUE, the image "file.name" will be generated again although it exists at "dir.path" +#' @param output.path The path to the new morphology image created if the image should be saved to a location other than Xenium output folder. +#' @param file.name the name of the lowred morphology image. Default: morphology_lowres.tif +#' @param verbose verbose +#' @param ... additional parameters passed to the \link{writeImage} function +#' +#' @importFrom EBImage writeImage +#' +#' @details +#' The Xenium morphology_mip.ome.tif file that is found under the outs folder comes is an hyperstack of different resolutions of the DAPI image. +#' \link{generateXeniumImage} allows extracting only one of these layers by specifying the \code{resolution} parameter (Default: 7 for 553x402) among 1 to 8. +#' Lower incides of resolutions have higher higher resolutions, e.g. 1 for 35416x25778. Note that you may need to allocate larger memory of Java to import +#' higher resolution images. +#' +#' @export +#' +generateXeniumImage <- function(dir.path, increase.contrast = TRUE, resolution_level = 7, overwrite_resolution = FALSE, + output.path = NULL, file.name = "morphology_lowres.tif", verbose = TRUE, ...) { + + # file path to either Xenium output folder or specified folder + file.path <- paste0(dir.path, "/", file.name) + output.file <- paste0(output.path, "/", file.name) + + # check if the file exists in either Xenium output folder, or the specified location + if((file.exists(file.path) | file.exists(paste0(output.file))) & !overwrite_resolution){ + if(verbose) + message(file.name, " already exists!") + } else { + if (!requireNamespace('RBioFormats')) + stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") + if(dir.exists(paste0(dir.path, "/morphology_focus"))){ + if(verbose) + message("Loading morphology_focus_0000.ome.tif ...") + morphology_image_lowres <- RBioFormats::read.image(paste0(dir.path, "/morphology_focus/morphology_focus_0000.ome.tif"), + resolution = resolution_level, + subset=list(C=1)) + } else if(file.exists(paste0(dir.path, "/morphology_mip.ome.tif"))) { + if(verbose) + message("Loading morphology_mip.ome.tif ...") + morphology_image_lowres <- RBioFormats::read.image(paste0(dir.path, "/morphology_mip.ome.tif"), resolution = resolution_level) + } + + # pick a resolution level + image_info <- morphology_image_lowres@metadata$coreMetadata + if(verbose) + message(" Image Resolution (X:", image_info$sizeX, " Y:", image_info$sizeY, ") ...") + + # increase contrast using EBImage + if(increase.contrast) { + if(verbose) + message(" Increasing Contrast ...") + morphology_image_lowres <- (morphology_image_lowres/max(morphology_image_lowres)) + } + + # write to the same folder + if(verbose) + message(" Writing Tiff File ...") + if(is.null(output.path)){ + EBImage::writeImage(morphology_image_lowres, file = file.path, ...) + } else { + EBImage::writeImage(morphology_image_lowres, file = output.file, ...) + } + } + invisible() +} + +#### +## Visium #### +#### + +#' importVisium +#' +#' Importing Visium data +#' +#' @param dir.path path to Visium output folder +#' @param selected_assay selected assay from Visium +#' @param assay_name the assay name +#' @param sample_name the name of the sample +#' @param image_name the image name of the Visium assay, Default: main +#' @param channel_name the channel name of the image of the Visium assay, Default: H&E +#' @param inTissue if TRUE, only barcodes that are in the tissue will be kept (default: TRUE) +#' @param resolution_level the level of resolution of Visium image: "lowres" (default) or "hires" +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom magick image_read image_info +#' @importFrom rjson fromJSON +#' @importFrom utils read.csv +#' +#' @export +#' +importVisium <- function(dir.path, selected_assay = "Gene Expression", assay_name = "Visium", sample_name = NULL, image_name = "main", channel_name = "H&E", inTissue = TRUE, resolution_level = "lowres", ...) +{ + # raw counts + listoffiles <- list.files(dir.path) + datafile <- listoffiles[grepl("filtered_feature_bc_matrix.h5", listoffiles)][1] + datafile <- paste0(dir.path, "/", datafile) + if(file.exists(datafile)){ + rawdata <- import10Xh5(filename = datafile) + if(any(names(rawdata) %in% selected_assay)){ + rawdata <- as.matrix(rawdata[[selected_assay]]) + } else { + stop("There are no assays called ", selected_assay, " in the h5 file!") + } + } else { + stop("There are no files named 'filtered_feature_bc_matrix.h5' in the path") + } + + # resolution + if(!resolution_level %in% c("lowres","hires")) + stop("resolution_level should be either 'lowres' or 'hires'!") + + # image + image_file <- paste0(dir.path, paste0("/spatial/tissue_", resolution_level, "_image.png")) + if(file.exists(image_file)){ + image <- magick::image_read(image_file) + info <- image_info(image) + } else { + stop("There are no spatial image files in the path") + } + + # coordinates + coords_file <- list.files(paste0(dir.path, "/spatial/"), full.names = TRUE) + coords_file <- coords_file[grepl("tissue_positions",coords_file)] + if(length(coords_file) == 1){ + if(grepl("tissue_positions_list.csv", coords_file)) { + coords <- utils::read.csv(file = coords_file, header = FALSE) + colnames(coords) <- c("barcode", "in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres") + } else { + coords <- utils::read.csv(file = coords_file, header = TRUE) + } + } else if(length(coords_file) > 1) { + stop("There are more than 1 position files in the path") + } else { + stop("There are no files named 'tissue_positions.csv' in the path") + } + + if(inTissue){ + coords <- coords[coords$in_tissue==1,] + rawdata <- rawdata[,colnames(rawdata) %in% coords$barcode] + } + coords <- coords[match(colnames(rawdata), coords$barcode),] + spotID <- coords$barcode + coords <- as.matrix(coords[,c("pxl_col_in_fullres", "pxl_row_in_fullres")], ) + colnames(coords) <- c("x", "y") + rownames(coords) <- spotID + + # scale coordinates + scale_file <- paste0(dir.path, "/spatial/scalefactors_json.json") + if(file.exists(scale_file)){ + scalefactors <- rjson::fromJSON(file = scale_file) + scales <- scalefactors[[paste0("tissue_", resolution_level, "_scalef")]] + params <- list( + nearestpost.distance = 200*scales, # distance to nearest spot + spot.radius = scalefactors$spot_diameter_fullres*scales, + vis.spot.radius = scalefactors$spot_diameter_fullres*scales*2, + spot.type = "circle") + coords <- coords*scales + coords[,2] <- info$height - coords[,2] + } else { + stop("There are no files named 'scalefactors_json.json' in the path") + } + + # create VoltRon + formVoltRon(rawdata, metadata = NULL, image, coords, main.assay = assay_name, params = params, assay.type = "spot", + image_name = image_name, main_channel = channel_name, sample_name = sample_name, + feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...) +} + +#' importVisiumHD +#' +#' Importing VisiumHD data +#' +#' @param dir.path path to Visium output folder +#' @param bin.size bin size of the VisiumHD output (Exp: "2", "8" and "16") +#' @param selected_assay selected assay from Visium +#' @param assay_name the assay name +#' @param sample_name the name of the sample +#' @param image_name the image name of the Visium assay, Default: main +#' @param channel_name the channel name of the image of the Visium assay, Default: H&E +#' @param inTissue if TRUE, only barcodes that are in the tissue will be kept (default: TRUE) +#' @param resolution_level the level of resolution of Visium image: "lowres" (default) or "hires" +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom magick image_read image_info +#' @importFrom rjson fromJSON +#' @importFrom utils read.csv +#' +#' @export +#' +importVisiumHD <- function(dir.path, bin.size = "8", selected_assay = "Gene Expression", assay_name = "VisiumHD", sample_name = NULL, image_name = "main", channel_name = "H&E", inTissue = TRUE, resolution_level = "lowres", ...){ + + # raw counts + bin.size <- formatC(as.numeric(bin.size), width = 3, format = "d", flag = "0") + dir.path <- paste0(dir.path, "binned_outputs/square_", bin.size, "um/") + listoffiles <- list.files(dir.path) + datafile <- listoffiles[grepl("filtered_feature_bc_matrix.h5", listoffiles)][1] + datafile <- paste0(dir.path, "/", datafile) + if(file.exists(datafile)){ + rawdata <- import10Xh5(filename = datafile) + if(any(names(rawdata) %in% selected_assay)){ + rawdata <- as(rawdata[[selected_assay]], "CsparseMatrix") + } else { + stop("There are no assays called ", selected_assay, " in the h5 file!") + } + } else { + stop("There are no files named 'filtered_feature_bc_matrix.h5' in the path") + } + + # resolution + if(!resolution_level %in% c("lowres","hires")) + stop("resolution_level should be either 'lowres' or 'hires'!") + + # image + image_file <- paste0(dir.path, paste0("/spatial/tissue_", resolution_level, "_image.png")) + if(file.exists(image_file)){ + image <- magick::image_read(image_file) + info <- magick::image_info(image) + } else { + stop("There are no spatial image files in the path") + } + + # coordinates + if(!requireNamespace("arrow")) + stop("Please install arrow package to import VisiumHD!: install.packages('arrow')") + coords_file <- list.files(paste0(dir.path, "/spatial/"), full.names = TRUE) + coords_file <- coords_file[grepl("tissue_positions",coords_file)] + if(length(coords_file) == 1){ + coords <- data.table::as.data.table(arrow::read_parquet(coords_file, as_data_frame = FALSE)) + } else if(length(coords_file) > 1) { + stop("There are more than 1 position files in the path") + } else { + stop("There are no files named 'tissue_positions.parquet' in the path") + } + if(inTissue){ + coords <- subset(coords, in_tissue == 1) + rawdata <- rawdata[,colnames(rawdata) %in% coords$barcode] + } + coords <- coords[match(colnames(rawdata), coords$barcode),] + spotID <- coords$barcode + coords <- as.matrix(coords[,c("pxl_col_in_fullres", "pxl_row_in_fullres")], ) + colnames(coords) <- c("x", "y") + rownames(coords) <- spotID + + # scale coordinates + scale_file <- paste0(dir.path, "/spatial/scalefactors_json.json") + if(file.exists(scale_file)){ + scalefactors <- rjson::fromJSON(file = scale_file) + scales <- scalefactors[[paste0("tissue_", resolution_level, "_scalef")]] + params <- list( + nearestpost.distance = scalefactors$spot_diameter_fullres*scales*(3/sqrt(2)), + spot.radius = scalefactors$spot_diameter_fullres*scales, + vis.spot.radius = scalefactors$spot_diameter_fullres*scales, + spot.type = "square") + coords <- coords*scales + coords[,2] <- info$height - coords[,2] + } else { + stop("There are no files named 'scalefactors_json.json' in the path") + } + + # create VoltRon + formVoltRon(rawdata, metadata = NULL, image, coords, main.assay = assay_name, params = params, assay.type = "spot", + image_name = image_name, main_channel = channel_name, sample_name = sample_name, + feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...) +} + +#### +## Auxiliary #### +#### + +#' import10Xh5 +#' +#' import the sparse matrix from the H5 file +#' +#' @param filename the path to h5 file +#' +#' @importFrom Matrix sparseMatrix +#' +#' @export +import10Xh5 <- function(filename){ + + # check package + if(!requireNamespace("rhdf5")){ + stop("You have to install the rhdf5 package!: BiocManager::install('rhdf5')") + } + + # check file + if(file.exists(filename)){ + matrix.10X <- rhdf5::h5read(file = filename, name = "matrix") + } else { + stop("There are no files named ", filename," in the path") + } + + # get data, barcodes and feature + features <- matrix.10X[["features"]][["name"]] + feature_type <- matrix.10X[["features"]][["feature_type"]] + cells <- matrix.10X[["barcodes"]] + mat <- matrix.10X[["data"]] + indices <- matrix.10X[["indices"]] + indptr <- matrix.10X[["indptr"]] + sparse.mat <- sparseMatrix(i = indices + 1, p = indptr, + x = as.numeric(mat), dims = c(length(features), length(cells)), + repr = "T") + colnames(sparse.mat) <- cells + rownames(sparse.mat) <- make.unique(features) + + # separate feature types + matrix.10X <- list() + for(feat in unique(feature_type)){ + cur_features <- features[feature_type %in% feat] + cur_mat <- sparse.mat[features %in% cur_features,] + matrix.10X[[feat]] <- cur_mat + } + + return(matrix.10X) +} + +#### +# Nanostring #### +#### + +#### +## GeoMx #### +#### + +#' importGeoMx +#' +#' Import GeoMx data +#' +#' @param dcc.path path to the folder where the dcc files are found +#' @param pkc.file path to the pkc file +#' @param summarySegment the metadata csv (sep = ";") or excel file, if the file is an excel file, \code{summarySegmentSheetName} should be provided as well. +#' @param summarySegmentSheetName the sheet name of the excel file, \code{summarySegment} +#' @param assay_name the assay name, default: GeoMx +#' @param image the reference morphology image of the GeoMx assay +#' @param segment_polygons if TRUE, the ROI polygons are parsed from the OME.TIFF file +#' @param ome.tiff the OME.TIFF file of the GeoMx experiment if exists +#' @param resolution_level the level of resolution within GeoMx OME-TIFF image, Default: 3 +#' @param image_name the image name of the Visium assay, Default: main +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom dplyr %>% full_join +#' @importFrom utils read.csv +#' @importFrom magick image_info image_read +#' +#' @export +#' +importGeoMx <- function(dcc.path, pkc.file, summarySegment, summarySegmentSheetName, assay_name = "GeoMx", + image = NULL, segment_polygons = FALSE, ome.tiff = NULL, resolution_level = 3, image_name = "main", ...) +{ + # Get pkc file + if(file.exists(pkc.file)){ + pkcdata <- readPKC(pkc.file) + } else { + stop("pkc file is not found!") + } + + # Get dcc file + if(file.exists(dcc.path)){ + dcc_files <- dir(dcc.path, pattern = ".dcc$", full.names = TRUE) + if(length(dcc_files) == 0){ + stop("no dcc files are found under ", dcc.path) + } else { + dcc_files <- dcc_files[!grepl("A01.dcc$", dcc_files)] + dcc_filenames <- dir(dcc.path, pattern = ".dcc$", full.names = FALSE) + dcc_filenames <- dcc_filenames[!grepl("A01.dcc$", dcc_filenames)] + dcc_filenames <- gsub(".dcc$", "", dcc_filenames) + dcc_filenames <- gsub("-", "_", dcc_filenames) + dccData <- sapply(dcc_files, readDCC, simplify = FALSE, USE.NAMES = FALSE) + names(dccData) <- dcc_filenames + } + } else { + stop("path to dcc files does not exist!") + } + + # merge dcc files + rawdata <- NULL + for(i in seq_len(length(dccData))){ + cur_data <- dccData[[i]]$Code_Summary + colnames(cur_data) <- c("RTS_ID", dcc_filenames[i]) + if(i == 1){ + rawdata <- cur_data + } else { + suppressMessages(rawdata <- rawdata %>% full_join(cur_data)) + } + } + rawdata[is.na(rawdata)] <- 0 + rownames(rawdata) <- rawdata$RTS_ID + rawdata <- rawdata[,!colnames(rawdata) %in% "RTS_ID"] + + # get negative probes and targets + # NegProbes <- pkcdata$RTS_ID[pkcdata$Target == "NegProbe-WTX"] + NegProbes <- pkcdata$RTS_ID[grepl("NegProbe", pkcdata$Target)] + + # negative probes + rawdata_neg <- rawdata[rownames(rawdata) %in% NegProbes, ] + rownames(rawdata_neg) <- paste0(rownames(rawdata_neg), "_", + pkcdata$Target[match(rownames(rawdata_neg), pkcdata$RTS_ID)]) + rawdata_neg <- as.matrix(rawdata_neg) + + # other probes + rawdata <- rawdata[!rownames(rawdata) %in% NegProbes, ] + rownames(rawdata) <- pkcdata$Target[match(rownames(rawdata), pkcdata$RTS_ID)] + rawdata <- as.matrix(rawdata) + + # get segment summary + if(file.exists(summarySegment)){ + if(grepl(".xls$|.xlsx$", summarySegment)){ + if (!requireNamespace('xlsx')) + stop("Please install xlsx package for using the read.xlsx function!: install.packages('xlsx')") + if(!is.null(summarySegmentSheetName)){ + segmentsummary <- xlsx::read.xlsx(summarySegment, sheetName = summarySegmentSheetName) + } else { + stop("Please provide 'summarySegmentSheetName' for the excel sheet name!") + } + } else if(grepl(".csv$", summarySegment)) { + segmentsummary <- utils::read.csv(summarySegment, row.names = NULL, header = T, sep = ";") + } + rownames(segmentsummary) <- gsub(".dcc$", "", segmentsummary$Sample_ID) + rownames(segmentsummary) <- gsub("-", "_", rownames(segmentsummary)) + if(all(dcc_filenames %in% rownames(segmentsummary))){ + segmentsummary <- segmentsummary[dcc_filenames, ] + } else{ + stop("Some GeoMx dcc files are not represented in the segment summary file!") + } + } else { + stop(summarySegment, " is not found!") + } + + # get image + if(!is.null(image)){ + if(file.exists(image)){ + image <- magick::image_read(image) + } else { + stop(image, " is not found!") + } + } else { + stop("Please provide a image for the GeoMx data!") + } + geomx_image_info <- magick::image_info(image) + + # get coordinates + coords <- segmentsummary[,c("X","Y")] + colnames(coords) <- c("x", "y") + rownames(coords) <- segmentsummary$ROI.name + coords <- rescaleGeoMxPoints(coords, segmentsummary, geomx_image_info) + + # get ROI segments (polygons) + segments <- list() + if(!is.null(ome.tiff)){ + + # get segments + segments <- importGeoMxSegments(ome.tiff, segmentsummary, geomx_image_info) + segments <- segments[rownames(coords)] + + # parse channels if ome.tiff is given + channels <- importGeoMxChannels(ome.tiff, segmentsummary, geomx_image_info, resolution_level = resolution_level) + image <- c(list(scanimage = image), channels) + } + + # create VoltRon for non-negative probes + object <- formVoltRon(rawdata, metadata = segmentsummary, image, coords, segments, main.assay = assay_name, assay.type = "ROI", + feature_name = "RNA", image_name = image_name, ...) + + # add negative probe assay as new feature set + object <- addFeature(object, assay = assay_name, data = rawdata_neg, feature_name = "NegProbe") + + # return + return(object) +} + +#' readPKC +#' +#' Read a NanoString Probe Kit Configuration (PKC) file. Adapted from \code{GeomxTools} package. +#' +#' @param file A character string containing the path to the PKC file. +#' @param default_pkc_vers Optional list of pkc file names to use as default if more than one pkc version of each module is provided. +#' +#' @importFrom rjson fromJSON +#' +#' @noRd +readPKC <- function (file, default_pkc_vers = NULL) +{ + pkc_json_list <- lapply(file, function(pkc_file) { + rjson::fromJSON(file = pkc_file) + }) + pkc_names <- unlist(lapply(file, function(pkc_file) { + base_pkc_name <- gsub(".pkc", "", trimws(basename(pkc_file))) + return(base_pkc_name) + })) + names(pkc_json_list) <- pkc_names + pkc_modules <- basename(unlist(lapply(pkc_names, sub, pattern = "_[^_]+$", + replacement = ""))) + names(pkc_modules) <- pkc_names + header <- list(PKCFileName = sapply(pkc_json_list, function(list) list[["Name"]]), + PKCModule = pkc_modules, + PKCFileVersion = sapply(pkc_json_list, function(list) list[["Version"]]), + PKCFileDate = sapply(pkc_json_list, function(list) list[["Date"]]), + AnalyteType = sapply(pkc_json_list, function(list) list[["AnalyteType"]]), + MinArea = sapply(pkc_json_list, function(list) list[["MinArea"]]), + MinNuclei = sapply(pkc_json_list, function(list) list[["MinNuclei"]])) + + multi_idx <- duplicated(header[["PKCModule"]]) + multi_mods <- unique(header[["PKCModule"]][multi_idx]) + if (length(multi_mods) < 1) { + if (!is.null(default_pkc_vers)) { + warning("Only one version found per PKC module. ", + "No PKCs need to be combined. ", "Therefore, no default PKC versions will be used.") + } + } + else { + use_pkc_names <- lapply(multi_mods, function(mod) { + mod_idx <- header[["PKCModule"]] == mod + max_vers <- as.numeric(as.character(max(as.numeric_version(header[["PKCFileVersion"]][mod_idx])))) + max_name <- names(header[["PKCFileVersion"]][header[["PKCFileVersion"]] == max_vers]) + return(max_name) + }) + names(use_pkc_names) <- multi_mods + if (!is.null(default_pkc_vers)) { + default_names <- unlist(lapply(default_pkc_vers, function(pkc_file) { + base_pkc_name <- gsub(".pkc", "", trimws(basename(pkc_file))) + return(base_pkc_name) + })) + default_mods <- unlist(lapply(default_names, sub, pattern = "_[^_]+$", + replacement = "")) + dup_defaults <- default_names[duplicated(default_mods) | + duplicated(default_mods, fromLast = TRUE)] + if (!all(default_names %in% names(header[["PKCFileName"]]))) { + removed_pkcs <- default_pkc_vers[!default_names %in% + names(header[["PKCFileName"]])] + stop("Could not match all default PKC versions with a PKC file name. ", + "Check default file names match exactly to a PKC file name.\n", + paste0("Unmatched default PKC versions: ", + removed_pkcs)) + } + else if (length(dup_defaults) > 0) { + stop("There should only be one default PKC version per module. ", + "Ensure only one version per module in default PKCs list.\n", + "Multiple default PKC version conflicts: ", + paste(dup_defaults, collapse = ", ")) + } + else { + use_pkc_names[default_mods] <- default_names + } + } + } + rtsid_lookup_df <- generate_pkc_lookup(pkc_json_list) + rtsid_lookup_df$Negative <- grepl("Negative", rtsid_lookup_df$CodeClass) + rtsid_lookup_df$RTS_ID <- gsub("RNA", "RTS00", rtsid_lookup_df[["RTS_ID"]]) + # rtsid_lookup_df <- S4Vectors::DataFrame(rtsid_lookup_df) + rtsid_lookup_df <- data.table::data.table(rtsid_lookup_df) + if (length(multi_mods) > 0) { + for (mod in names(use_pkc_names)) { + mod_vers <- names(header[["PKCModule"]])[header[["PKCModule"]] == + mod] + mod_lookup <- rtsid_lookup_df[rtsid_lookup_df$Module %in% + mod_vers, ] + mod_tab <- table(mod_lookup$RTS_ID) + remove_rts <- names(mod_tab[mod_tab != length(mod_vers)]) + if (length(remove_rts) > 0) { + warning("The following probes were removed from analysis", + " as they were not found in all PKC module versions used.\n", + # paste(utils::capture.output(print(subset(rtsid_lookup_df, + # subset = RTS_ID %in% remove_rts))), collapse = "\n") + ) + rtsid_lookup_df <- subset(rtsid_lookup_df, subset = !RTS_ID %in% + remove_rts) + } + remove_vers <- mod_vers[mod_vers != use_pkc_names[mod]] + rtsid_lookup_df <- subset(rtsid_lookup_df, subset = !Module %in% + remove_vers) + warning("The following PKC versions were removed from analysis", + " as they were overridden by a newer PKC version or", + " were overridden by a user-defined default PKC version.\n", + paste(remove_vers, collapse = ", ")) + header <- lapply(header, function(elem) { + elem[!names(elem) %in% remove_vers] + }) + } + } + # S4Vectors::metadata(rtsid_lookup_df) <- header + return(rtsid_lookup_df) +} + +#' readDCC +#' +#' Read a NanoString GeoMx Digital Count Conversion (DCC) file. +#' +#' @param file A character string containing the path to the DCC file. +#' +#' @noRd +readDCC <- function(file) +{ + lines <- trimws(readLines(file)) + trimGalore <- grep("trimGalore", lines) + if (length(trimGalore) > 0) { + Raw <- grep("Raw", lines) + lines <- lines[-c(trimGalore:(Raw - 1))] + } + lines <- gsub("SoftwareVersion,\"GeoMx_NGS_Pipeline_ ", + "SoftwareVersion,", lines) + lines <- gsub("SoftwareVersion,\"GeoMx_NGS_Pipeline_", "SoftwareVersion,", + lines) + lines <- gsub("SoftwareVersion,DRAGEN_GeoMx_", "SoftwareVersion,", + lines) + lines[grepl("SoftwareVersion", lines)] <- gsub("\"", "", + lines[grepl("SoftwareVersion", lines)]) + tags <- c("Header", "Scan_Attributes", "NGS_Processing_Attributes", "Code_Summary") + output <- sapply(tags, function(tag) { + bounds <- charmatch(sprintf(c("<%s>", "</%s>"), tag), + lines) + if (anyNA(bounds) || bounds[1L] + 1L >= bounds[2L]) + lines[integer(0)] + else lines[(bounds[1L] + 1L):(bounds[2L] - 1L)] + }, simplify = FALSE) + for (tag in c("Header", "Scan_Attributes", "NGS_Processing_Attributes")) { + while (length(bad <- grep(",", output[[tag]], invert = TRUE)) > + 0L) { + bad <- bad[1L] + if (bad == 1L) + stop(sprintf("%s section has malformed first line", + tag)) + fixed <- output[[tag]] + fixed[bad - 1L] <- sprintf("%s %s", fixed[bad - + 1L], fixed[bad]) + output[[tag]] <- fixed[-bad] + } + output[[tag]] <- strsplit(output[[tag]], split = ",") + output[[tag]] <- structure(lapply(output[[tag]], function(x) if (length(x) == + 1L) + "" + else x[2L]), names = lapply(output[[tag]], `[`, 1L), + class = "data.frame", row.names = basename(file)) + } + cols <- c("FileVersion", "SoftwareVersion") + if (!(all(cols %in% colnames(output[["Header"]])))) + stop("Header section must contain \"FileVersion\" and \"SoftwareVersion\"") + output[["Header"]][, cols] <- lapply(output[["Header"]][, + cols], numeric_version) + fileVersion <- output[["Header"]][1L, "FileVersion"] + if (!(numeric_version(fileVersion) %in% numeric_version(c("0.01", + "0.02")))) + stop("\"FileVersion\" in Header section must be 0.01 or 0.02") + for (section in c("Header", "Scan_Attributes", "NGS_Processing_Attributes")) { + valid <- .validDccSchema(output[[section]], fileVersion, + section) + if (!isTRUE(valid)) + stop(valid) + } + output[["Header"]][["Date"]] <- as.Date(output[["Header"]][["Date"]], + format = "%Y-%m-%d") + cols <- c("Raw", "Trimmed", "Stitched", "Aligned", "umiQ30", + "rtsQ30") + output[["NGS_Processing_Attributes"]][, cols] <- lapply(output[["NGS_Processing_Attributes"]][, + cols], as.numeric) + names(output[["Scan_Attributes"]])[names(output[["Scan_Attributes"]]) == + "ID"] <- "SampleID" + output[["Code_Summary"]] <- paste0("RTS_ID,Count\n", paste(output[["Code_Summary"]], + collapse = "\n")) + output[["Code_Summary"]] <- utils::read.csv(textConnection(output[["Code_Summary"]]), + colClasses = c(RTS_ID = "character", Count = "numeric")) + output[["Code_Summary"]][["Count"]] <- as.integer(round(output[["Code_Summary"]][["Count"]])) + rn <- output[["Code_Summary"]][["RTS_ID"]] + if ((ndups <- anyDuplicated(rn)) > 0L) { + warning(sprintf("removed %d rows from \"Code_Summary\" due to duplicate rownames", + ndups)) + ok <- which(!duplicated(rn, fromLast = FALSE) & !duplicated(rn, + fromLast = TRUE)) + rn <- rn[ok] + output[["Code_Summary"]] <- output[["Code_Summary"]][ok, + , drop = FALSE] + } + rownames(output[["Code_Summary"]]) <- rn + output[["NGS_Processing_Attributes"]][, "DeduplicatedReads"] <- sum(output[["Code_Summary"]][["Count"]]) + return(output) +} + + +#' importGeoMxSegments +#' +#' Import ROI polygons from the OME.TIFF file +#' +#' @param ome.tiff the OME.TIFF file of the GeoMx Experiment +#' @param summary segmentation summary data frame +#' @param imageinfo image information +#' +#' @noRd +importGeoMxSegments <- function(ome.tiff, summary, imageinfo){ + + # check file + if(file.exists(ome.tiff)){ + if(grepl(".ome.tiff$|.ome.tif$", ome.tiff)){ + if (!requireNamespace('RBioFormats')) + stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") + if (!requireNamespace('XML')) + stop("Please install XML package to extract xml from the ome.tiff file!") + omexml <- RBioFormats::read.omexml(ome.tiff) + } else if(grepl(".xml$", ome.tiff)){ + omexml <- XML::xmlParse(file = ome.tiff) + } else { + stop("Please provide either an ome.tiff or .xml file!") + } + omexml <- XML::xmlToList(omexml, simplify = TRUE) + } else { + stop("There are no files named ", ome.tiff," in the path") + } + + # get ROIs + ROIs <- omexml[which(names(omexml) == "ROI")] + + # get masks for each ROI + mask_lists <- list() + for(i in seq_len(length(ROIs))){ + cur_ROI <- ROIs[[i]] + + # if the shape is a polygon + if("Polygon" %in% names(cur_ROI$Union)){ + coords <- strsplit(cur_ROI$Union$Polygon, split = "\\n") + coords <- strsplit(coords$Points, split = " ")[[1]] + coords <- sapply(coords, function(x) as.numeric(strsplit(x, split = ",")[[1]]), USE.NAMES = FALSE) + coords <- as.data.frame(t(coords)) + colnames(coords) <- c("x", "y") + coords <- rescaleGeoMxPoints(coords, summary, imageinfo) + coords <- data.frame(id = cur_ROI$Union$Label[["Text"]], coords) + mask_lists[[cur_ROI$Union$Label[["Text"]]]] <- data.frame(coords) + + # if the shape is an ellipse + } else if("Ellipse" %in% names(cur_ROI$Union)){ + coords <- as.numeric(cur_ROI$Union$Ellipse[c("X","Y", "RadiusX", "RadiusY")]) + coords <- as.data.frame(matrix(coords, nrow = 1)) + colnames(coords) <- c("x", "y", "rx", "ry") + coords[,c("x", "y")] <- rescaleGeoMxPoints(coords[,c("x", "y")], summary, imageinfo) + coords$rx <- coords$rx * imageinfo$width/summary$Scan.Width[1] + coords$ry <- coords$ry * imageinfo$height/summary$Scan.Height[1] + coords <- data.frame(id = cur_ROI$Union$Label[["Text"]], coords) + mask_lists[[cur_ROI$Union$Label[["Text"]]]] <- coords + } + } + + mask_lists +} + +#' rescaleGeoMxROIs +#' +#' Rescale GeoMx point (center or polygon corners of ROI) coordinates +#' +#' @param pts coordinates of the cells from the Xenium assays +#' @param summary segmentation summary data frame +#' @param imageinfo image information +#' +#' @noRd +rescaleGeoMxPoints <- function(pts, summary, imageinfo){ + + xRatio = imageinfo$width/summary$Scan.Width[1] + yRatio = imageinfo$height/summary$Scan.Height[1] + pts$x = (pts$x - summary$Scan.Offset.X[1]) * xRatio + pts$y = (pts$y - summary$Scan.Offset.Y[1]) * yRatio + pts$y = imageinfo$height - pts$y + pts <- as.matrix(pts) + + # return + return(pts) +} + +#' importGeoMxChannels +#' +#' Rescale GeoMx channels with respect to the scan image +#' +#' @param ome.tiff the OME.TIFF file of the GeoMx Experiment +#' @param summary segmentation summary data frame +#' @param imageinfo image information +#' @param resolution_level the resolution level (1-7) of the image parsed from the OME.TIFF file +#' +#' @param RBioFormats read.image +#' @param EBImage as.Image +#' @param grDevices as.raster +#' @param magick image_read +#' +#' @noRd +importGeoMxChannels <- function(ome.tiff, summary, imageinfo, resolution_level){ + + # check file + if(file.exists(ome.tiff)){ + if(grepl(".ome.tiff$|.ome.tif$", ome.tiff)){ + if (!requireNamespace('RBioFormats')) + stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") + if (!requireNamespace('XML')) + stop("Please install XML package to extract xml from the ome.tiff file!") + omexml <- RBioFormats::read.omexml(ome.tiff) + omexml <- XML::xmlToList(omexml, simplify = TRUE) + } else { + warning("ome.tiff format not found!") + return(NULL) + } + } else { + stop("There are no files named ", ome.tiff," in the path") + } + + # get all channels + ome.tiff <- RBioFormats::read.image(ome.tiff, resolution = resolution_level) + + # get frame information + nframes <- ome.tiff@metadata$coreMetadata$imageCount + frames <- EBImage::getFrames(ome.tiff) + frames <- lapply(frames, function(x){ + img <- magick::image_read(grDevices::as.raster(EBImage::as.Image(x))) + rescaleGeoMxImage(img, summary, imageinfo, resolution_level = resolution_level) + }) + + # get channel names + omexml <- omexml$StructuredAnnotations[seq(from = 1, by = 2, length.out = nframes)] + channel_names <- lapply(omexml, function(x){ + x$Value$ChannelInfo$BiologicalTarget + }) + names(frames) <- channel_names + + # return frames + return(frames) +} + +#' rescaleGeoMxImage +#' +#' Rescale GeoMx channels with respect to the scan image +#' +#' @param img coordinates of the cells from the Xenium assays +#' @param summary segmentation summary data frame +#' @param imageinfo image information +#' @param resolution_level the resolution level (1-7) of the image parsed from the OME.TIFF file +#' +#' @param magick image_crop +#' +#' @noRd +rescaleGeoMxImage <- function(img, summary, imageinfo, resolution_level){ + + # adjust offset and scan size to the resolution level + offset.x <- summary$Scan.Offset.X[1]/(2*(resolution_level-1)) + offset.y <- summary$Scan.Offset.Y[1]/(2*(resolution_level-1)) + scan.width <- summary$Scan.Width[1]/(2*(resolution_level-1)) + scan.height <- summary$Scan.Height[1]/(2*(resolution_level-1)) + + # crop image given adjusted offsets + new_img <- magick::image_crop(img, geometry = paste0(scan.width, "x", scan.height, "+", offset.x, "+", offset.y)) + + # resize image + new_img <- magick::image_resize(new_img, geometry = paste0(imageinfo$width, "x", imageinfo$height)) + + # return + return(new_img) +} + +#### +## CosMx #### +#### + +#' importCosMx +#' +#' Import CosMx data +#' +#' @param tiledbURI the path to the tiledb folder +#' @param assay_name the assay name, default: CosMx +#' @param image the reference morphology image of the CosMx assay +#' @param image_name the image name of the CosMx assay, Default: main +#' @param ome.tiff the OME.TIFF file of the CosMx experiment if exists +#' @param import_molecules if TRUE, molecule assay will be created along with cell assay. +#' @param verbose verbose +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom data.table data.table +#' @importFrom ids random_id +#' +#' @export +#' +importCosMx <- function(tiledbURI, assay_name = "CosMx", + image = NULL, image_name = "main", ome.tiff = NULL, import_molecules = FALSE, verbose = TRUE, ...) +{ + # check tiledb and tiledbsc + if (!requireNamespace("tiledb", quietly = TRUE)) + stop("Please install the tiledb package: \n + remotes::install_github('TileDB-Inc/TileDB-R', force = TRUE, ref = '0.17.0')") + if (!requireNamespace("tiledbsc", quietly = TRUE)) + stop("Please install the tiledbsc package: \n + remotes::install_github('tiledb-inc/tiledbsc', force = TRUE, ref = '8157b7d54398b1f957832f37fff0b173d355530e')") + + # get tiledb + if(verbose) + message("Scanning TileDB array for cell data ...") + tiledb_scdataset <- tiledbsc::SOMACollection$new(uri = tiledbURI, verbose = FALSE) + + # raw counts + counts <- tiledb_scdataset$somas$RNA$X$members$counts$to_matrix(batch_mode = TRUE) + counts <- as.matrix(counts) + + # cell metadata + metadata <- tiledb_scdataset$somas$RNA$obs$to_dataframe() + + # coordinates + coords <- as.matrix(metadata[,c("x_slide_mm", "y_slide_mm")]) + colnames(coords) <- c("x","y") + + # transcripts + if(import_molecules){ + if(verbose) + message("Scanning TileDB array for molecule data ...") + subcellular <- tiledb::tiledb_array( + tiledb_scdataset$somas$RNA$obsm$members$transcriptCoords$uri, + return_as="data.table")[] + colnames(subcellular)[colnames(subcellular)=="target"] <- "gene" + } + + # get slides and construct VoltRon objects for each slides + slides <- unique(metadata$slide_ID_numeric) + + # for each slide create a VoltRon object with combined layers + vr_list <- list() + for(slide in slides){ + + # cell assay + if(verbose) + message("Creating cell level assay for slide ", slide, " ...") + + # slide info + cur_coords <- coords[metadata$slide_ID_numeric == slide,] + cur_counts <- counts[,rownames(cur_coords)] + cur_metadata <- metadata[rownames(cur_coords),] + + # create VoltRon object + cell_object <- formVoltRon(data = cur_counts, metadata = cur_metadata, image = image, coords = cur_coords, + main.assay = assay_name, assay.type = "cell", image_name = image_name, main_featureset = "RNA", ...) + cell_object$Sample <- paste0("Slide", slide) + + # molecule assay + if(import_molecules){ + + # get slide + if(verbose) + message("Creating molecule level assay for slide ", slide, " ...") + cur_subcellular <- subset(subcellular, slideID == slide) + + # coordinates + mol_coords <- as.matrix(cur_subcellular[,c("x_FOV_px", "y_FOV_px")]) + colnames(mol_coords) <- c("x", "y") + + # get subcellular data components + mol_metadata <- cur_subcellular[,colnames(cur_subcellular)[!colnames(cur_subcellular) %in% c("CellId", "cell_id", "x_FOV_px", "y_FOV_px")], with = FALSE] + set.seed(nrow(mol_metadata)) + mol_metadata$id <- rownames(mol_metadata) + mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE)) + mol_metadata$assay_id <- "Assay1" + mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")] + + # coord names + rownames(mol_coords) <- mol_metadata$id + + # create VoltRon assay for molecules + mol_assay <- formAssay(coords = mol_coords, image = image, type = "molecule", main_image = image_name) + + # merge assays in one section + if(verbose) + message("Merging assays for slide ", slide, " ...") + sample.metadata <- SampleMetadata(cell_object) + cell_object <- addAssay(cell_object, + assay = mol_assay, + metadata = mol_metadata, + assay_name = paste0(assay_name, "_mol"), + sample = sample.metadata["Assay1", "Sample"], + layer = sample.metadata["Assay1", "Layer"]) + } + vr_list <- append(vr_list, cell_object) + } + + # return + if(verbose) + message("Merging slides ...") + vr <- merge(vr_list[[1]], vr_list[-1]) +} + +#' generateCosMxImage +#' +#' Generates a low resolution Morphology image of the CosMx experiment +#' +#' @param dir.path CosMx output folder +#' @param increase.contrast increase the contrast of the image before writing +#' @param output.path The path to the new morphology image created if the image should be saved to a location other than Xenium output folder. +#' @param verbose verbose +#' @param ... additional parameters passed to the \link{writeImage} function +#' +#' @importFrom magick image_read image_contrast +#' @importFrom EBImage writeImage +#' +#' @export +generateCosMxImage <- function(dir.path, increase.contrast = TRUE, output.path = NULL, verbose = TRUE, ...) { + + # check package + if(!requireNamespace("reshape2")){ + stop("You have to install the reshape2 package!: install.packages('reshape2')") + } + + # file path to either Xenium output folder or specified folder + file.path <- paste0(dir.path, "/CellComposite_lowres.tif") + output.file <- paste0(output.path, "/CellComposite_lowres.tif") + + # check if the file exists in either Xenium output folder, or the specified location + if(file.exists(file.path) | file.exists(paste0(output.file))){ + if(verbose) + message("CellComposite_lowres.tif already exists! \n") + return(NULL) + } + + # FOV positions of CosMx + list_of_files <- list.files(dir.path) + fov_positions_path <- paste0(dir.path, "/", list_of_files[grepl("fov_positions_file.csv$",list_of_files)][1]) + fov_positions <- read.csv(fov_positions_path) + + # manipulate fov positions matrix + if(verbose) + message("Getting FOV Positions \n") + relative_fov_positions <- fov_positions + x_min <- min(relative_fov_positions$x_global_px) + y_min <- min(relative_fov_positions$y_global_px) + x_gap <- diff(unique(fov_positions$x_global_px))[1] + y_gap <- diff(unique(fov_positions$y_global_px))[1] + relative_fov_positions[,c("x_global_px","y_global_px")] <- t(apply(relative_fov_positions[,c("x_global_px","y_global_px")], 1, function(cell){ + c((cell[1]-x_min)/x_gap,(cell[2]-y_min)/y_gap) + })) + relative_fov_positions <- relative_fov_positions[order(relative_fov_positions$y_global_px, decreasing = TRUE),] + + # Combine Images of the FOV grid + if(verbose) + message("Loading FOV tif files \n") + image.dir.path <- paste0(dir.path,"/CellComposite/") + morphology_image_data <- NULL + for(i in relative_fov_positions$fov){ + image_path <- paste0(image.dir.path, "CellComposite_F", str_pad(as.character(i), 3, pad = 0), ".jpg") + image_data <- magick::image_read(image_path) %>% magick::image_resize("x500") %>% magick::image_raster() + if(is.null(morphology_image_data)) + dim_image <- apply(image_data[,seq_len(2)], 2, max) + scale_dim <- relative_fov_positions[i,2:3]*dim_image + image_data[,seq_len(2)] <- image_data[,seq_len(2)] + + rep(1, nrow(image_data)) %o% as.matrix(scale_dim)[1,] + morphology_image_data <- rbind(morphology_image_data, image_data) + } + morphology_image_data_array <- reshape2::acast(morphology_image_data, y ~ x) + morphology_image <- magick::image_read(morphology_image_data_array) %>% magick::image_resize("x800") + + # pick a resolution level + morphology_image_info <- image_info(morphology_image) + if(verbose) + message("Image Resolution (X:", morphology_image_info$width, " Y:", morphology_image_info$height, ") \n") + + # increase contrast using EBImage + if(increase.contrast) { + if(verbose) + message("Increasing Contrast \n") + morphology_image <- magick::image_contrast(morphology_image, sharpen = 1) + } + + # write to the same folder + if(verbose) + message("Writing Tiff File \n") + if(is.null(output.path)){ + EBImage::writeImage(magick::as_EBImage(morphology_image), file = file.path, ...) + } else { + EBImage::writeImage(magick::as_EBImage(morphology_image), file = output.file, ...) + } + + return(NULL) +} + +#### +# Spatial Genomics #### +#### + +#### +## GenePs #### +#### + +#' importGenePS +#' +#' Importing GenePS data +#' +#' @param dir.path path to Xenium output folder +#' @param assay_name the assay name of the SR object +#' @param sample_name the name of the sample +#' @param use_image if TRUE, the DAPI image will be used. +#' @param resolution_level the level of resolution within TIFF image. Default: 7 (971x638) +#' @param image_name the image name of the Xenium assay, Default: main +#' @param channel_name the channel name of the image of the Xenium assay, Default: DAPI +#' @param import_molecules if TRUE, molecule assay will be created along with cell assay. +#' @param verbose verbose +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom magick image_read image_info +#' @importFrom utils read.csv +#' @importFrom data.table fread +#' @importFrom ids random_id +#' @importFrom grDevices as.raster +#' @importFrom EBImage as.Image +#' +#' @export +#' +importGenePS <- function (dir.path, assay_name = "GenePS", sample_name = NULL, use_image = TRUE, + resolution_level = 7, image_name = "main", channel_name = "DAPI", import_molecules = FALSE, + verbose = TRUE, ...) +{ + # cell assay + if(verbose) + message("Creating cell level assay ...") + + # raw counts + DataOutputfiles <- list.files(paste0(dir.path, "DataOutput/")) + datafile <- DataOutputfiles[grepl("_cellxgene.csv", DataOutputfiles)] + if(length(datafile) > 0){ + rawdata <- utils::read.csv(file = paste0(dir.path, "DataOutput/", datafile), row.names = NULL) + rawdata <- rawdata[,colnames(rawdata)[!colnames(rawdata) %in% "X"]] + rawdata <- t(rawdata) + } else { + stop("There are no files ending with '_cellxgene.csv' in the path") + } + + # image + if(use_image){ + + # check RBioFormats + if (!requireNamespace('RBioFormats')) + stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") + + # check image + image_file <- paste0(dir.path, "/images/DAPI.tiff") + if(file.exists(image_file)){ + image <- RBioFormats::read.image(image_file, resolution = resolution_level) + image <- EBImage::as.Image(image) + image <- grDevices::as.raster(image) + image <- magick::image_read(image) + } else { + stop("There are no image files in the path") + } + # scale the xenium image instructed by 10x Genomics help page + scaleparam <- ceiling(2^(resolution_level-1)) + } else { + image <- NULL + } + + # coordinates + coordsfile <- DataOutputfiles[grepl("_cellcoordinate.csv", DataOutputfiles)] + if(length(coordsfile) > 0){ + coords <- utils::read.csv(file = paste0(dir.path, "DataOutput/", coordsfile)) + coords <- as.matrix(coords[,c("center_x", "center_y")]) + colnames(coords) <- c("x", "y") + if(use_image) { + coords <- coords/scaleparam + imageinfo <- unlist(magick::image_info(image)[c("height")]) + range_coords <- c(0,imageinfo) + } else { + range_coords <- range(coords[,2]) + } + coords[,2] <- range_coords[2] - coords[,2] + range_coords[1] + } else { + stop("There are no files ending with '_cellcoordinate.csv' in the path") + } + + # create VoltRon object for cells + cell_object <- formVoltRon(rawdata, metadata = NULL, image = image, coords, main.assay = assay_name, + assay.type = "cell", image_name = image_name, main_channel = channel_name, sample_name = sample_name, + feature_name = "RNA", ...) + + # add molecules + if(!import_molecules){ + return(cell_object) + } else { + if(verbose) + message("Creating molecule level assay ...") + transcripts_file <- DataOutputfiles[grepl("_transcriptlocation.csv", DataOutputfiles)] + if(length(transcripts_file) == 0){ + stop("There are no files ending with '_transcriptlocation.csv' in the path") + } else { + + # get subcellur data components + subcellular_data <- data.table::fread(paste0(dir.path, "DataOutput/", transcripts_file)) + subcellular_data$id <- seq_len(nrow(subcellular_data)) + subcellular_data <- subcellular_data[,c("id", colnames(subcellular_data)[!colnames(subcellular_data) %in% "id"]), with = FALSE] + colnames(subcellular_data)[colnames(subcellular_data)=="name"] <- "gene" + + # coordinates + coords <- as.matrix(subcellular_data[,c("x", "y")]) + colnames(coords) <- c("x", "y") + if(use_image){ + coords <- coords/scaleparam + } + coords[,"y"] <- range_coords[2] - coords[,"y"] + range_coords[1] + + # metadata + mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell", "x", "y")], with = FALSE] + set.seed(nrow(mol_metadata$id)) + mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE)) + mol_metadata$assay_id <- "Assay1" + mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")] + + # coord names + rownames(coords) <- mol_metadata$id + + # create VoltRon assay for molecules + mol_assay <- formAssay(coords = coords, image = image, type = "molecule", main_image = image_name, main_channel = channel_name) + + # merge assays in one section + if(verbose) + message("Merging assays ...") + sample.metadata <- SampleMetadata(cell_object) + object <- addAssay(cell_object, + assay = mol_assay, + metadata = mol_metadata, + assay_name = paste0(assay_name, "_mol"), + sample = sample.metadata["Assay1", "Sample"], + layer = sample.metadata["Assay1", "Layer"]) + + # connectivity + connectivity <- data.table::data.table(transcript_id = mol_metadata$id, cell_id = subcellular_data[["cell"]]) + connectivity <- subset(connectivity, cell_id != 0) + connectivity[["cell_id"]] <- vrSpatialPoints(cell_object)[connectivity[["cell_id"]]] + + # add connectivity of spatial points across assays + object <- addLayerConnectivity(object, + connectivity = connectivity, + sample = sample.metadata["Assay1", "Sample"], + layer = sample.metadata["Assay1", "Layer"]) + + # return + return(object) + } + } +} + +#### +# BGI Genomics #### +#### + +#### +## STOmics #### +#### + +#' importSTOmics +#' +#' Importing STOmics (Stereo-Seq) data +#' +#' @param h5ad.path path to h5ad file of STOmics output +#' @param assay_name the assay name +#' @param sample_name the name of the sample +#' @param image_name the image name of the Visium assay, Default: main +#' @param channel_name the channel name of the image of the Visium assay, Default: H&E +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom methods as +#' @export +#' +importSTOmics <- function(h5ad.path, assay_name = "STOmics", sample_name = NULL, image_name = "main", channel_name = "H&E", ...) +{ + # check package + if(!requireNamespace('anndataR')) + stop("Please install anndataR package!: devtools::install_github('scverse/anndataR')") + + # get h5ad data + stdata <- anndataR::read_h5ad(h5ad.path, to = "HDF5AnnData") + + # observation and feature names + obs_names <- stdata$obs_names + var_names <- stdata$var_names + + # raw counts + rawdata <- Matrix::t(stdata$X) + rownames(rawdata) <- var_names + colnames(rawdata) <- obs_names + rawdata <- methods::as(rawdata, 'CsparseMatrix') + + # metadata + metadata <- stdata$obs + rownames(metadata) <- obs_names + + # coordinates + coords <- stdata$obsm$spatial + rownames(coords) <- obs_names + + # scale coordinates + binsize <- stdata$uns$bin_size + params <- list( + spot.radius = 0.5 + (binsize-1), + vis.spot.radius = 0.5 + (binsize-1)) + + # create VoltRon + formVoltRon(rawdata, metadata = metadata, coords = coords, main.assay = assay_name, params = params, assay.type = "spot", + image_name = image_name, main_channel = channel_name, sample_name = sample_name, feature_name = "RNA", ...) +} + +#### +# Akoya #### +#### + +#### +## PhenoCycler #### +#### + +#' importPhenoCycler +#' +#' Importing PhenoCycler data +#' +#' @param dir.path path to PhenoCycler output folder +#' @param assay_name the assay name of the SR object +#' @param sample_name the name of the sample +#' @param image_name the image name of the Xenium assay, Default: main +#' @param type Specify which type matrix is being provided. +#' \itemize{ +#' \item \dQuote{\code{processor}}: matrix generated by CODEX Processor +#' \item \dQuote{\code{inform}}: matrix generated by inForm +#' \item \dQuote{\code{qupath}}: matrix generated by QuPath +#' } +#' @param filter A pattern to filter features by; pass \code{NA} to skip feature filtering +#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the quantification level to read in +#' @param verbose verbose +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom magick image_info image_read +#' +#' @export +importPhenoCycler <- function(dir.path, assay_name = "PhenoCycler", sample_name = NULL, image_name = "main", + type = c('inform', 'processor', 'qupath'), filter = 'DAPI|Blank|Empty', + inform.quant = c('mean', 'total', 'min', 'max', 'std'), verbose = TRUE, ...){ + + # raw counts, metadata and coordinates + listoffiles <- list.files(paste0(dir.path, "/processed/segm/segm-1/fcs/compensated/"), full.names = TRUE) + datafile <- listoffiles[grepl("_compensated.csv$", listoffiles)][1] + if(file.exists(datafile)){ + rawdata <- readPhenoCyclerMat(filename = datafile, type = type, filter = filter, inform.quant = inform.quant) + } else { + stop("There are no files named ending with '_compensated.csv' in the processed/segm/segm-1/fcs/compensated/ subfolder") + } + + # cell id + cellid <- paste0("cell", colnames(rawdata$matrix)) + + # coordinates + coords <- rawdata$centroids[,c("x", "y")] + rownames(coords) <- cellid + coords <- as.matrix(coords) + + # metadata + metadata <- rawdata$metadata + rownames(metadata) <- cellid + + # data + rawdata <- rawdata$matrix + colnames(rawdata) <- cellid + rownames(rawdata) <- gsub("(\t|\r|\n)", "", rownames(rawdata)) + + # images + image_dir <- paste0(dir.path, "/processed/stitched/reg001/") + list_files <- list.files(image_dir) + if(!dir.exists(image_dir)){ + if(verbose) + message("There are no images of channels!") + image_list <- NULL + } else { + if(!any(grepl(".tif$", list_files))){ + stop("The folder doesnt have any images associated with channels!") + } else{ + image_channel_names <- sapply(list_files, function(x) { + name <- strsplit(x, split = "_")[[1]] + name <- gsub(".tif$", "", name[length(name)]) + name <- make.unique(name) + return(name) + }) + image_channel_names <- c(image_channel_names[grepl("DAPI", image_channel_names)][1], + image_channel_names[image_channel_names %in% rownames(rawdata)]) + list_files <- names(image_channel_names) + image_list <- lapply(list_files, function(x){ + magick::image_read(paste0(image_dir, "/", x)) + }) + names(image_list) <- image_channel_names + coords[,2] <- magick::image_info(image_list[[1]])$height - coords[,2] + } + } + + # voltron object + object <- formVoltRon(data = rawdata, metadata = metadata, image = image_list, coords = coords, assay.type = "cell", + sample_name = sample_name, main.assay = assay_name, image_name = image_name, feature_name = "RNA", ...) + + # return + object +} + +#' readPhenoCyclerMat +#' +#' Read and Load Akoya CODEX data, adapted from the \code{ReadAkoya} function \code{Seurat} package +#' +#' @param filename Path to matrix generated by upstream processing. +#' @param type Specify which type matrix is being provided. +#' \itemize{ +#' \item \dQuote{\code{processor}}: matrix generated by CODEX Processor +#' \item \dQuote{\code{inform}}: matrix generated by inForm +#' \item \dQuote{\code{qupath}}: matrix generated by QuPath +#' } +#' @param filter A pattern to filter features by; pass \code{NA} to skip feature filtering +#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the quantification level to read in +#' +#' @importFrom methods as +#' +#' @noRd +readPhenoCyclerMat <- function( + filename, + type = c('inform', 'processor', 'qupath'), + filter = 'DAPI|Blank|Empty', + inform.quant = c('mean', 'total', 'min', 'max', 'std') +) { + # Check arguments + if (!file.exists(filename)) { + stop("Can't find file: ", filename) + } + type <- tolower(x = type[1L]) + type <- match.arg(arg = type) + ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) + # Preload matrix + sep <- switch(EXPR = type, 'inform' = '\t', ',') + mtx <- data.table::fread( + file = filename, + sep = sep, + data.table = FALSE, + verbose = FALSE + ) + # Assemble outputs + outs <- switch( + EXPR = type, + 'processor' = { + # Create centroids data frame + centroids <- data.frame( + x = mtx[['x:x']], + y = mtx[['y:y']], + cell = as.character(x = mtx[['cell_id:cell_id']]), + stringsAsFactors = FALSE + ) + rownames(x = mtx) <- as.character(x = mtx[['cell_id:cell_id']]) + # Create metadata data frame + md <- mtx[, !grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE] + colnames(x = md) <- vapply( + X = strsplit(x = colnames(x = md), split = ':'), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 2L + ) + # Create expression matrix + mtx <- mtx[, grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE] + colnames(x = mtx) <- vapply( + X = strsplit(x = colnames(x = mtx), split = ':'), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 2L + ) + if (!is.na(x = filter)) { + mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE] + } + mtx <- t(x = mtx) + if ((sum(mtx == 0) / length(x = mtx)) > ratio) { + # mtx <- as.sparse(x = mtx) + mtx <- methods::as(mtx, 'CsparseMatrix') + } + list(matrix = mtx, centroids = centroids, metadata = md) + }, + 'inform' = { + inform.quant <- tolower(x = inform.quant[1L]) + inform.quant <- match.arg(arg = inform.quant) + expr.key <- c( + mean = 'Mean', + total = 'Total', + min = 'Min', + max = 'Max', + std = 'Std Dev' + )[inform.quant] + expr.pattern <- '\\(Normalized Counts, Total Weighting\\)' + rownames(x = mtx) <- mtx[['Cell ID']] + mtx <- mtx[, setdiff(x = colnames(x = mtx), y = 'Cell ID'), drop = FALSE] + # Create centroids + centroids <- data.frame( + x = mtx[['Cell X Position']], + y = mtx[['Cell Y Position']], + cell = rownames(x = mtx), + stringsAsFactors = FALSE + ) + # Create metadata + cols <- setdiff( + x = grep( + pattern = expr.pattern, + x = colnames(x = mtx), + value = TRUE, + invert = TRUE + ), + y = paste('Cell', c('X', 'Y'), 'Position') + ) + md <- mtx[, cols, drop = FALSE] + # Create expression matrices + exprs <- data.frame( + cols = grep( + pattern = paste(expr.key, expr.pattern), + x = colnames(x = mtx), + value = TRUE + ) + ) + exprs$feature <- vapply( + X = trimws(x = gsub( + pattern = paste(expr.key, expr.pattern), + replacement = '', + x = exprs$cols + )), + FUN = function(x) { + x <- unlist(x = strsplit(x = x, split = ' ')) + x <- x[length(x = x)] + return(gsub(pattern = '\\(|\\)', replacement = '', x = x)) + }, + FUN.VALUE = character(length = 1L) + ) + exprs$class <- tolower(x = vapply( + X = strsplit(x = exprs$cols, split = ' '), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 1L + )) + classes <- unique(x = exprs$class) + outs <- vector( + mode = 'list', + length = length(x = classes) + 2L + ) + names(x = outs) <- c( + 'matrix', + 'centroids', + 'metadata', + setdiff(x = classes, y = 'entire') + ) + outs$centroids <- centroids + outs$metadata <- md + for (i in classes) { + df <- exprs[exprs$class == i, , drop = FALSE] + expr <- mtx[, df$cols] + colnames(x = expr) <- df$feature + if (!is.na(x = filter)) { + expr <- expr[, !grepl(pattern = filter, x = colnames(x = expr)), drop = FALSE] + } + expr <- t(x = expr) + if ((sum(expr == 0, na.rm = TRUE) / length(x = expr)) > ratio) { + # expr <- as.sparse(x = expr) + expr <- methods::as(expr, 'CsparseMatrix') + } + outs[[switch(EXPR = i, 'entire' = 'matrix', i)]] <- expr + } + outs + }, + 'qupath' = { + rownames(x = mtx) <- as.character(x = seq_len(length.out = nrow(x = mtx))) + # Create centroids + xpos <- sort( + x = grep(pattern = 'Centroid X', x = colnames(x = mtx), value = TRUE), + decreasing = TRUE + )[1L] + ypos <- sort( + x = grep(pattern = 'Centroid Y', x = colnames(x = mtx), value = TRUE), + decreasing = TRUE + )[1L] + centroids <- data.frame( + x = mtx[[xpos]], + y = mtx[[ypos]], + cell = rownames(x = mtx), + stringsAsFactors = FALSE + ) + # Create metadata + cols <- setdiff( + x = grep( + pattern = 'Cell: Mean', + x = colnames(x = mtx), + ignore.case = TRUE, + value = TRUE, + invert = TRUE + ), + y = c(xpos, ypos) + ) + md <- mtx[, cols, drop = FALSE] + # Create expression matrix + idx <- which(x = grepl( + pattern = 'Cell: Mean', + x = colnames(x = mtx), + ignore.case = TRUE + )) + mtx <- mtx[, idx, drop = FALSE] + colnames(x = mtx) <- vapply( + X = strsplit(x = colnames(x = mtx), split = ':'), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 1L + ) + if (!is.na(x = filter)) { + mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE] + } + mtx <- t(x = mtx) + if ((sum(mtx == 0) / length(x = mtx)) > ratio) { + # mtx <- as.sparse(x = mtx) + mtx <- methods::as(mtx, 'CsparseMatrix') + } + list(matrix = mtx, centroids = centroids, metadata = md) + }, + stop("Unknown matrix type: ", type) + ) + return(outs) +} + +#### +# Non-Commercial #### +#### + +#### +## OpenST #### +#### + +#' importOpenST +#' +#' Importing OpenST data +#' +#' @param h5ad.path path to h5ad file of STOmics output +#' @param assay_name the assay name +#' @param sample_name the name of the sample +#' @param image_name the image name of the Visium assay, Default: main +#' @param channel_name the channel name of the image of the Visium assay, Default: H&E +#' @param verbose verbose +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom methods as +#' @importFrom Matrix t +#' +#' @export +importOpenST <- function(h5ad.path, assay_name = "OpenST", sample_name = NULL, image_name = "main", channel_name = "H&E", verbose = TRUE, ...) +{ + # check package + if(!requireNamespace('anndataR')) + stop("Please install anndataR package!: devtools::install_github('scverse/anndataR')") + + # get h5ad data + stdata <- anndataR::read_h5ad(h5ad.path, to = "HDF5AnnData") + + # observation and feature names + obs_names <- stdata$obs_names + var_names <- stdata$var_names + + # raw counts + rawdata <- Matrix::t(stdata$X) + rownames(rawdata) <- var_names + colnames(rawdata) <- obs_names + rawdata <- methods::as(rawdata, 'CsparseMatrix') + + # metadata + metadata <- stdata$obs + rownames(metadata) <- obs_names + + # coordinates + coords <- stdata$obsm$spatial_3d_aligned + rownames(coords) <- obs_names + zlocation <- unique(coords[,3]) + + # get individual sections as voltron data + sections <- unique(metadata$n_section) + zlocation <- zlocation[order(sections)] + connectivity <- data.frame(Var1 = rep(seq_len(length(sections)), length(sections)), + Var2 = rep(seq_len(length(sections)), each = length(sections))) + sections <- sections[order(sections)] + vr_data_list <- list() + if(verbose) + message("Creating Layers ...") + for(i in seq_len(length(sections))){ + ind <- metadata$n_section == sections[i] + spatialpoints <- rownames(metadata[metadata$n_section == sections[i],]) + cur_data <- rawdata[,spatialpoints] + cur_metadata <- metadata[spatialpoints,] + cur_coords <- coords[ind,c(1,2)] + rownames(cur_coords) <- spatialpoints + vr_data_list[[i]] <- formVoltRon(data = cur_data, metadata = cur_metadata, coords = cur_coords, + main.assay = assay_name, sample_name = paste0("Section", sections[i]), + image_name = image_name, main_channel = channel_name, feature_name = "RNA", ...) + } + + # create VoltRon + sample_name <- ifelse(is.null(sample_name), "Sample", sample_name) + vr_data <- mergeVoltRon(vr_data_list[[1]], vr_data_list[-1], samples = sample_name) + + # set zlocations and adjacency of layer in the vrBlock + vr_data <- addBlockConnectivity(vr_data, connectivity = connectivity, zlocation = zlocation, sample = sample_name) + + # return + vr_data +} + +#### +## DBIT-Seq #### +#### + +#' importDBITSeq +#' +#' Importing DBIT-Seq data +#' +#' @param path.rna path to rna count matrix +#' @param path.prot path to protein count matrix +#' @param size the size of the in situ pixel (defualt is 10 (micron)) +#' @param assay_name the assay name +#' @param sample_name the name of the sample +#' @param image_name the image name of the Visium assay, Default: main +#' @param channel_name the channel name of the image of the Visium assay, Default: H&E +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom utils read.table +#' +#' @export +importDBITSeq <- function(path.rna, path.prot = NULL, size = 10, assay_name = "DBIT-Seq", sample_name = NULL, image_name = "main", channel_name = "H&E", ...) +{ + # count matrix RNA + rnadata <- utils::read.table(path.rna, header = TRUE, sep = "\t", row.names = 1) + rnadata <- t(as.matrix(rnadata)) + + # count matrix Protein + protdata <- utils::read.table(path.prot, header = TRUE, sep = "\t", row.names = 1) + protdata <- t(as.matrix(protdata)) + + # coords + coords <- sapply(colnames(rnadata), function(x) as.numeric(strsplit(x, split = "x")[[1]])) + coords <- t(coords) + colnames(coords) <- c("x", "y") + + # get DBIT-Seq parameters + params <- list( + spot.radius = size/2, + vis.spot.radius = size, + nearestpost.distance = size*2*sqrt(2)) + coords <- coords*size*3/2 + + # make voltron object + object <- formVoltRon(data = rnadata, coords = coords, image = NULL, assay.type = "spot", params = params, image_name = "main", + main.assay = assay_name, sample_name = sample_name, feature_name = "RNA", ...) + + # add protein assay + if(!is.null(path.prot)){ + + # # create protein assay + # new_assay <- formAssay(data = protdata, + # coords = coords, + # type = "spot") + # new_assay@image <- object[["Assay1"]]@image + # sample.metadata <- SampleMetadata(object) + + # # add new assay + # object <- addAssay(object, + # assay = new_assay, + # metadata = Metadata(object), + # assay_name = paste(assay_name, "Prot", sep = "-"), + # sample = sample.metadata["Assay1", "Sample"], + # layer = sample.metadata["Assay1", "Layer"]) + # + # # add connectivity of spatial points across assays + # connectivity <- cbind(vrSpatialPoints(object, assay = "Assay1"), + # vrSpatialPoints(object, assay = "Assay2")) + # object <- addConnectivity(object, + # connectivity = connectivity, + # sample = sample.metadata["Assay1", "Sample"], + # layer = sample.metadata["Assay1", "Layer"]) + + # add negative probe assay as new feature set + object <- addFeature(object, assay = assay_name, data = protdata, feature_name = "Protein") + + } + + # return + return(object) +} + +#### +# Image Data #### +#### + +#' importImageData +#' +#' import an image as VoltRon object +#' +#' @param image a single or a list of image paths or magick-image objects +#' @param tile.size the size of tiles +#' @param segments Either a list of segments or a GeoJSON file. This will result in a second assay in the VoltRon object to be created +#' @param image_name the image name of the Image assay, Default: main +#' @param channel_names the channel names of the images if multiple images are provided +#' @param ... additional parameters passed to \link{formVoltRon} +#' +#' @importFrom magick image_read image_info +#' @importFrom data.table data.table +#' +#' @examples +#' # single image +#' imgfile <- system.file("extdata", "DAPI.tif", package = "VoltRon") +#' vrdata <- importImageData(imgfile, image_name = "main") +#' +#' # multiple images +#' imgfile <- c(system.file("extdata", "DAPI.tif", package = "VoltRon"), +#' system.file("extdata", "DAPI.tif", package = "VoltRon")) +#' vrdata <- importImageData(imgfile, image_name = "main", channel_name = c("DAPI", "DAPI2")) +#' +#' @export +importImageData <- function(image, tile.size = 10, segments = NULL, image_name = "main", channel_names = NULL, ...){ + + # images and channel names + if(!is.null(channel_names)){ + if(length(image) != length(channel_names)) + stop("Provided channel names should of the same length as the images!") + if(any(!is.character(channel_names))) + stop("Invalid channel names!") + } + + # get image + if(!is.list(image)){} + image <- as.list(image) + image <- sapply(image, function(img){ + if(!inherits(img, "magick-image")){ + if(!is.character(img)){ + stop("image should either be a magick-image object or a file.path") + } else{ + if(file.exists(img)){ + img <- magick::image_read(img) + } else { + stop(img, " is not found!") + } + } + } + img + }, USE.NAMES = TRUE, simplify = FALSE) + + # channel names + if(!is.null(channel_names)){ + names(image) <- channel_names + } + + # check image size + imageinfo <- vapply(image, function(img) { + info <- magick::image_info(img) + c(info$width, info$height) + }, numeric(2)) + unique_width <- unique(imageinfo[1,]) + unique_height <- unique(imageinfo[2,]) + if(length(unique_width) == 1 && length(unique_height) == 1){ + imageinfo <- list(width = imageinfo[1,1], height = imageinfo[2,1]) + } + + # coordinates + even_odd_correction <- (!tile.size%%2)*(0.5) + x_coords <- seq((tile.size/2) + even_odd_correction, imageinfo$width, tile.size)[seq_len(imageinfo$width %/% tile.size)] + y_coords <- seq((tile.size/2) + even_odd_correction, imageinfo$height, tile.size)[seq_len(imageinfo$height %/% tile.size)] + y_coords <- rev(y_coords) + coords <- as.matrix(expand.grid(x_coords, y_coords)) + colnames(coords) <- c("x", "y") + rownames(coords) <- paste0("tile", seq_len(nrow(coords))) + + # metadata + metadata <- data.table::data.table(id = rownames(coords)) + + # create voltron object with tiles + object <- formVoltRon(data = NULL, metadata = metadata, image = image, coords, main.assay = "ImageData", assay.type = "tile", params = list(tile.size = tile.size), image_name = image_name, ...) + + # check if segments are defined + if(is.null(segments)){ + return(object) + } else { + + # check if segments are paths + if(inherits(segments, "character")){ + if(grepl(".geojson$", segments)){ + segments <- generateSegments(geojson.file = segments) + } else { + stop("Only lists or GeoJSON files are accepted as segments input!") + } + } + + # make coordinates out of segments + coords <- t(vapply(segments, function(dat){ + apply(dat[,c("x", "y")], 2, mean) + }, numeric(2))) + rownames(coords) <- names(segments) + + # make segment assay + assay <- formAssay(coords = coords, segments = segments, image = image, type = "ROI", main_image = image_name) + + # add segments as assay + sample_metadata <- SampleMetadata(object) + object <- addAssay(object, + assay = assay, + metadata = data.frame(row.names = paste0(names(segments), "_Assay2")), + assay_name = "ROIAnnotation", + sample = sample_metadata$Sample, + layer = sample_metadata$Layer) + + # return + return(object) + } +} + + +#' generateSegments +#' +#' The function to import segments from a geojson file +#' +#' @param geojson.file the GeoJSON file, typically generated by QuPath software +#' +#' @importFrom rjson fromJSON +#' @importFrom dplyr tibble +#' +#' @export +generateSegments <- function(geojson.file){ + + # get segments + if(inherits(geojson.file, "character")){ + if(file.exists(geojson.file)){ + segments <- rjson::fromJSON(file = geojson.file) + } else { + stop("geojson.file doesn't exist!") + } + } else { + stop("geojson.file should be the path to the GeoJSON file!") + } + + # parse polygons as segments/ROIs + segments <- lapply(segments, function(x){ + type <- x$geometry$type + poly <- x$geometry$coordinates + if(grepl("Polygon", type)){ + poly <- as.data.frame(matrix(unlist(poly[[1]]), ncol = 2, byrow = TRUE)) + } + colnames(poly) <- c("x", "y") + dplyr::tibble(poly) + }) + + # attach names to segments + segments <- mapply(function(x, sgt){ + dplyr::tibble(data.frame(id = x, sgt)) + }, seq_len(length(segments)), segments, SIMPLIFY = FALSE) + + # generate ROI names + names(segments) <- paste0("ROI", seq_len(length(segments))) + + # return + return(segments) +} + +#' generateGeoJSON +#' +#' generating geojson files from segments +#' +#' @param segments the segments, typically from \link{vrSegments}. +#' @param file the GeoJSON file, typically to be used by QuPath software. +#' +#' @importFrom rjson fromJSON +#' +#' @export +generateGeoJSON <- function(segments, file){ + + if(!requireNamespace('geojsonR')) + stop("Please install geojsonR package for using geojsonR functions") + + # get segments + if(!inherits(file, "character")){ + stop("file should be the path to the GeoJSON file!") + } + + # reshape segments + segments <- mapply(function(id, sgt){ + poly <- na.omit(as.matrix(sgt[,c("x", "y")])) + poly <- rbind(poly, poly[1,,drop = FALSE]) + poly <- as.list(data.frame(t(poly))) + names(poly) <- NULL + init <- geojsonR::TO_GeoJson$new() + geometry <- init$Polygon(list(poly), stringify = TRUE) + feature <- list(type = "Feature", + id = id, + geometry = geometry[!names(geometry) %in% "json_dump"], + properties = list(objectType = "annotation")) + feature + }, names(segments), segments, SIMPLIFY = FALSE, USE.NAMES = FALSE) + + # save as json + segments <- rjson::toJSON(segments) + write(segments, file = file) +} \ No newline at end of file