|
a |
|
b/R/import.R |
|
|
1 |
#### |
|
|
2 |
# 10X Genomics #### |
|
|
3 |
#### |
|
|
4 |
|
|
|
5 |
#### |
|
|
6 |
## Xenium #### |
|
|
7 |
#### |
|
|
8 |
|
|
|
9 |
#' importXenium |
|
|
10 |
#' |
|
|
11 |
#' Importing Xenium data |
|
|
12 |
#' |
|
|
13 |
#' @param dir.path path to Xenium output folder |
|
|
14 |
#' @param selected_assay selected assay from Xenium |
|
|
15 |
#' @param assay_name the assay name of the SR object |
|
|
16 |
#' @param sample_name the name of the sample |
|
|
17 |
#' @param use_image if TRUE, the DAPI image will be used. |
|
|
18 |
#' @param morphology_image the name of the lowred morphology image. Default: morphology_lowres.tif |
|
|
19 |
#' @param resolution_level the level of resolution within Xenium OME-TIFF image, see \link{generateXeniumImage}. Default: 7 (553x402) |
|
|
20 |
#' @param overwrite_resolution if TRUE, the image "file.name" will be generated again although it exists at "dir.path" |
|
|
21 |
#' @param image_name the image name of the Xenium assay, Default: main |
|
|
22 |
#' @param channel_name the channel name of the image of the Xenium assay, Default: DAPI |
|
|
23 |
#' @param import_molecules if TRUE, molecule assay will be created along with cell assay. |
|
|
24 |
#' @param verbose verbose |
|
|
25 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
26 |
#' |
|
|
27 |
#' @importFrom magick image_read image_info |
|
|
28 |
#' @importFrom utils read.csv |
|
|
29 |
#' @importFrom data.table fread |
|
|
30 |
#' @importFrom ids random_id |
|
|
31 |
#' |
|
|
32 |
#' @export |
|
|
33 |
#' |
|
|
34 |
importXenium <- function (dir.path, selected_assay = "Gene Expression", assay_name = "Xenium", sample_name = NULL, use_image = TRUE, |
|
|
35 |
morphology_image = "morphology_lowres.tif", resolution_level = 7, overwrite_resolution = FALSE, |
|
|
36 |
image_name = "main", channel_name = "DAPI", import_molecules = FALSE, verbose = TRUE, ...) |
|
|
37 |
{ |
|
|
38 |
# cell assay |
|
|
39 |
if(verbose) |
|
|
40 |
message("Creating cell level assay ...") |
|
|
41 |
|
|
|
42 |
# raw counts |
|
|
43 |
datafile <- paste0(dir.path, "/cell_feature_matrix.h5") |
|
|
44 |
if(file.exists(datafile)){ |
|
|
45 |
rawdata <- import10Xh5(filename = datafile) |
|
|
46 |
if(any(names(rawdata) %in% selected_assay)){ |
|
|
47 |
rawdata <- as.matrix(rawdata[[selected_assay]]) |
|
|
48 |
} else { |
|
|
49 |
stop("There are no assays called ", selected_assay, " in the h5 file!") |
|
|
50 |
} |
|
|
51 |
} else { |
|
|
52 |
stop("There are no files named 'cell_feature_matrix.h5' in the path") |
|
|
53 |
} |
|
|
54 |
|
|
|
55 |
# image |
|
|
56 |
if(use_image){ |
|
|
57 |
generateXeniumImage(dir.path, file.name = morphology_image, resolution_level = resolution_level, overwrite_resolution = overwrite_resolution, |
|
|
58 |
verbose = verbose) |
|
|
59 |
image_file <- paste0(dir.path, "/", morphology_image) |
|
|
60 |
if(file.exists(image_file)){ |
|
|
61 |
image <- image_read(image_file) |
|
|
62 |
} else { |
|
|
63 |
stop("There are no spatial image files in the path") |
|
|
64 |
} |
|
|
65 |
# scale the xenium image instructed by 10x Genomics help page |
|
|
66 |
scaleparam <- 0.2125*(2^(resolution_level-1)) |
|
|
67 |
} else { |
|
|
68 |
image <- NULL |
|
|
69 |
} |
|
|
70 |
|
|
|
71 |
# coordinates |
|
|
72 |
coord_file <- paste0(dir.path, "/cells.csv.gz") |
|
|
73 |
if(file.exists(coord_file)){ |
|
|
74 |
Xenium_coords <- utils::read.csv(file = coord_file) |
|
|
75 |
coords <- as.matrix(Xenium_coords[,c("x_centroid", "y_centroid")]) |
|
|
76 |
colnames(coords) <- c("x","y") |
|
|
77 |
if(use_image) { |
|
|
78 |
coords <- coords/scaleparam |
|
|
79 |
imageinfo <- unlist(magick::image_info(image)[c("height")]) |
|
|
80 |
range_coords <- c(0,imageinfo) |
|
|
81 |
} else { |
|
|
82 |
range_coords <- range(coords[,2]) |
|
|
83 |
} |
|
|
84 |
coords[,2] <- range_coords[2] - coords[,2] + range_coords[1] |
|
|
85 |
} else { |
|
|
86 |
stop("There are no file named 'cells.csv.gz' in the path") |
|
|
87 |
} |
|
|
88 |
|
|
|
89 |
# segments |
|
|
90 |
segments_file <- paste0(dir.path, "/cell_boundaries.csv.gz") |
|
|
91 |
if(file.exists(segments_file)){ |
|
|
92 |
segments <- as.data.frame(data.table::fread(segments_file)) |
|
|
93 |
segments <- segments[,c("cell_id", "vertex_x", "vertex_y")] |
|
|
94 |
colnames(segments) <- c("cell_id", "x", "y") |
|
|
95 |
if(use_image){ |
|
|
96 |
segments[,c("x","y")] <- segments[,c("x","y")]/scaleparam |
|
|
97 |
} |
|
|
98 |
segments[,"y"] <- range_coords[2] - segments[,"y"] + range_coords[1] |
|
|
99 |
segments <- segments %>% dplyr::group_split(cell_id) |
|
|
100 |
segments <- as.list(segments) |
|
|
101 |
names(segments) <- rownames(coords) |
|
|
102 |
} else { |
|
|
103 |
stop("There are no file named 'cell_boundaries.csv.gz' in the path") |
|
|
104 |
} |
|
|
105 |
|
|
|
106 |
# create VoltRon object for cells |
|
|
107 |
cell_object <- formVoltRon(rawdata, metadata = NULL, image = image, coords, segments = segments, main.assay = assay_name, |
|
|
108 |
assay.type = "cell", image_name = image_name, main_channel = channel_name, sample_name = sample_name, |
|
|
109 |
feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...) |
|
|
110 |
|
|
|
111 |
# molecule assay |
|
|
112 |
if(!import_molecules){ |
|
|
113 |
return(cell_object) |
|
|
114 |
} else { |
|
|
115 |
if(verbose) |
|
|
116 |
message("Creating molecule level assay ...") |
|
|
117 |
# transcripts |
|
|
118 |
transcripts_file <- paste0(dir.path, "/transcripts.csv.gz") |
|
|
119 |
if(!file.exists(transcripts_file)){ |
|
|
120 |
stop("There are no file named 'transcripts.csv.gz' in the path") |
|
|
121 |
} else { |
|
|
122 |
# get subcellur data components |
|
|
123 |
subcellular_data <- data.table::fread(transcripts_file) |
|
|
124 |
subcellular_data <- subcellular_data[,c("transcript_id", colnames(subcellular_data)[!colnames(subcellular_data) %in% "transcript_id"]), with = FALSE] |
|
|
125 |
colnames(subcellular_data)[colnames(subcellular_data)=="transcript_id"] <- "id" |
|
|
126 |
colnames(subcellular_data)[colnames(subcellular_data)=="feature_name"] <- "gene" |
|
|
127 |
subcellular_data <- subcellular_data[subcellular_data$qv >= 20, ] |
|
|
128 |
|
|
|
129 |
# coordinates |
|
|
130 |
coords <- as.matrix(subcellular_data[,c("x_location", "y_location", "z_location")]) |
|
|
131 |
colnames(coords) <- c("x", "y", "z") |
|
|
132 |
if(use_image){ |
|
|
133 |
coords[,c("x", "y")] <- coords[,c("x", "y")]/scaleparam |
|
|
134 |
} |
|
|
135 |
coords[,"y"] <- range_coords[2] - coords[,"y"] + range_coords[1] |
|
|
136 |
|
|
|
137 |
# metadata |
|
|
138 |
# mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell_id", "transcript_id", "x_location", "y_location")], with = FALSE] |
|
|
139 |
mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell_id", "transcript_id", "x_location", "y_location", "z_location")], with = FALSE] |
|
|
140 |
set.seed(nrow(mol_metadata$id)) |
|
|
141 |
mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE)) |
|
|
142 |
mol_metadata$assay_id <- "Assay1" |
|
|
143 |
mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")] |
|
|
144 |
|
|
|
145 |
# coord names |
|
|
146 |
rownames(coords) <- mol_metadata$id |
|
|
147 |
|
|
|
148 |
# create VoltRon assay for molecules |
|
|
149 |
mol_assay <- formAssay(coords = coords, image = image, type = "molecule", main_image = image_name, main_channel = channel_name) |
|
|
150 |
|
|
|
151 |
# merge assays in one section |
|
|
152 |
if(verbose) |
|
|
153 |
message("Merging assays ...") |
|
|
154 |
sample.metadata <- SampleMetadata(cell_object) |
|
|
155 |
object <- addAssay(cell_object, |
|
|
156 |
assay = mol_assay, |
|
|
157 |
metadata = mol_metadata, |
|
|
158 |
assay_name = paste0(assay_name, "_mol"), |
|
|
159 |
sample = sample.metadata["Assay1", "Sample"], |
|
|
160 |
layer = sample.metadata["Assay1", "Layer"]) |
|
|
161 |
|
|
|
162 |
# connectivity |
|
|
163 |
connectivity <- data.table::data.table(transcript_id = mol_metadata$id, cell_id = subcellular_data[["cell_id"]]) |
|
|
164 |
if(is.numeric(connectivity$cell_id)){ |
|
|
165 |
connectivity <- subset(connectivity, cell_id != -1) |
|
|
166 |
connectivity[["cell_id"]] <- vrSpatialPoints(cell_object)[connectivity[["cell_id"]]] |
|
|
167 |
} else { |
|
|
168 |
connectivity <- subset(connectivity, cell_id != "UNASSIGNED") |
|
|
169 |
connectivity$cell_assay_id <- "_Assay1" |
|
|
170 |
connectivity[, cell_id:=do.call(paste0,.SD), .SDcols=c("cell_id", "cell_assay_id")] |
|
|
171 |
connectivity$cell_assay_id <- NULL |
|
|
172 |
} |
|
|
173 |
|
|
|
174 |
# add connectivity of spatial points across assays |
|
|
175 |
object <- addLayerConnectivity(object, |
|
|
176 |
connectivity = connectivity, |
|
|
177 |
sample = sample.metadata["Assay1", "Sample"], |
|
|
178 |
layer = sample.metadata["Assay1", "Layer"]) |
|
|
179 |
|
|
|
180 |
# return |
|
|
181 |
return(object) |
|
|
182 |
} |
|
|
183 |
} |
|
|
184 |
} |
|
|
185 |
|
|
|
186 |
#' generateXeniumImage |
|
|
187 |
#' |
|
|
188 |
#' Generate a low resolution DAPI image of the Xenium experiment |
|
|
189 |
#' |
|
|
190 |
#' @param dir.path Xenium output folder |
|
|
191 |
#' @param increase.contrast increase the contrast of the image before writing |
|
|
192 |
#' @param resolution_level the level of resolution within Xenium OME-TIFF image. Default: 7 (553x402) |
|
|
193 |
#' @param overwrite_resolution if TRUE, the image "file.name" will be generated again although it exists at "dir.path" |
|
|
194 |
#' @param output.path The path to the new morphology image created if the image should be saved to a location other than Xenium output folder. |
|
|
195 |
#' @param file.name the name of the lowred morphology image. Default: morphology_lowres.tif |
|
|
196 |
#' @param verbose verbose |
|
|
197 |
#' @param ... additional parameters passed to the \link{writeImage} function |
|
|
198 |
#' |
|
|
199 |
#' @importFrom EBImage writeImage |
|
|
200 |
#' |
|
|
201 |
#' @details |
|
|
202 |
#' The Xenium morphology_mip.ome.tif file that is found under the outs folder comes is an hyperstack of different resolutions of the DAPI image. |
|
|
203 |
#' \link{generateXeniumImage} allows extracting only one of these layers by specifying the \code{resolution} parameter (Default: 7 for 553x402) among 1 to 8. |
|
|
204 |
#' Lower incides of resolutions have higher higher resolutions, e.g. 1 for 35416x25778. Note that you may need to allocate larger memory of Java to import |
|
|
205 |
#' higher resolution images. |
|
|
206 |
#' |
|
|
207 |
#' @export |
|
|
208 |
#' |
|
|
209 |
generateXeniumImage <- function(dir.path, increase.contrast = TRUE, resolution_level = 7, overwrite_resolution = FALSE, |
|
|
210 |
output.path = NULL, file.name = "morphology_lowres.tif", verbose = TRUE, ...) { |
|
|
211 |
|
|
|
212 |
# file path to either Xenium output folder or specified folder |
|
|
213 |
file.path <- paste0(dir.path, "/", file.name) |
|
|
214 |
output.file <- paste0(output.path, "/", file.name) |
|
|
215 |
|
|
|
216 |
# check if the file exists in either Xenium output folder, or the specified location |
|
|
217 |
if((file.exists(file.path) | file.exists(paste0(output.file))) & !overwrite_resolution){ |
|
|
218 |
if(verbose) |
|
|
219 |
message(file.name, " already exists!") |
|
|
220 |
} else { |
|
|
221 |
if (!requireNamespace('RBioFormats')) |
|
|
222 |
stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") |
|
|
223 |
if(dir.exists(paste0(dir.path, "/morphology_focus"))){ |
|
|
224 |
if(verbose) |
|
|
225 |
message("Loading morphology_focus_0000.ome.tif ...") |
|
|
226 |
morphology_image_lowres <- RBioFormats::read.image(paste0(dir.path, "/morphology_focus/morphology_focus_0000.ome.tif"), |
|
|
227 |
resolution = resolution_level, |
|
|
228 |
subset=list(C=1)) |
|
|
229 |
} else if(file.exists(paste0(dir.path, "/morphology_mip.ome.tif"))) { |
|
|
230 |
if(verbose) |
|
|
231 |
message("Loading morphology_mip.ome.tif ...") |
|
|
232 |
morphology_image_lowres <- RBioFormats::read.image(paste0(dir.path, "/morphology_mip.ome.tif"), resolution = resolution_level) |
|
|
233 |
} |
|
|
234 |
|
|
|
235 |
# pick a resolution level |
|
|
236 |
image_info <- morphology_image_lowres@metadata$coreMetadata |
|
|
237 |
if(verbose) |
|
|
238 |
message(" Image Resolution (X:", image_info$sizeX, " Y:", image_info$sizeY, ") ...") |
|
|
239 |
|
|
|
240 |
# increase contrast using EBImage |
|
|
241 |
if(increase.contrast) { |
|
|
242 |
if(verbose) |
|
|
243 |
message(" Increasing Contrast ...") |
|
|
244 |
morphology_image_lowres <- (morphology_image_lowres/max(morphology_image_lowres)) |
|
|
245 |
} |
|
|
246 |
|
|
|
247 |
# write to the same folder |
|
|
248 |
if(verbose) |
|
|
249 |
message(" Writing Tiff File ...") |
|
|
250 |
if(is.null(output.path)){ |
|
|
251 |
EBImage::writeImage(morphology_image_lowres, file = file.path, ...) |
|
|
252 |
} else { |
|
|
253 |
EBImage::writeImage(morphology_image_lowres, file = output.file, ...) |
|
|
254 |
} |
|
|
255 |
} |
|
|
256 |
invisible() |
|
|
257 |
} |
|
|
258 |
|
|
|
259 |
#### |
|
|
260 |
## Visium #### |
|
|
261 |
#### |
|
|
262 |
|
|
|
263 |
#' importVisium |
|
|
264 |
#' |
|
|
265 |
#' Importing Visium data |
|
|
266 |
#' |
|
|
267 |
#' @param dir.path path to Visium output folder |
|
|
268 |
#' @param selected_assay selected assay from Visium |
|
|
269 |
#' @param assay_name the assay name |
|
|
270 |
#' @param sample_name the name of the sample |
|
|
271 |
#' @param image_name the image name of the Visium assay, Default: main |
|
|
272 |
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E |
|
|
273 |
#' @param inTissue if TRUE, only barcodes that are in the tissue will be kept (default: TRUE) |
|
|
274 |
#' @param resolution_level the level of resolution of Visium image: "lowres" (default) or "hires" |
|
|
275 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
276 |
#' |
|
|
277 |
#' @importFrom magick image_read image_info |
|
|
278 |
#' @importFrom rjson fromJSON |
|
|
279 |
#' @importFrom utils read.csv |
|
|
280 |
#' |
|
|
281 |
#' @export |
|
|
282 |
#' |
|
|
283 |
importVisium <- function(dir.path, selected_assay = "Gene Expression", assay_name = "Visium", sample_name = NULL, image_name = "main", channel_name = "H&E", inTissue = TRUE, resolution_level = "lowres", ...) |
|
|
284 |
{ |
|
|
285 |
# raw counts |
|
|
286 |
listoffiles <- list.files(dir.path) |
|
|
287 |
datafile <- listoffiles[grepl("filtered_feature_bc_matrix.h5", listoffiles)][1] |
|
|
288 |
datafile <- paste0(dir.path, "/", datafile) |
|
|
289 |
if(file.exists(datafile)){ |
|
|
290 |
rawdata <- import10Xh5(filename = datafile) |
|
|
291 |
if(any(names(rawdata) %in% selected_assay)){ |
|
|
292 |
rawdata <- as.matrix(rawdata[[selected_assay]]) |
|
|
293 |
} else { |
|
|
294 |
stop("There are no assays called ", selected_assay, " in the h5 file!") |
|
|
295 |
} |
|
|
296 |
} else { |
|
|
297 |
stop("There are no files named 'filtered_feature_bc_matrix.h5' in the path") |
|
|
298 |
} |
|
|
299 |
|
|
|
300 |
# resolution |
|
|
301 |
if(!resolution_level %in% c("lowres","hires")) |
|
|
302 |
stop("resolution_level should be either 'lowres' or 'hires'!") |
|
|
303 |
|
|
|
304 |
# image |
|
|
305 |
image_file <- paste0(dir.path, paste0("/spatial/tissue_", resolution_level, "_image.png")) |
|
|
306 |
if(file.exists(image_file)){ |
|
|
307 |
image <- magick::image_read(image_file) |
|
|
308 |
info <- image_info(image) |
|
|
309 |
} else { |
|
|
310 |
stop("There are no spatial image files in the path") |
|
|
311 |
} |
|
|
312 |
|
|
|
313 |
# coordinates |
|
|
314 |
coords_file <- list.files(paste0(dir.path, "/spatial/"), full.names = TRUE) |
|
|
315 |
coords_file <- coords_file[grepl("tissue_positions",coords_file)] |
|
|
316 |
if(length(coords_file) == 1){ |
|
|
317 |
if(grepl("tissue_positions_list.csv", coords_file)) { |
|
|
318 |
coords <- utils::read.csv(file = coords_file, header = FALSE) |
|
|
319 |
colnames(coords) <- c("barcode", "in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres") |
|
|
320 |
} else { |
|
|
321 |
coords <- utils::read.csv(file = coords_file, header = TRUE) |
|
|
322 |
} |
|
|
323 |
} else if(length(coords_file) > 1) { |
|
|
324 |
stop("There are more than 1 position files in the path") |
|
|
325 |
} else { |
|
|
326 |
stop("There are no files named 'tissue_positions.csv' in the path") |
|
|
327 |
} |
|
|
328 |
|
|
|
329 |
if(inTissue){ |
|
|
330 |
coords <- coords[coords$in_tissue==1,] |
|
|
331 |
rawdata <- rawdata[,colnames(rawdata) %in% coords$barcode] |
|
|
332 |
} |
|
|
333 |
coords <- coords[match(colnames(rawdata), coords$barcode),] |
|
|
334 |
spotID <- coords$barcode |
|
|
335 |
coords <- as.matrix(coords[,c("pxl_col_in_fullres", "pxl_row_in_fullres")], ) |
|
|
336 |
colnames(coords) <- c("x", "y") |
|
|
337 |
rownames(coords) <- spotID |
|
|
338 |
|
|
|
339 |
# scale coordinates |
|
|
340 |
scale_file <- paste0(dir.path, "/spatial/scalefactors_json.json") |
|
|
341 |
if(file.exists(scale_file)){ |
|
|
342 |
scalefactors <- rjson::fromJSON(file = scale_file) |
|
|
343 |
scales <- scalefactors[[paste0("tissue_", resolution_level, "_scalef")]] |
|
|
344 |
params <- list( |
|
|
345 |
nearestpost.distance = 200*scales, # distance to nearest spot |
|
|
346 |
spot.radius = scalefactors$spot_diameter_fullres*scales, |
|
|
347 |
vis.spot.radius = scalefactors$spot_diameter_fullres*scales*2, |
|
|
348 |
spot.type = "circle") |
|
|
349 |
coords <- coords*scales |
|
|
350 |
coords[,2] <- info$height - coords[,2] |
|
|
351 |
} else { |
|
|
352 |
stop("There are no files named 'scalefactors_json.json' in the path") |
|
|
353 |
} |
|
|
354 |
|
|
|
355 |
# create VoltRon |
|
|
356 |
formVoltRon(rawdata, metadata = NULL, image, coords, main.assay = assay_name, params = params, assay.type = "spot", |
|
|
357 |
image_name = image_name, main_channel = channel_name, sample_name = sample_name, |
|
|
358 |
feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...) |
|
|
359 |
} |
|
|
360 |
|
|
|
361 |
#' importVisiumHD |
|
|
362 |
#' |
|
|
363 |
#' Importing VisiumHD data |
|
|
364 |
#' |
|
|
365 |
#' @param dir.path path to Visium output folder |
|
|
366 |
#' @param bin.size bin size of the VisiumHD output (Exp: "2", "8" and "16") |
|
|
367 |
#' @param selected_assay selected assay from Visium |
|
|
368 |
#' @param assay_name the assay name |
|
|
369 |
#' @param sample_name the name of the sample |
|
|
370 |
#' @param image_name the image name of the Visium assay, Default: main |
|
|
371 |
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E |
|
|
372 |
#' @param inTissue if TRUE, only barcodes that are in the tissue will be kept (default: TRUE) |
|
|
373 |
#' @param resolution_level the level of resolution of Visium image: "lowres" (default) or "hires" |
|
|
374 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
375 |
#' |
|
|
376 |
#' @importFrom magick image_read image_info |
|
|
377 |
#' @importFrom rjson fromJSON |
|
|
378 |
#' @importFrom utils read.csv |
|
|
379 |
#' |
|
|
380 |
#' @export |
|
|
381 |
#' |
|
|
382 |
importVisiumHD <- function(dir.path, bin.size = "8", selected_assay = "Gene Expression", assay_name = "VisiumHD", sample_name = NULL, image_name = "main", channel_name = "H&E", inTissue = TRUE, resolution_level = "lowres", ...){ |
|
|
383 |
|
|
|
384 |
# raw counts |
|
|
385 |
bin.size <- formatC(as.numeric(bin.size), width = 3, format = "d", flag = "0") |
|
|
386 |
dir.path <- paste0(dir.path, "binned_outputs/square_", bin.size, "um/") |
|
|
387 |
listoffiles <- list.files(dir.path) |
|
|
388 |
datafile <- listoffiles[grepl("filtered_feature_bc_matrix.h5", listoffiles)][1] |
|
|
389 |
datafile <- paste0(dir.path, "/", datafile) |
|
|
390 |
if(file.exists(datafile)){ |
|
|
391 |
rawdata <- import10Xh5(filename = datafile) |
|
|
392 |
if(any(names(rawdata) %in% selected_assay)){ |
|
|
393 |
rawdata <- as(rawdata[[selected_assay]], "CsparseMatrix") |
|
|
394 |
} else { |
|
|
395 |
stop("There are no assays called ", selected_assay, " in the h5 file!") |
|
|
396 |
} |
|
|
397 |
} else { |
|
|
398 |
stop("There are no files named 'filtered_feature_bc_matrix.h5' in the path") |
|
|
399 |
} |
|
|
400 |
|
|
|
401 |
# resolution |
|
|
402 |
if(!resolution_level %in% c("lowres","hires")) |
|
|
403 |
stop("resolution_level should be either 'lowres' or 'hires'!") |
|
|
404 |
|
|
|
405 |
# image |
|
|
406 |
image_file <- paste0(dir.path, paste0("/spatial/tissue_", resolution_level, "_image.png")) |
|
|
407 |
if(file.exists(image_file)){ |
|
|
408 |
image <- magick::image_read(image_file) |
|
|
409 |
info <- magick::image_info(image) |
|
|
410 |
} else { |
|
|
411 |
stop("There are no spatial image files in the path") |
|
|
412 |
} |
|
|
413 |
|
|
|
414 |
# coordinates |
|
|
415 |
if(!requireNamespace("arrow")) |
|
|
416 |
stop("Please install arrow package to import VisiumHD!: install.packages('arrow')") |
|
|
417 |
coords_file <- list.files(paste0(dir.path, "/spatial/"), full.names = TRUE) |
|
|
418 |
coords_file <- coords_file[grepl("tissue_positions",coords_file)] |
|
|
419 |
if(length(coords_file) == 1){ |
|
|
420 |
coords <- data.table::as.data.table(arrow::read_parquet(coords_file, as_data_frame = FALSE)) |
|
|
421 |
} else if(length(coords_file) > 1) { |
|
|
422 |
stop("There are more than 1 position files in the path") |
|
|
423 |
} else { |
|
|
424 |
stop("There are no files named 'tissue_positions.parquet' in the path") |
|
|
425 |
} |
|
|
426 |
if(inTissue){ |
|
|
427 |
coords <- subset(coords, in_tissue == 1) |
|
|
428 |
rawdata <- rawdata[,colnames(rawdata) %in% coords$barcode] |
|
|
429 |
} |
|
|
430 |
coords <- coords[match(colnames(rawdata), coords$barcode),] |
|
|
431 |
spotID <- coords$barcode |
|
|
432 |
coords <- as.matrix(coords[,c("pxl_col_in_fullres", "pxl_row_in_fullres")], ) |
|
|
433 |
colnames(coords) <- c("x", "y") |
|
|
434 |
rownames(coords) <- spotID |
|
|
435 |
|
|
|
436 |
# scale coordinates |
|
|
437 |
scale_file <- paste0(dir.path, "/spatial/scalefactors_json.json") |
|
|
438 |
if(file.exists(scale_file)){ |
|
|
439 |
scalefactors <- rjson::fromJSON(file = scale_file) |
|
|
440 |
scales <- scalefactors[[paste0("tissue_", resolution_level, "_scalef")]] |
|
|
441 |
params <- list( |
|
|
442 |
nearestpost.distance = scalefactors$spot_diameter_fullres*scales*(3/sqrt(2)), |
|
|
443 |
spot.radius = scalefactors$spot_diameter_fullres*scales, |
|
|
444 |
vis.spot.radius = scalefactors$spot_diameter_fullres*scales, |
|
|
445 |
spot.type = "square") |
|
|
446 |
coords <- coords*scales |
|
|
447 |
coords[,2] <- info$height - coords[,2] |
|
|
448 |
} else { |
|
|
449 |
stop("There are no files named 'scalefactors_json.json' in the path") |
|
|
450 |
} |
|
|
451 |
|
|
|
452 |
# create VoltRon |
|
|
453 |
formVoltRon(rawdata, metadata = NULL, image, coords, main.assay = assay_name, params = params, assay.type = "spot", |
|
|
454 |
image_name = image_name, main_channel = channel_name, sample_name = sample_name, |
|
|
455 |
feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...) |
|
|
456 |
} |
|
|
457 |
|
|
|
458 |
#### |
|
|
459 |
## Auxiliary #### |
|
|
460 |
#### |
|
|
461 |
|
|
|
462 |
#' import10Xh5 |
|
|
463 |
#' |
|
|
464 |
#' import the sparse matrix from the H5 file |
|
|
465 |
#' |
|
|
466 |
#' @param filename the path to h5 file |
|
|
467 |
#' |
|
|
468 |
#' @importFrom Matrix sparseMatrix |
|
|
469 |
#' |
|
|
470 |
#' @export |
|
|
471 |
import10Xh5 <- function(filename){ |
|
|
472 |
|
|
|
473 |
# check package |
|
|
474 |
if(!requireNamespace("rhdf5")){ |
|
|
475 |
stop("You have to install the rhdf5 package!: BiocManager::install('rhdf5')") |
|
|
476 |
} |
|
|
477 |
|
|
|
478 |
# check file |
|
|
479 |
if(file.exists(filename)){ |
|
|
480 |
matrix.10X <- rhdf5::h5read(file = filename, name = "matrix") |
|
|
481 |
} else { |
|
|
482 |
stop("There are no files named ", filename," in the path") |
|
|
483 |
} |
|
|
484 |
|
|
|
485 |
# get data, barcodes and feature |
|
|
486 |
features <- matrix.10X[["features"]][["name"]] |
|
|
487 |
feature_type <- matrix.10X[["features"]][["feature_type"]] |
|
|
488 |
cells <- matrix.10X[["barcodes"]] |
|
|
489 |
mat <- matrix.10X[["data"]] |
|
|
490 |
indices <- matrix.10X[["indices"]] |
|
|
491 |
indptr <- matrix.10X[["indptr"]] |
|
|
492 |
sparse.mat <- sparseMatrix(i = indices + 1, p = indptr, |
|
|
493 |
x = as.numeric(mat), dims = c(length(features), length(cells)), |
|
|
494 |
repr = "T") |
|
|
495 |
colnames(sparse.mat) <- cells |
|
|
496 |
rownames(sparse.mat) <- make.unique(features) |
|
|
497 |
|
|
|
498 |
# separate feature types |
|
|
499 |
matrix.10X <- list() |
|
|
500 |
for(feat in unique(feature_type)){ |
|
|
501 |
cur_features <- features[feature_type %in% feat] |
|
|
502 |
cur_mat <- sparse.mat[features %in% cur_features,] |
|
|
503 |
matrix.10X[[feat]] <- cur_mat |
|
|
504 |
} |
|
|
505 |
|
|
|
506 |
return(matrix.10X) |
|
|
507 |
} |
|
|
508 |
|
|
|
509 |
#### |
|
|
510 |
# Nanostring #### |
|
|
511 |
#### |
|
|
512 |
|
|
|
513 |
#### |
|
|
514 |
## GeoMx #### |
|
|
515 |
#### |
|
|
516 |
|
|
|
517 |
#' importGeoMx |
|
|
518 |
#' |
|
|
519 |
#' Import GeoMx data |
|
|
520 |
#' |
|
|
521 |
#' @param dcc.path path to the folder where the dcc files are found |
|
|
522 |
#' @param pkc.file path to the pkc file |
|
|
523 |
#' @param summarySegment the metadata csv (sep = ";") or excel file, if the file is an excel file, \code{summarySegmentSheetName} should be provided as well. |
|
|
524 |
#' @param summarySegmentSheetName the sheet name of the excel file, \code{summarySegment} |
|
|
525 |
#' @param assay_name the assay name, default: GeoMx |
|
|
526 |
#' @param image the reference morphology image of the GeoMx assay |
|
|
527 |
#' @param segment_polygons if TRUE, the ROI polygons are parsed from the OME.TIFF file |
|
|
528 |
#' @param ome.tiff the OME.TIFF file of the GeoMx experiment if exists |
|
|
529 |
#' @param resolution_level the level of resolution within GeoMx OME-TIFF image, Default: 3 |
|
|
530 |
#' @param image_name the image name of the Visium assay, Default: main |
|
|
531 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
532 |
#' |
|
|
533 |
#' @importFrom dplyr %>% full_join |
|
|
534 |
#' @importFrom utils read.csv |
|
|
535 |
#' @importFrom magick image_info image_read |
|
|
536 |
#' |
|
|
537 |
#' @export |
|
|
538 |
#' |
|
|
539 |
importGeoMx <- function(dcc.path, pkc.file, summarySegment, summarySegmentSheetName, assay_name = "GeoMx", |
|
|
540 |
image = NULL, segment_polygons = FALSE, ome.tiff = NULL, resolution_level = 3, image_name = "main", ...) |
|
|
541 |
{ |
|
|
542 |
# Get pkc file |
|
|
543 |
if(file.exists(pkc.file)){ |
|
|
544 |
pkcdata <- readPKC(pkc.file) |
|
|
545 |
} else { |
|
|
546 |
stop("pkc file is not found!") |
|
|
547 |
} |
|
|
548 |
|
|
|
549 |
# Get dcc file |
|
|
550 |
if(file.exists(dcc.path)){ |
|
|
551 |
dcc_files <- dir(dcc.path, pattern = ".dcc$", full.names = TRUE) |
|
|
552 |
if(length(dcc_files) == 0){ |
|
|
553 |
stop("no dcc files are found under ", dcc.path) |
|
|
554 |
} else { |
|
|
555 |
dcc_files <- dcc_files[!grepl("A01.dcc$", dcc_files)] |
|
|
556 |
dcc_filenames <- dir(dcc.path, pattern = ".dcc$", full.names = FALSE) |
|
|
557 |
dcc_filenames <- dcc_filenames[!grepl("A01.dcc$", dcc_filenames)] |
|
|
558 |
dcc_filenames <- gsub(".dcc$", "", dcc_filenames) |
|
|
559 |
dcc_filenames <- gsub("-", "_", dcc_filenames) |
|
|
560 |
dccData <- sapply(dcc_files, readDCC, simplify = FALSE, USE.NAMES = FALSE) |
|
|
561 |
names(dccData) <- dcc_filenames |
|
|
562 |
} |
|
|
563 |
} else { |
|
|
564 |
stop("path to dcc files does not exist!") |
|
|
565 |
} |
|
|
566 |
|
|
|
567 |
# merge dcc files |
|
|
568 |
rawdata <- NULL |
|
|
569 |
for(i in seq_len(length(dccData))){ |
|
|
570 |
cur_data <- dccData[[i]]$Code_Summary |
|
|
571 |
colnames(cur_data) <- c("RTS_ID", dcc_filenames[i]) |
|
|
572 |
if(i == 1){ |
|
|
573 |
rawdata <- cur_data |
|
|
574 |
} else { |
|
|
575 |
suppressMessages(rawdata <- rawdata %>% full_join(cur_data)) |
|
|
576 |
} |
|
|
577 |
} |
|
|
578 |
rawdata[is.na(rawdata)] <- 0 |
|
|
579 |
rownames(rawdata) <- rawdata$RTS_ID |
|
|
580 |
rawdata <- rawdata[,!colnames(rawdata) %in% "RTS_ID"] |
|
|
581 |
|
|
|
582 |
# get negative probes and targets |
|
|
583 |
# NegProbes <- pkcdata$RTS_ID[pkcdata$Target == "NegProbe-WTX"] |
|
|
584 |
NegProbes <- pkcdata$RTS_ID[grepl("NegProbe", pkcdata$Target)] |
|
|
585 |
|
|
|
586 |
# negative probes |
|
|
587 |
rawdata_neg <- rawdata[rownames(rawdata) %in% NegProbes, ] |
|
|
588 |
rownames(rawdata_neg) <- paste0(rownames(rawdata_neg), "_", |
|
|
589 |
pkcdata$Target[match(rownames(rawdata_neg), pkcdata$RTS_ID)]) |
|
|
590 |
rawdata_neg <- as.matrix(rawdata_neg) |
|
|
591 |
|
|
|
592 |
# other probes |
|
|
593 |
rawdata <- rawdata[!rownames(rawdata) %in% NegProbes, ] |
|
|
594 |
rownames(rawdata) <- pkcdata$Target[match(rownames(rawdata), pkcdata$RTS_ID)] |
|
|
595 |
rawdata <- as.matrix(rawdata) |
|
|
596 |
|
|
|
597 |
# get segment summary |
|
|
598 |
if(file.exists(summarySegment)){ |
|
|
599 |
if(grepl(".xls$|.xlsx$", summarySegment)){ |
|
|
600 |
if (!requireNamespace('xlsx')) |
|
|
601 |
stop("Please install xlsx package for using the read.xlsx function!: install.packages('xlsx')") |
|
|
602 |
if(!is.null(summarySegmentSheetName)){ |
|
|
603 |
segmentsummary <- xlsx::read.xlsx(summarySegment, sheetName = summarySegmentSheetName) |
|
|
604 |
} else { |
|
|
605 |
stop("Please provide 'summarySegmentSheetName' for the excel sheet name!") |
|
|
606 |
} |
|
|
607 |
} else if(grepl(".csv$", summarySegment)) { |
|
|
608 |
segmentsummary <- utils::read.csv(summarySegment, row.names = NULL, header = T, sep = ";") |
|
|
609 |
} |
|
|
610 |
rownames(segmentsummary) <- gsub(".dcc$", "", segmentsummary$Sample_ID) |
|
|
611 |
rownames(segmentsummary) <- gsub("-", "_", rownames(segmentsummary)) |
|
|
612 |
if(all(dcc_filenames %in% rownames(segmentsummary))){ |
|
|
613 |
segmentsummary <- segmentsummary[dcc_filenames, ] |
|
|
614 |
} else{ |
|
|
615 |
stop("Some GeoMx dcc files are not represented in the segment summary file!") |
|
|
616 |
} |
|
|
617 |
} else { |
|
|
618 |
stop(summarySegment, " is not found!") |
|
|
619 |
} |
|
|
620 |
|
|
|
621 |
# get image |
|
|
622 |
if(!is.null(image)){ |
|
|
623 |
if(file.exists(image)){ |
|
|
624 |
image <- magick::image_read(image) |
|
|
625 |
} else { |
|
|
626 |
stop(image, " is not found!") |
|
|
627 |
} |
|
|
628 |
} else { |
|
|
629 |
stop("Please provide a image for the GeoMx data!") |
|
|
630 |
} |
|
|
631 |
geomx_image_info <- magick::image_info(image) |
|
|
632 |
|
|
|
633 |
# get coordinates |
|
|
634 |
coords <- segmentsummary[,c("X","Y")] |
|
|
635 |
colnames(coords) <- c("x", "y") |
|
|
636 |
rownames(coords) <- segmentsummary$ROI.name |
|
|
637 |
coords <- rescaleGeoMxPoints(coords, segmentsummary, geomx_image_info) |
|
|
638 |
|
|
|
639 |
# get ROI segments (polygons) |
|
|
640 |
segments <- list() |
|
|
641 |
if(!is.null(ome.tiff)){ |
|
|
642 |
|
|
|
643 |
# get segments |
|
|
644 |
segments <- importGeoMxSegments(ome.tiff, segmentsummary, geomx_image_info) |
|
|
645 |
segments <- segments[rownames(coords)] |
|
|
646 |
|
|
|
647 |
# parse channels if ome.tiff is given |
|
|
648 |
channels <- importGeoMxChannels(ome.tiff, segmentsummary, geomx_image_info, resolution_level = resolution_level) |
|
|
649 |
image <- c(list(scanimage = image), channels) |
|
|
650 |
} |
|
|
651 |
|
|
|
652 |
# create VoltRon for non-negative probes |
|
|
653 |
object <- formVoltRon(rawdata, metadata = segmentsummary, image, coords, segments, main.assay = assay_name, assay.type = "ROI", |
|
|
654 |
feature_name = "RNA", image_name = image_name, ...) |
|
|
655 |
|
|
|
656 |
# add negative probe assay as new feature set |
|
|
657 |
object <- addFeature(object, assay = assay_name, data = rawdata_neg, feature_name = "NegProbe") |
|
|
658 |
|
|
|
659 |
# return |
|
|
660 |
return(object) |
|
|
661 |
} |
|
|
662 |
|
|
|
663 |
#' readPKC |
|
|
664 |
#' |
|
|
665 |
#' Read a NanoString Probe Kit Configuration (PKC) file. Adapted from \code{GeomxTools} package. |
|
|
666 |
#' |
|
|
667 |
#' @param file A character string containing the path to the PKC file. |
|
|
668 |
#' @param default_pkc_vers Optional list of pkc file names to use as default if more than one pkc version of each module is provided. |
|
|
669 |
#' |
|
|
670 |
#' @importFrom rjson fromJSON |
|
|
671 |
#' |
|
|
672 |
#' @noRd |
|
|
673 |
readPKC <- function (file, default_pkc_vers = NULL) |
|
|
674 |
{ |
|
|
675 |
pkc_json_list <- lapply(file, function(pkc_file) { |
|
|
676 |
rjson::fromJSON(file = pkc_file) |
|
|
677 |
}) |
|
|
678 |
pkc_names <- unlist(lapply(file, function(pkc_file) { |
|
|
679 |
base_pkc_name <- gsub(".pkc", "", trimws(basename(pkc_file))) |
|
|
680 |
return(base_pkc_name) |
|
|
681 |
})) |
|
|
682 |
names(pkc_json_list) <- pkc_names |
|
|
683 |
pkc_modules <- basename(unlist(lapply(pkc_names, sub, pattern = "_[^_]+$", |
|
|
684 |
replacement = ""))) |
|
|
685 |
names(pkc_modules) <- pkc_names |
|
|
686 |
header <- list(PKCFileName = sapply(pkc_json_list, function(list) list[["Name"]]), |
|
|
687 |
PKCModule = pkc_modules, |
|
|
688 |
PKCFileVersion = sapply(pkc_json_list, function(list) list[["Version"]]), |
|
|
689 |
PKCFileDate = sapply(pkc_json_list, function(list) list[["Date"]]), |
|
|
690 |
AnalyteType = sapply(pkc_json_list, function(list) list[["AnalyteType"]]), |
|
|
691 |
MinArea = sapply(pkc_json_list, function(list) list[["MinArea"]]), |
|
|
692 |
MinNuclei = sapply(pkc_json_list, function(list) list[["MinNuclei"]])) |
|
|
693 |
|
|
|
694 |
multi_idx <- duplicated(header[["PKCModule"]]) |
|
|
695 |
multi_mods <- unique(header[["PKCModule"]][multi_idx]) |
|
|
696 |
if (length(multi_mods) < 1) { |
|
|
697 |
if (!is.null(default_pkc_vers)) { |
|
|
698 |
warning("Only one version found per PKC module. ", |
|
|
699 |
"No PKCs need to be combined. ", "Therefore, no default PKC versions will be used.") |
|
|
700 |
} |
|
|
701 |
} |
|
|
702 |
else { |
|
|
703 |
use_pkc_names <- lapply(multi_mods, function(mod) { |
|
|
704 |
mod_idx <- header[["PKCModule"]] == mod |
|
|
705 |
max_vers <- as.numeric(as.character(max(as.numeric_version(header[["PKCFileVersion"]][mod_idx])))) |
|
|
706 |
max_name <- names(header[["PKCFileVersion"]][header[["PKCFileVersion"]] == max_vers]) |
|
|
707 |
return(max_name) |
|
|
708 |
}) |
|
|
709 |
names(use_pkc_names) <- multi_mods |
|
|
710 |
if (!is.null(default_pkc_vers)) { |
|
|
711 |
default_names <- unlist(lapply(default_pkc_vers, function(pkc_file) { |
|
|
712 |
base_pkc_name <- gsub(".pkc", "", trimws(basename(pkc_file))) |
|
|
713 |
return(base_pkc_name) |
|
|
714 |
})) |
|
|
715 |
default_mods <- unlist(lapply(default_names, sub, pattern = "_[^_]+$", |
|
|
716 |
replacement = "")) |
|
|
717 |
dup_defaults <- default_names[duplicated(default_mods) | |
|
|
718 |
duplicated(default_mods, fromLast = TRUE)] |
|
|
719 |
if (!all(default_names %in% names(header[["PKCFileName"]]))) { |
|
|
720 |
removed_pkcs <- default_pkc_vers[!default_names %in% |
|
|
721 |
names(header[["PKCFileName"]])] |
|
|
722 |
stop("Could not match all default PKC versions with a PKC file name. ", |
|
|
723 |
"Check default file names match exactly to a PKC file name.\n", |
|
|
724 |
paste0("Unmatched default PKC versions: ", |
|
|
725 |
removed_pkcs)) |
|
|
726 |
} |
|
|
727 |
else if (length(dup_defaults) > 0) { |
|
|
728 |
stop("There should only be one default PKC version per module. ", |
|
|
729 |
"Ensure only one version per module in default PKCs list.\n", |
|
|
730 |
"Multiple default PKC version conflicts: ", |
|
|
731 |
paste(dup_defaults, collapse = ", ")) |
|
|
732 |
} |
|
|
733 |
else { |
|
|
734 |
use_pkc_names[default_mods] <- default_names |
|
|
735 |
} |
|
|
736 |
} |
|
|
737 |
} |
|
|
738 |
rtsid_lookup_df <- generate_pkc_lookup(pkc_json_list) |
|
|
739 |
rtsid_lookup_df$Negative <- grepl("Negative", rtsid_lookup_df$CodeClass) |
|
|
740 |
rtsid_lookup_df$RTS_ID <- gsub("RNA", "RTS00", rtsid_lookup_df[["RTS_ID"]]) |
|
|
741 |
# rtsid_lookup_df <- S4Vectors::DataFrame(rtsid_lookup_df) |
|
|
742 |
rtsid_lookup_df <- data.table::data.table(rtsid_lookup_df) |
|
|
743 |
if (length(multi_mods) > 0) { |
|
|
744 |
for (mod in names(use_pkc_names)) { |
|
|
745 |
mod_vers <- names(header[["PKCModule"]])[header[["PKCModule"]] == |
|
|
746 |
mod] |
|
|
747 |
mod_lookup <- rtsid_lookup_df[rtsid_lookup_df$Module %in% |
|
|
748 |
mod_vers, ] |
|
|
749 |
mod_tab <- table(mod_lookup$RTS_ID) |
|
|
750 |
remove_rts <- names(mod_tab[mod_tab != length(mod_vers)]) |
|
|
751 |
if (length(remove_rts) > 0) { |
|
|
752 |
warning("The following probes were removed from analysis", |
|
|
753 |
" as they were not found in all PKC module versions used.\n", |
|
|
754 |
# paste(utils::capture.output(print(subset(rtsid_lookup_df, |
|
|
755 |
# subset = RTS_ID %in% remove_rts))), collapse = "\n") |
|
|
756 |
) |
|
|
757 |
rtsid_lookup_df <- subset(rtsid_lookup_df, subset = !RTS_ID %in% |
|
|
758 |
remove_rts) |
|
|
759 |
} |
|
|
760 |
remove_vers <- mod_vers[mod_vers != use_pkc_names[mod]] |
|
|
761 |
rtsid_lookup_df <- subset(rtsid_lookup_df, subset = !Module %in% |
|
|
762 |
remove_vers) |
|
|
763 |
warning("The following PKC versions were removed from analysis", |
|
|
764 |
" as they were overridden by a newer PKC version or", |
|
|
765 |
" were overridden by a user-defined default PKC version.\n", |
|
|
766 |
paste(remove_vers, collapse = ", ")) |
|
|
767 |
header <- lapply(header, function(elem) { |
|
|
768 |
elem[!names(elem) %in% remove_vers] |
|
|
769 |
}) |
|
|
770 |
} |
|
|
771 |
} |
|
|
772 |
# S4Vectors::metadata(rtsid_lookup_df) <- header |
|
|
773 |
return(rtsid_lookup_df) |
|
|
774 |
} |
|
|
775 |
|
|
|
776 |
#' readDCC |
|
|
777 |
#' |
|
|
778 |
#' Read a NanoString GeoMx Digital Count Conversion (DCC) file. |
|
|
779 |
#' |
|
|
780 |
#' @param file A character string containing the path to the DCC file. |
|
|
781 |
#' |
|
|
782 |
#' @noRd |
|
|
783 |
readDCC <- function(file) |
|
|
784 |
{ |
|
|
785 |
lines <- trimws(readLines(file)) |
|
|
786 |
trimGalore <- grep("trimGalore", lines) |
|
|
787 |
if (length(trimGalore) > 0) { |
|
|
788 |
Raw <- grep("Raw", lines) |
|
|
789 |
lines <- lines[-c(trimGalore:(Raw - 1))] |
|
|
790 |
} |
|
|
791 |
lines <- gsub("SoftwareVersion,\"GeoMx_NGS_Pipeline_ ", |
|
|
792 |
"SoftwareVersion,", lines) |
|
|
793 |
lines <- gsub("SoftwareVersion,\"GeoMx_NGS_Pipeline_", "SoftwareVersion,", |
|
|
794 |
lines) |
|
|
795 |
lines <- gsub("SoftwareVersion,DRAGEN_GeoMx_", "SoftwareVersion,", |
|
|
796 |
lines) |
|
|
797 |
lines[grepl("SoftwareVersion", lines)] <- gsub("\"", "", |
|
|
798 |
lines[grepl("SoftwareVersion", lines)]) |
|
|
799 |
tags <- c("Header", "Scan_Attributes", "NGS_Processing_Attributes", "Code_Summary") |
|
|
800 |
output <- sapply(tags, function(tag) { |
|
|
801 |
bounds <- charmatch(sprintf(c("<%s>", "</%s>"), tag), |
|
|
802 |
lines) |
|
|
803 |
if (anyNA(bounds) || bounds[1L] + 1L >= bounds[2L]) |
|
|
804 |
lines[integer(0)] |
|
|
805 |
else lines[(bounds[1L] + 1L):(bounds[2L] - 1L)] |
|
|
806 |
}, simplify = FALSE) |
|
|
807 |
for (tag in c("Header", "Scan_Attributes", "NGS_Processing_Attributes")) { |
|
|
808 |
while (length(bad <- grep(",", output[[tag]], invert = TRUE)) > |
|
|
809 |
0L) { |
|
|
810 |
bad <- bad[1L] |
|
|
811 |
if (bad == 1L) |
|
|
812 |
stop(sprintf("%s section has malformed first line", |
|
|
813 |
tag)) |
|
|
814 |
fixed <- output[[tag]] |
|
|
815 |
fixed[bad - 1L] <- sprintf("%s %s", fixed[bad - |
|
|
816 |
1L], fixed[bad]) |
|
|
817 |
output[[tag]] <- fixed[-bad] |
|
|
818 |
} |
|
|
819 |
output[[tag]] <- strsplit(output[[tag]], split = ",") |
|
|
820 |
output[[tag]] <- structure(lapply(output[[tag]], function(x) if (length(x) == |
|
|
821 |
1L) |
|
|
822 |
"" |
|
|
823 |
else x[2L]), names = lapply(output[[tag]], `[`, 1L), |
|
|
824 |
class = "data.frame", row.names = basename(file)) |
|
|
825 |
} |
|
|
826 |
cols <- c("FileVersion", "SoftwareVersion") |
|
|
827 |
if (!(all(cols %in% colnames(output[["Header"]])))) |
|
|
828 |
stop("Header section must contain \"FileVersion\" and \"SoftwareVersion\"") |
|
|
829 |
output[["Header"]][, cols] <- lapply(output[["Header"]][, |
|
|
830 |
cols], numeric_version) |
|
|
831 |
fileVersion <- output[["Header"]][1L, "FileVersion"] |
|
|
832 |
if (!(numeric_version(fileVersion) %in% numeric_version(c("0.01", |
|
|
833 |
"0.02")))) |
|
|
834 |
stop("\"FileVersion\" in Header section must be 0.01 or 0.02") |
|
|
835 |
for (section in c("Header", "Scan_Attributes", "NGS_Processing_Attributes")) { |
|
|
836 |
valid <- .validDccSchema(output[[section]], fileVersion, |
|
|
837 |
section) |
|
|
838 |
if (!isTRUE(valid)) |
|
|
839 |
stop(valid) |
|
|
840 |
} |
|
|
841 |
output[["Header"]][["Date"]] <- as.Date(output[["Header"]][["Date"]], |
|
|
842 |
format = "%Y-%m-%d") |
|
|
843 |
cols <- c("Raw", "Trimmed", "Stitched", "Aligned", "umiQ30", |
|
|
844 |
"rtsQ30") |
|
|
845 |
output[["NGS_Processing_Attributes"]][, cols] <- lapply(output[["NGS_Processing_Attributes"]][, |
|
|
846 |
cols], as.numeric) |
|
|
847 |
names(output[["Scan_Attributes"]])[names(output[["Scan_Attributes"]]) == |
|
|
848 |
"ID"] <- "SampleID" |
|
|
849 |
output[["Code_Summary"]] <- paste0("RTS_ID,Count\n", paste(output[["Code_Summary"]], |
|
|
850 |
collapse = "\n")) |
|
|
851 |
output[["Code_Summary"]] <- utils::read.csv(textConnection(output[["Code_Summary"]]), |
|
|
852 |
colClasses = c(RTS_ID = "character", Count = "numeric")) |
|
|
853 |
output[["Code_Summary"]][["Count"]] <- as.integer(round(output[["Code_Summary"]][["Count"]])) |
|
|
854 |
rn <- output[["Code_Summary"]][["RTS_ID"]] |
|
|
855 |
if ((ndups <- anyDuplicated(rn)) > 0L) { |
|
|
856 |
warning(sprintf("removed %d rows from \"Code_Summary\" due to duplicate rownames", |
|
|
857 |
ndups)) |
|
|
858 |
ok <- which(!duplicated(rn, fromLast = FALSE) & !duplicated(rn, |
|
|
859 |
fromLast = TRUE)) |
|
|
860 |
rn <- rn[ok] |
|
|
861 |
output[["Code_Summary"]] <- output[["Code_Summary"]][ok, |
|
|
862 |
, drop = FALSE] |
|
|
863 |
} |
|
|
864 |
rownames(output[["Code_Summary"]]) <- rn |
|
|
865 |
output[["NGS_Processing_Attributes"]][, "DeduplicatedReads"] <- sum(output[["Code_Summary"]][["Count"]]) |
|
|
866 |
return(output) |
|
|
867 |
} |
|
|
868 |
|
|
|
869 |
|
|
|
870 |
#' importGeoMxSegments |
|
|
871 |
#' |
|
|
872 |
#' Import ROI polygons from the OME.TIFF file |
|
|
873 |
#' |
|
|
874 |
#' @param ome.tiff the OME.TIFF file of the GeoMx Experiment |
|
|
875 |
#' @param summary segmentation summary data frame |
|
|
876 |
#' @param imageinfo image information |
|
|
877 |
#' |
|
|
878 |
#' @noRd |
|
|
879 |
importGeoMxSegments <- function(ome.tiff, summary, imageinfo){ |
|
|
880 |
|
|
|
881 |
# check file |
|
|
882 |
if(file.exists(ome.tiff)){ |
|
|
883 |
if(grepl(".ome.tiff$|.ome.tif$", ome.tiff)){ |
|
|
884 |
if (!requireNamespace('RBioFormats')) |
|
|
885 |
stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") |
|
|
886 |
if (!requireNamespace('XML')) |
|
|
887 |
stop("Please install XML package to extract xml from the ome.tiff file!") |
|
|
888 |
omexml <- RBioFormats::read.omexml(ome.tiff) |
|
|
889 |
} else if(grepl(".xml$", ome.tiff)){ |
|
|
890 |
omexml <- XML::xmlParse(file = ome.tiff) |
|
|
891 |
} else { |
|
|
892 |
stop("Please provide either an ome.tiff or .xml file!") |
|
|
893 |
} |
|
|
894 |
omexml <- XML::xmlToList(omexml, simplify = TRUE) |
|
|
895 |
} else { |
|
|
896 |
stop("There are no files named ", ome.tiff," in the path") |
|
|
897 |
} |
|
|
898 |
|
|
|
899 |
# get ROIs |
|
|
900 |
ROIs <- omexml[which(names(omexml) == "ROI")] |
|
|
901 |
|
|
|
902 |
# get masks for each ROI |
|
|
903 |
mask_lists <- list() |
|
|
904 |
for(i in seq_len(length(ROIs))){ |
|
|
905 |
cur_ROI <- ROIs[[i]] |
|
|
906 |
|
|
|
907 |
# if the shape is a polygon |
|
|
908 |
if("Polygon" %in% names(cur_ROI$Union)){ |
|
|
909 |
coords <- strsplit(cur_ROI$Union$Polygon, split = "\\n") |
|
|
910 |
coords <- strsplit(coords$Points, split = " ")[[1]] |
|
|
911 |
coords <- sapply(coords, function(x) as.numeric(strsplit(x, split = ",")[[1]]), USE.NAMES = FALSE) |
|
|
912 |
coords <- as.data.frame(t(coords)) |
|
|
913 |
colnames(coords) <- c("x", "y") |
|
|
914 |
coords <- rescaleGeoMxPoints(coords, summary, imageinfo) |
|
|
915 |
coords <- data.frame(id = cur_ROI$Union$Label[["Text"]], coords) |
|
|
916 |
mask_lists[[cur_ROI$Union$Label[["Text"]]]] <- data.frame(coords) |
|
|
917 |
|
|
|
918 |
# if the shape is an ellipse |
|
|
919 |
} else if("Ellipse" %in% names(cur_ROI$Union)){ |
|
|
920 |
coords <- as.numeric(cur_ROI$Union$Ellipse[c("X","Y", "RadiusX", "RadiusY")]) |
|
|
921 |
coords <- as.data.frame(matrix(coords, nrow = 1)) |
|
|
922 |
colnames(coords) <- c("x", "y", "rx", "ry") |
|
|
923 |
coords[,c("x", "y")] <- rescaleGeoMxPoints(coords[,c("x", "y")], summary, imageinfo) |
|
|
924 |
coords$rx <- coords$rx * imageinfo$width/summary$Scan.Width[1] |
|
|
925 |
coords$ry <- coords$ry * imageinfo$height/summary$Scan.Height[1] |
|
|
926 |
coords <- data.frame(id = cur_ROI$Union$Label[["Text"]], coords) |
|
|
927 |
mask_lists[[cur_ROI$Union$Label[["Text"]]]] <- coords |
|
|
928 |
} |
|
|
929 |
} |
|
|
930 |
|
|
|
931 |
mask_lists |
|
|
932 |
} |
|
|
933 |
|
|
|
934 |
#' rescaleGeoMxROIs |
|
|
935 |
#' |
|
|
936 |
#' Rescale GeoMx point (center or polygon corners of ROI) coordinates |
|
|
937 |
#' |
|
|
938 |
#' @param pts coordinates of the cells from the Xenium assays |
|
|
939 |
#' @param summary segmentation summary data frame |
|
|
940 |
#' @param imageinfo image information |
|
|
941 |
#' |
|
|
942 |
#' @noRd |
|
|
943 |
rescaleGeoMxPoints <- function(pts, summary, imageinfo){ |
|
|
944 |
|
|
|
945 |
xRatio = imageinfo$width/summary$Scan.Width[1] |
|
|
946 |
yRatio = imageinfo$height/summary$Scan.Height[1] |
|
|
947 |
pts$x = (pts$x - summary$Scan.Offset.X[1]) * xRatio |
|
|
948 |
pts$y = (pts$y - summary$Scan.Offset.Y[1]) * yRatio |
|
|
949 |
pts$y = imageinfo$height - pts$y |
|
|
950 |
pts <- as.matrix(pts) |
|
|
951 |
|
|
|
952 |
# return |
|
|
953 |
return(pts) |
|
|
954 |
} |
|
|
955 |
|
|
|
956 |
#' importGeoMxChannels |
|
|
957 |
#' |
|
|
958 |
#' Rescale GeoMx channels with respect to the scan image |
|
|
959 |
#' |
|
|
960 |
#' @param ome.tiff the OME.TIFF file of the GeoMx Experiment |
|
|
961 |
#' @param summary segmentation summary data frame |
|
|
962 |
#' @param imageinfo image information |
|
|
963 |
#' @param resolution_level the resolution level (1-7) of the image parsed from the OME.TIFF file |
|
|
964 |
#' |
|
|
965 |
#' @param RBioFormats read.image |
|
|
966 |
#' @param EBImage as.Image |
|
|
967 |
#' @param grDevices as.raster |
|
|
968 |
#' @param magick image_read |
|
|
969 |
#' |
|
|
970 |
#' @noRd |
|
|
971 |
importGeoMxChannels <- function(ome.tiff, summary, imageinfo, resolution_level){ |
|
|
972 |
|
|
|
973 |
# check file |
|
|
974 |
if(file.exists(ome.tiff)){ |
|
|
975 |
if(grepl(".ome.tiff$|.ome.tif$", ome.tiff)){ |
|
|
976 |
if (!requireNamespace('RBioFormats')) |
|
|
977 |
stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") |
|
|
978 |
if (!requireNamespace('XML')) |
|
|
979 |
stop("Please install XML package to extract xml from the ome.tiff file!") |
|
|
980 |
omexml <- RBioFormats::read.omexml(ome.tiff) |
|
|
981 |
omexml <- XML::xmlToList(omexml, simplify = TRUE) |
|
|
982 |
} else { |
|
|
983 |
warning("ome.tiff format not found!") |
|
|
984 |
return(NULL) |
|
|
985 |
} |
|
|
986 |
} else { |
|
|
987 |
stop("There are no files named ", ome.tiff," in the path") |
|
|
988 |
} |
|
|
989 |
|
|
|
990 |
# get all channels |
|
|
991 |
ome.tiff <- RBioFormats::read.image(ome.tiff, resolution = resolution_level) |
|
|
992 |
|
|
|
993 |
# get frame information |
|
|
994 |
nframes <- ome.tiff@metadata$coreMetadata$imageCount |
|
|
995 |
frames <- EBImage::getFrames(ome.tiff) |
|
|
996 |
frames <- lapply(frames, function(x){ |
|
|
997 |
img <- magick::image_read(grDevices::as.raster(EBImage::as.Image(x))) |
|
|
998 |
rescaleGeoMxImage(img, summary, imageinfo, resolution_level = resolution_level) |
|
|
999 |
}) |
|
|
1000 |
|
|
|
1001 |
# get channel names |
|
|
1002 |
omexml <- omexml$StructuredAnnotations[seq(from = 1, by = 2, length.out = nframes)] |
|
|
1003 |
channel_names <- lapply(omexml, function(x){ |
|
|
1004 |
x$Value$ChannelInfo$BiologicalTarget |
|
|
1005 |
}) |
|
|
1006 |
names(frames) <- channel_names |
|
|
1007 |
|
|
|
1008 |
# return frames |
|
|
1009 |
return(frames) |
|
|
1010 |
} |
|
|
1011 |
|
|
|
1012 |
#' rescaleGeoMxImage |
|
|
1013 |
#' |
|
|
1014 |
#' Rescale GeoMx channels with respect to the scan image |
|
|
1015 |
#' |
|
|
1016 |
#' @param img coordinates of the cells from the Xenium assays |
|
|
1017 |
#' @param summary segmentation summary data frame |
|
|
1018 |
#' @param imageinfo image information |
|
|
1019 |
#' @param resolution_level the resolution level (1-7) of the image parsed from the OME.TIFF file |
|
|
1020 |
#' |
|
|
1021 |
#' @param magick image_crop |
|
|
1022 |
#' |
|
|
1023 |
#' @noRd |
|
|
1024 |
rescaleGeoMxImage <- function(img, summary, imageinfo, resolution_level){ |
|
|
1025 |
|
|
|
1026 |
# adjust offset and scan size to the resolution level |
|
|
1027 |
offset.x <- summary$Scan.Offset.X[1]/(2*(resolution_level-1)) |
|
|
1028 |
offset.y <- summary$Scan.Offset.Y[1]/(2*(resolution_level-1)) |
|
|
1029 |
scan.width <- summary$Scan.Width[1]/(2*(resolution_level-1)) |
|
|
1030 |
scan.height <- summary$Scan.Height[1]/(2*(resolution_level-1)) |
|
|
1031 |
|
|
|
1032 |
# crop image given adjusted offsets |
|
|
1033 |
new_img <- magick::image_crop(img, geometry = paste0(scan.width, "x", scan.height, "+", offset.x, "+", offset.y)) |
|
|
1034 |
|
|
|
1035 |
# resize image |
|
|
1036 |
new_img <- magick::image_resize(new_img, geometry = paste0(imageinfo$width, "x", imageinfo$height)) |
|
|
1037 |
|
|
|
1038 |
# return |
|
|
1039 |
return(new_img) |
|
|
1040 |
} |
|
|
1041 |
|
|
|
1042 |
#### |
|
|
1043 |
## CosMx #### |
|
|
1044 |
#### |
|
|
1045 |
|
|
|
1046 |
#' importCosMx |
|
|
1047 |
#' |
|
|
1048 |
#' Import CosMx data |
|
|
1049 |
#' |
|
|
1050 |
#' @param tiledbURI the path to the tiledb folder |
|
|
1051 |
#' @param assay_name the assay name, default: CosMx |
|
|
1052 |
#' @param image the reference morphology image of the CosMx assay |
|
|
1053 |
#' @param image_name the image name of the CosMx assay, Default: main |
|
|
1054 |
#' @param ome.tiff the OME.TIFF file of the CosMx experiment if exists |
|
|
1055 |
#' @param import_molecules if TRUE, molecule assay will be created along with cell assay. |
|
|
1056 |
#' @param verbose verbose |
|
|
1057 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1058 |
#' |
|
|
1059 |
#' @importFrom data.table data.table |
|
|
1060 |
#' @importFrom ids random_id |
|
|
1061 |
#' |
|
|
1062 |
#' @export |
|
|
1063 |
#' |
|
|
1064 |
importCosMx <- function(tiledbURI, assay_name = "CosMx", |
|
|
1065 |
image = NULL, image_name = "main", ome.tiff = NULL, import_molecules = FALSE, verbose = TRUE, ...) |
|
|
1066 |
{ |
|
|
1067 |
# check tiledb and tiledbsc |
|
|
1068 |
if (!requireNamespace("tiledb", quietly = TRUE)) |
|
|
1069 |
stop("Please install the tiledb package: \n |
|
|
1070 |
remotes::install_github('TileDB-Inc/TileDB-R', force = TRUE, ref = '0.17.0')") |
|
|
1071 |
if (!requireNamespace("tiledbsc", quietly = TRUE)) |
|
|
1072 |
stop("Please install the tiledbsc package: \n |
|
|
1073 |
remotes::install_github('tiledb-inc/tiledbsc', force = TRUE, ref = '8157b7d54398b1f957832f37fff0b173d355530e')") |
|
|
1074 |
|
|
|
1075 |
# get tiledb |
|
|
1076 |
if(verbose) |
|
|
1077 |
message("Scanning TileDB array for cell data ...") |
|
|
1078 |
tiledb_scdataset <- tiledbsc::SOMACollection$new(uri = tiledbURI, verbose = FALSE) |
|
|
1079 |
|
|
|
1080 |
# raw counts |
|
|
1081 |
counts <- tiledb_scdataset$somas$RNA$X$members$counts$to_matrix(batch_mode = TRUE) |
|
|
1082 |
counts <- as.matrix(counts) |
|
|
1083 |
|
|
|
1084 |
# cell metadata |
|
|
1085 |
metadata <- tiledb_scdataset$somas$RNA$obs$to_dataframe() |
|
|
1086 |
|
|
|
1087 |
# coordinates |
|
|
1088 |
coords <- as.matrix(metadata[,c("x_slide_mm", "y_slide_mm")]) |
|
|
1089 |
colnames(coords) <- c("x","y") |
|
|
1090 |
|
|
|
1091 |
# transcripts |
|
|
1092 |
if(import_molecules){ |
|
|
1093 |
if(verbose) |
|
|
1094 |
message("Scanning TileDB array for molecule data ...") |
|
|
1095 |
subcellular <- tiledb::tiledb_array( |
|
|
1096 |
tiledb_scdataset$somas$RNA$obsm$members$transcriptCoords$uri, |
|
|
1097 |
return_as="data.table")[] |
|
|
1098 |
colnames(subcellular)[colnames(subcellular)=="target"] <- "gene" |
|
|
1099 |
} |
|
|
1100 |
|
|
|
1101 |
# get slides and construct VoltRon objects for each slides |
|
|
1102 |
slides <- unique(metadata$slide_ID_numeric) |
|
|
1103 |
|
|
|
1104 |
# for each slide create a VoltRon object with combined layers |
|
|
1105 |
vr_list <- list() |
|
|
1106 |
for(slide in slides){ |
|
|
1107 |
|
|
|
1108 |
# cell assay |
|
|
1109 |
if(verbose) |
|
|
1110 |
message("Creating cell level assay for slide ", slide, " ...") |
|
|
1111 |
|
|
|
1112 |
# slide info |
|
|
1113 |
cur_coords <- coords[metadata$slide_ID_numeric == slide,] |
|
|
1114 |
cur_counts <- counts[,rownames(cur_coords)] |
|
|
1115 |
cur_metadata <- metadata[rownames(cur_coords),] |
|
|
1116 |
|
|
|
1117 |
# create VoltRon object |
|
|
1118 |
cell_object <- formVoltRon(data = cur_counts, metadata = cur_metadata, image = image, coords = cur_coords, |
|
|
1119 |
main.assay = assay_name, assay.type = "cell", image_name = image_name, main_featureset = "RNA", ...) |
|
|
1120 |
cell_object$Sample <- paste0("Slide", slide) |
|
|
1121 |
|
|
|
1122 |
# molecule assay |
|
|
1123 |
if(import_molecules){ |
|
|
1124 |
|
|
|
1125 |
# get slide |
|
|
1126 |
if(verbose) |
|
|
1127 |
message("Creating molecule level assay for slide ", slide, " ...") |
|
|
1128 |
cur_subcellular <- subset(subcellular, slideID == slide) |
|
|
1129 |
|
|
|
1130 |
# coordinates |
|
|
1131 |
mol_coords <- as.matrix(cur_subcellular[,c("x_FOV_px", "y_FOV_px")]) |
|
|
1132 |
colnames(mol_coords) <- c("x", "y") |
|
|
1133 |
|
|
|
1134 |
# get subcellular data components |
|
|
1135 |
mol_metadata <- cur_subcellular[,colnames(cur_subcellular)[!colnames(cur_subcellular) %in% c("CellId", "cell_id", "x_FOV_px", "y_FOV_px")], with = FALSE] |
|
|
1136 |
set.seed(nrow(mol_metadata)) |
|
|
1137 |
mol_metadata$id <- rownames(mol_metadata) |
|
|
1138 |
mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE)) |
|
|
1139 |
mol_metadata$assay_id <- "Assay1" |
|
|
1140 |
mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")] |
|
|
1141 |
|
|
|
1142 |
# coord names |
|
|
1143 |
rownames(mol_coords) <- mol_metadata$id |
|
|
1144 |
|
|
|
1145 |
# create VoltRon assay for molecules |
|
|
1146 |
mol_assay <- formAssay(coords = mol_coords, image = image, type = "molecule", main_image = image_name) |
|
|
1147 |
|
|
|
1148 |
# merge assays in one section |
|
|
1149 |
if(verbose) |
|
|
1150 |
message("Merging assays for slide ", slide, " ...") |
|
|
1151 |
sample.metadata <- SampleMetadata(cell_object) |
|
|
1152 |
cell_object <- addAssay(cell_object, |
|
|
1153 |
assay = mol_assay, |
|
|
1154 |
metadata = mol_metadata, |
|
|
1155 |
assay_name = paste0(assay_name, "_mol"), |
|
|
1156 |
sample = sample.metadata["Assay1", "Sample"], |
|
|
1157 |
layer = sample.metadata["Assay1", "Layer"]) |
|
|
1158 |
} |
|
|
1159 |
vr_list <- append(vr_list, cell_object) |
|
|
1160 |
} |
|
|
1161 |
|
|
|
1162 |
# return |
|
|
1163 |
if(verbose) |
|
|
1164 |
message("Merging slides ...") |
|
|
1165 |
vr <- merge(vr_list[[1]], vr_list[-1]) |
|
|
1166 |
} |
|
|
1167 |
|
|
|
1168 |
#' generateCosMxImage |
|
|
1169 |
#' |
|
|
1170 |
#' Generates a low resolution Morphology image of the CosMx experiment |
|
|
1171 |
#' |
|
|
1172 |
#' @param dir.path CosMx output folder |
|
|
1173 |
#' @param increase.contrast increase the contrast of the image before writing |
|
|
1174 |
#' @param output.path The path to the new morphology image created if the image should be saved to a location other than Xenium output folder. |
|
|
1175 |
#' @param verbose verbose |
|
|
1176 |
#' @param ... additional parameters passed to the \link{writeImage} function |
|
|
1177 |
#' |
|
|
1178 |
#' @importFrom magick image_read image_contrast |
|
|
1179 |
#' @importFrom EBImage writeImage |
|
|
1180 |
#' |
|
|
1181 |
#' @export |
|
|
1182 |
generateCosMxImage <- function(dir.path, increase.contrast = TRUE, output.path = NULL, verbose = TRUE, ...) { |
|
|
1183 |
|
|
|
1184 |
# check package |
|
|
1185 |
if(!requireNamespace("reshape2")){ |
|
|
1186 |
stop("You have to install the reshape2 package!: install.packages('reshape2')") |
|
|
1187 |
} |
|
|
1188 |
|
|
|
1189 |
# file path to either Xenium output folder or specified folder |
|
|
1190 |
file.path <- paste0(dir.path, "/CellComposite_lowres.tif") |
|
|
1191 |
output.file <- paste0(output.path, "/CellComposite_lowres.tif") |
|
|
1192 |
|
|
|
1193 |
# check if the file exists in either Xenium output folder, or the specified location |
|
|
1194 |
if(file.exists(file.path) | file.exists(paste0(output.file))){ |
|
|
1195 |
if(verbose) |
|
|
1196 |
message("CellComposite_lowres.tif already exists! \n") |
|
|
1197 |
return(NULL) |
|
|
1198 |
} |
|
|
1199 |
|
|
|
1200 |
# FOV positions of CosMx |
|
|
1201 |
list_of_files <- list.files(dir.path) |
|
|
1202 |
fov_positions_path <- paste0(dir.path, "/", list_of_files[grepl("fov_positions_file.csv$",list_of_files)][1]) |
|
|
1203 |
fov_positions <- read.csv(fov_positions_path) |
|
|
1204 |
|
|
|
1205 |
# manipulate fov positions matrix |
|
|
1206 |
if(verbose) |
|
|
1207 |
message("Getting FOV Positions \n") |
|
|
1208 |
relative_fov_positions <- fov_positions |
|
|
1209 |
x_min <- min(relative_fov_positions$x_global_px) |
|
|
1210 |
y_min <- min(relative_fov_positions$y_global_px) |
|
|
1211 |
x_gap <- diff(unique(fov_positions$x_global_px))[1] |
|
|
1212 |
y_gap <- diff(unique(fov_positions$y_global_px))[1] |
|
|
1213 |
relative_fov_positions[,c("x_global_px","y_global_px")] <- t(apply(relative_fov_positions[,c("x_global_px","y_global_px")], 1, function(cell){ |
|
|
1214 |
c((cell[1]-x_min)/x_gap,(cell[2]-y_min)/y_gap) |
|
|
1215 |
})) |
|
|
1216 |
relative_fov_positions <- relative_fov_positions[order(relative_fov_positions$y_global_px, decreasing = TRUE),] |
|
|
1217 |
|
|
|
1218 |
# Combine Images of the FOV grid |
|
|
1219 |
if(verbose) |
|
|
1220 |
message("Loading FOV tif files \n") |
|
|
1221 |
image.dir.path <- paste0(dir.path,"/CellComposite/") |
|
|
1222 |
morphology_image_data <- NULL |
|
|
1223 |
for(i in relative_fov_positions$fov){ |
|
|
1224 |
image_path <- paste0(image.dir.path, "CellComposite_F", str_pad(as.character(i), 3, pad = 0), ".jpg") |
|
|
1225 |
image_data <- magick::image_read(image_path) %>% magick::image_resize("x500") %>% magick::image_raster() |
|
|
1226 |
if(is.null(morphology_image_data)) |
|
|
1227 |
dim_image <- apply(image_data[,seq_len(2)], 2, max) |
|
|
1228 |
scale_dim <- relative_fov_positions[i,2:3]*dim_image |
|
|
1229 |
image_data[,seq_len(2)] <- image_data[,seq_len(2)] + |
|
|
1230 |
rep(1, nrow(image_data)) %o% as.matrix(scale_dim)[1,] |
|
|
1231 |
morphology_image_data <- rbind(morphology_image_data, image_data) |
|
|
1232 |
} |
|
|
1233 |
morphology_image_data_array <- reshape2::acast(morphology_image_data, y ~ x) |
|
|
1234 |
morphology_image <- magick::image_read(morphology_image_data_array) %>% magick::image_resize("x800") |
|
|
1235 |
|
|
|
1236 |
# pick a resolution level |
|
|
1237 |
morphology_image_info <- image_info(morphology_image) |
|
|
1238 |
if(verbose) |
|
|
1239 |
message("Image Resolution (X:", morphology_image_info$width, " Y:", morphology_image_info$height, ") \n") |
|
|
1240 |
|
|
|
1241 |
# increase contrast using EBImage |
|
|
1242 |
if(increase.contrast) { |
|
|
1243 |
if(verbose) |
|
|
1244 |
message("Increasing Contrast \n") |
|
|
1245 |
morphology_image <- magick::image_contrast(morphology_image, sharpen = 1) |
|
|
1246 |
} |
|
|
1247 |
|
|
|
1248 |
# write to the same folder |
|
|
1249 |
if(verbose) |
|
|
1250 |
message("Writing Tiff File \n") |
|
|
1251 |
if(is.null(output.path)){ |
|
|
1252 |
EBImage::writeImage(magick::as_EBImage(morphology_image), file = file.path, ...) |
|
|
1253 |
} else { |
|
|
1254 |
EBImage::writeImage(magick::as_EBImage(morphology_image), file = output.file, ...) |
|
|
1255 |
} |
|
|
1256 |
|
|
|
1257 |
return(NULL) |
|
|
1258 |
} |
|
|
1259 |
|
|
|
1260 |
#### |
|
|
1261 |
# Spatial Genomics #### |
|
|
1262 |
#### |
|
|
1263 |
|
|
|
1264 |
#### |
|
|
1265 |
## GenePs #### |
|
|
1266 |
#### |
|
|
1267 |
|
|
|
1268 |
#' importGenePS |
|
|
1269 |
#' |
|
|
1270 |
#' Importing GenePS data |
|
|
1271 |
#' |
|
|
1272 |
#' @param dir.path path to Xenium output folder |
|
|
1273 |
#' @param assay_name the assay name of the SR object |
|
|
1274 |
#' @param sample_name the name of the sample |
|
|
1275 |
#' @param use_image if TRUE, the DAPI image will be used. |
|
|
1276 |
#' @param resolution_level the level of resolution within TIFF image. Default: 7 (971x638) |
|
|
1277 |
#' @param image_name the image name of the Xenium assay, Default: main |
|
|
1278 |
#' @param channel_name the channel name of the image of the Xenium assay, Default: DAPI |
|
|
1279 |
#' @param import_molecules if TRUE, molecule assay will be created along with cell assay. |
|
|
1280 |
#' @param verbose verbose |
|
|
1281 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1282 |
#' |
|
|
1283 |
#' @importFrom magick image_read image_info |
|
|
1284 |
#' @importFrom utils read.csv |
|
|
1285 |
#' @importFrom data.table fread |
|
|
1286 |
#' @importFrom ids random_id |
|
|
1287 |
#' @importFrom grDevices as.raster |
|
|
1288 |
#' @importFrom EBImage as.Image |
|
|
1289 |
#' |
|
|
1290 |
#' @export |
|
|
1291 |
#' |
|
|
1292 |
importGenePS <- function (dir.path, assay_name = "GenePS", sample_name = NULL, use_image = TRUE, |
|
|
1293 |
resolution_level = 7, image_name = "main", channel_name = "DAPI", import_molecules = FALSE, |
|
|
1294 |
verbose = TRUE, ...) |
|
|
1295 |
{ |
|
|
1296 |
# cell assay |
|
|
1297 |
if(verbose) |
|
|
1298 |
message("Creating cell level assay ...") |
|
|
1299 |
|
|
|
1300 |
# raw counts |
|
|
1301 |
DataOutputfiles <- list.files(paste0(dir.path, "DataOutput/")) |
|
|
1302 |
datafile <- DataOutputfiles[grepl("_cellxgene.csv", DataOutputfiles)] |
|
|
1303 |
if(length(datafile) > 0){ |
|
|
1304 |
rawdata <- utils::read.csv(file = paste0(dir.path, "DataOutput/", datafile), row.names = NULL) |
|
|
1305 |
rawdata <- rawdata[,colnames(rawdata)[!colnames(rawdata) %in% "X"]] |
|
|
1306 |
rawdata <- t(rawdata) |
|
|
1307 |
} else { |
|
|
1308 |
stop("There are no files ending with '_cellxgene.csv' in the path") |
|
|
1309 |
} |
|
|
1310 |
|
|
|
1311 |
# image |
|
|
1312 |
if(use_image){ |
|
|
1313 |
|
|
|
1314 |
# check RBioFormats |
|
|
1315 |
if (!requireNamespace('RBioFormats')) |
|
|
1316 |
stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')") |
|
|
1317 |
|
|
|
1318 |
# check image |
|
|
1319 |
image_file <- paste0(dir.path, "/images/DAPI.tiff") |
|
|
1320 |
if(file.exists(image_file)){ |
|
|
1321 |
image <- RBioFormats::read.image(image_file, resolution = resolution_level) |
|
|
1322 |
image <- EBImage::as.Image(image) |
|
|
1323 |
image <- grDevices::as.raster(image) |
|
|
1324 |
image <- magick::image_read(image) |
|
|
1325 |
} else { |
|
|
1326 |
stop("There are no image files in the path") |
|
|
1327 |
} |
|
|
1328 |
# scale the xenium image instructed by 10x Genomics help page |
|
|
1329 |
scaleparam <- ceiling(2^(resolution_level-1)) |
|
|
1330 |
} else { |
|
|
1331 |
image <- NULL |
|
|
1332 |
} |
|
|
1333 |
|
|
|
1334 |
# coordinates |
|
|
1335 |
coordsfile <- DataOutputfiles[grepl("_cellcoordinate.csv", DataOutputfiles)] |
|
|
1336 |
if(length(coordsfile) > 0){ |
|
|
1337 |
coords <- utils::read.csv(file = paste0(dir.path, "DataOutput/", coordsfile)) |
|
|
1338 |
coords <- as.matrix(coords[,c("center_x", "center_y")]) |
|
|
1339 |
colnames(coords) <- c("x", "y") |
|
|
1340 |
if(use_image) { |
|
|
1341 |
coords <- coords/scaleparam |
|
|
1342 |
imageinfo <- unlist(magick::image_info(image)[c("height")]) |
|
|
1343 |
range_coords <- c(0,imageinfo) |
|
|
1344 |
} else { |
|
|
1345 |
range_coords <- range(coords[,2]) |
|
|
1346 |
} |
|
|
1347 |
coords[,2] <- range_coords[2] - coords[,2] + range_coords[1] |
|
|
1348 |
} else { |
|
|
1349 |
stop("There are no files ending with '_cellcoordinate.csv' in the path") |
|
|
1350 |
} |
|
|
1351 |
|
|
|
1352 |
# create VoltRon object for cells |
|
|
1353 |
cell_object <- formVoltRon(rawdata, metadata = NULL, image = image, coords, main.assay = assay_name, |
|
|
1354 |
assay.type = "cell", image_name = image_name, main_channel = channel_name, sample_name = sample_name, |
|
|
1355 |
feature_name = "RNA", ...) |
|
|
1356 |
|
|
|
1357 |
# add molecules |
|
|
1358 |
if(!import_molecules){ |
|
|
1359 |
return(cell_object) |
|
|
1360 |
} else { |
|
|
1361 |
if(verbose) |
|
|
1362 |
message("Creating molecule level assay ...") |
|
|
1363 |
transcripts_file <- DataOutputfiles[grepl("_transcriptlocation.csv", DataOutputfiles)] |
|
|
1364 |
if(length(transcripts_file) == 0){ |
|
|
1365 |
stop("There are no files ending with '_transcriptlocation.csv' in the path") |
|
|
1366 |
} else { |
|
|
1367 |
|
|
|
1368 |
# get subcellur data components |
|
|
1369 |
subcellular_data <- data.table::fread(paste0(dir.path, "DataOutput/", transcripts_file)) |
|
|
1370 |
subcellular_data$id <- seq_len(nrow(subcellular_data)) |
|
|
1371 |
subcellular_data <- subcellular_data[,c("id", colnames(subcellular_data)[!colnames(subcellular_data) %in% "id"]), with = FALSE] |
|
|
1372 |
colnames(subcellular_data)[colnames(subcellular_data)=="name"] <- "gene" |
|
|
1373 |
|
|
|
1374 |
# coordinates |
|
|
1375 |
coords <- as.matrix(subcellular_data[,c("x", "y")]) |
|
|
1376 |
colnames(coords) <- c("x", "y") |
|
|
1377 |
if(use_image){ |
|
|
1378 |
coords <- coords/scaleparam |
|
|
1379 |
} |
|
|
1380 |
coords[,"y"] <- range_coords[2] - coords[,"y"] + range_coords[1] |
|
|
1381 |
|
|
|
1382 |
# metadata |
|
|
1383 |
mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell", "x", "y")], with = FALSE] |
|
|
1384 |
set.seed(nrow(mol_metadata$id)) |
|
|
1385 |
mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE)) |
|
|
1386 |
mol_metadata$assay_id <- "Assay1" |
|
|
1387 |
mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")] |
|
|
1388 |
|
|
|
1389 |
# coord names |
|
|
1390 |
rownames(coords) <- mol_metadata$id |
|
|
1391 |
|
|
|
1392 |
# create VoltRon assay for molecules |
|
|
1393 |
mol_assay <- formAssay(coords = coords, image = image, type = "molecule", main_image = image_name, main_channel = channel_name) |
|
|
1394 |
|
|
|
1395 |
# merge assays in one section |
|
|
1396 |
if(verbose) |
|
|
1397 |
message("Merging assays ...") |
|
|
1398 |
sample.metadata <- SampleMetadata(cell_object) |
|
|
1399 |
object <- addAssay(cell_object, |
|
|
1400 |
assay = mol_assay, |
|
|
1401 |
metadata = mol_metadata, |
|
|
1402 |
assay_name = paste0(assay_name, "_mol"), |
|
|
1403 |
sample = sample.metadata["Assay1", "Sample"], |
|
|
1404 |
layer = sample.metadata["Assay1", "Layer"]) |
|
|
1405 |
|
|
|
1406 |
# connectivity |
|
|
1407 |
connectivity <- data.table::data.table(transcript_id = mol_metadata$id, cell_id = subcellular_data[["cell"]]) |
|
|
1408 |
connectivity <- subset(connectivity, cell_id != 0) |
|
|
1409 |
connectivity[["cell_id"]] <- vrSpatialPoints(cell_object)[connectivity[["cell_id"]]] |
|
|
1410 |
|
|
|
1411 |
# add connectivity of spatial points across assays |
|
|
1412 |
object <- addLayerConnectivity(object, |
|
|
1413 |
connectivity = connectivity, |
|
|
1414 |
sample = sample.metadata["Assay1", "Sample"], |
|
|
1415 |
layer = sample.metadata["Assay1", "Layer"]) |
|
|
1416 |
|
|
|
1417 |
# return |
|
|
1418 |
return(object) |
|
|
1419 |
} |
|
|
1420 |
} |
|
|
1421 |
} |
|
|
1422 |
|
|
|
1423 |
#### |
|
|
1424 |
# BGI Genomics #### |
|
|
1425 |
#### |
|
|
1426 |
|
|
|
1427 |
#### |
|
|
1428 |
## STOmics #### |
|
|
1429 |
#### |
|
|
1430 |
|
|
|
1431 |
#' importSTOmics |
|
|
1432 |
#' |
|
|
1433 |
#' Importing STOmics (Stereo-Seq) data |
|
|
1434 |
#' |
|
|
1435 |
#' @param h5ad.path path to h5ad file of STOmics output |
|
|
1436 |
#' @param assay_name the assay name |
|
|
1437 |
#' @param sample_name the name of the sample |
|
|
1438 |
#' @param image_name the image name of the Visium assay, Default: main |
|
|
1439 |
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E |
|
|
1440 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1441 |
#' |
|
|
1442 |
#' @importFrom methods as |
|
|
1443 |
#' @export |
|
|
1444 |
#' |
|
|
1445 |
importSTOmics <- function(h5ad.path, assay_name = "STOmics", sample_name = NULL, image_name = "main", channel_name = "H&E", ...) |
|
|
1446 |
{ |
|
|
1447 |
# check package |
|
|
1448 |
if(!requireNamespace('anndataR')) |
|
|
1449 |
stop("Please install anndataR package!: devtools::install_github('scverse/anndataR')") |
|
|
1450 |
|
|
|
1451 |
# get h5ad data |
|
|
1452 |
stdata <- anndataR::read_h5ad(h5ad.path, to = "HDF5AnnData") |
|
|
1453 |
|
|
|
1454 |
# observation and feature names |
|
|
1455 |
obs_names <- stdata$obs_names |
|
|
1456 |
var_names <- stdata$var_names |
|
|
1457 |
|
|
|
1458 |
# raw counts |
|
|
1459 |
rawdata <- Matrix::t(stdata$X) |
|
|
1460 |
rownames(rawdata) <- var_names |
|
|
1461 |
colnames(rawdata) <- obs_names |
|
|
1462 |
rawdata <- methods::as(rawdata, 'CsparseMatrix') |
|
|
1463 |
|
|
|
1464 |
# metadata |
|
|
1465 |
metadata <- stdata$obs |
|
|
1466 |
rownames(metadata) <- obs_names |
|
|
1467 |
|
|
|
1468 |
# coordinates |
|
|
1469 |
coords <- stdata$obsm$spatial |
|
|
1470 |
rownames(coords) <- obs_names |
|
|
1471 |
|
|
|
1472 |
# scale coordinates |
|
|
1473 |
binsize <- stdata$uns$bin_size |
|
|
1474 |
params <- list( |
|
|
1475 |
spot.radius = 0.5 + (binsize-1), |
|
|
1476 |
vis.spot.radius = 0.5 + (binsize-1)) |
|
|
1477 |
|
|
|
1478 |
# create VoltRon |
|
|
1479 |
formVoltRon(rawdata, metadata = metadata, coords = coords, main.assay = assay_name, params = params, assay.type = "spot", |
|
|
1480 |
image_name = image_name, main_channel = channel_name, sample_name = sample_name, feature_name = "RNA", ...) |
|
|
1481 |
} |
|
|
1482 |
|
|
|
1483 |
#### |
|
|
1484 |
# Akoya #### |
|
|
1485 |
#### |
|
|
1486 |
|
|
|
1487 |
#### |
|
|
1488 |
## PhenoCycler #### |
|
|
1489 |
#### |
|
|
1490 |
|
|
|
1491 |
#' importPhenoCycler |
|
|
1492 |
#' |
|
|
1493 |
#' Importing PhenoCycler data |
|
|
1494 |
#' |
|
|
1495 |
#' @param dir.path path to PhenoCycler output folder |
|
|
1496 |
#' @param assay_name the assay name of the SR object |
|
|
1497 |
#' @param sample_name the name of the sample |
|
|
1498 |
#' @param image_name the image name of the Xenium assay, Default: main |
|
|
1499 |
#' @param type Specify which type matrix is being provided. |
|
|
1500 |
#' \itemize{ |
|
|
1501 |
#' \item \dQuote{\code{processor}}: matrix generated by CODEX Processor |
|
|
1502 |
#' \item \dQuote{\code{inform}}: matrix generated by inForm |
|
|
1503 |
#' \item \dQuote{\code{qupath}}: matrix generated by QuPath |
|
|
1504 |
#' } |
|
|
1505 |
#' @param filter A pattern to filter features by; pass \code{NA} to skip feature filtering |
|
|
1506 |
#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the quantification level to read in |
|
|
1507 |
#' @param verbose verbose |
|
|
1508 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1509 |
#' |
|
|
1510 |
#' @importFrom magick image_info image_read |
|
|
1511 |
#' |
|
|
1512 |
#' @export |
|
|
1513 |
importPhenoCycler <- function(dir.path, assay_name = "PhenoCycler", sample_name = NULL, image_name = "main", |
|
|
1514 |
type = c('inform', 'processor', 'qupath'), filter = 'DAPI|Blank|Empty', |
|
|
1515 |
inform.quant = c('mean', 'total', 'min', 'max', 'std'), verbose = TRUE, ...){ |
|
|
1516 |
|
|
|
1517 |
# raw counts, metadata and coordinates |
|
|
1518 |
listoffiles <- list.files(paste0(dir.path, "/processed/segm/segm-1/fcs/compensated/"), full.names = TRUE) |
|
|
1519 |
datafile <- listoffiles[grepl("_compensated.csv$", listoffiles)][1] |
|
|
1520 |
if(file.exists(datafile)){ |
|
|
1521 |
rawdata <- readPhenoCyclerMat(filename = datafile, type = type, filter = filter, inform.quant = inform.quant) |
|
|
1522 |
} else { |
|
|
1523 |
stop("There are no files named ending with '_compensated.csv' in the processed/segm/segm-1/fcs/compensated/ subfolder") |
|
|
1524 |
} |
|
|
1525 |
|
|
|
1526 |
# cell id |
|
|
1527 |
cellid <- paste0("cell", colnames(rawdata$matrix)) |
|
|
1528 |
|
|
|
1529 |
# coordinates |
|
|
1530 |
coords <- rawdata$centroids[,c("x", "y")] |
|
|
1531 |
rownames(coords) <- cellid |
|
|
1532 |
coords <- as.matrix(coords) |
|
|
1533 |
|
|
|
1534 |
# metadata |
|
|
1535 |
metadata <- rawdata$metadata |
|
|
1536 |
rownames(metadata) <- cellid |
|
|
1537 |
|
|
|
1538 |
# data |
|
|
1539 |
rawdata <- rawdata$matrix |
|
|
1540 |
colnames(rawdata) <- cellid |
|
|
1541 |
rownames(rawdata) <- gsub("(\t|\r|\n)", "", rownames(rawdata)) |
|
|
1542 |
|
|
|
1543 |
# images |
|
|
1544 |
image_dir <- paste0(dir.path, "/processed/stitched/reg001/") |
|
|
1545 |
list_files <- list.files(image_dir) |
|
|
1546 |
if(!dir.exists(image_dir)){ |
|
|
1547 |
if(verbose) |
|
|
1548 |
message("There are no images of channels!") |
|
|
1549 |
image_list <- NULL |
|
|
1550 |
} else { |
|
|
1551 |
if(!any(grepl(".tif$", list_files))){ |
|
|
1552 |
stop("The folder doesnt have any images associated with channels!") |
|
|
1553 |
} else{ |
|
|
1554 |
image_channel_names <- sapply(list_files, function(x) { |
|
|
1555 |
name <- strsplit(x, split = "_")[[1]] |
|
|
1556 |
name <- gsub(".tif$", "", name[length(name)]) |
|
|
1557 |
name <- make.unique(name) |
|
|
1558 |
return(name) |
|
|
1559 |
}) |
|
|
1560 |
image_channel_names <- c(image_channel_names[grepl("DAPI", image_channel_names)][1], |
|
|
1561 |
image_channel_names[image_channel_names %in% rownames(rawdata)]) |
|
|
1562 |
list_files <- names(image_channel_names) |
|
|
1563 |
image_list <- lapply(list_files, function(x){ |
|
|
1564 |
magick::image_read(paste0(image_dir, "/", x)) |
|
|
1565 |
}) |
|
|
1566 |
names(image_list) <- image_channel_names |
|
|
1567 |
coords[,2] <- magick::image_info(image_list[[1]])$height - coords[,2] |
|
|
1568 |
} |
|
|
1569 |
} |
|
|
1570 |
|
|
|
1571 |
# voltron object |
|
|
1572 |
object <- formVoltRon(data = rawdata, metadata = metadata, image = image_list, coords = coords, assay.type = "cell", |
|
|
1573 |
sample_name = sample_name, main.assay = assay_name, image_name = image_name, feature_name = "RNA", ...) |
|
|
1574 |
|
|
|
1575 |
# return |
|
|
1576 |
object |
|
|
1577 |
} |
|
|
1578 |
|
|
|
1579 |
#' readPhenoCyclerMat |
|
|
1580 |
#' |
|
|
1581 |
#' Read and Load Akoya CODEX data, adapted from the \code{ReadAkoya} function \code{Seurat} package |
|
|
1582 |
#' |
|
|
1583 |
#' @param filename Path to matrix generated by upstream processing. |
|
|
1584 |
#' @param type Specify which type matrix is being provided. |
|
|
1585 |
#' \itemize{ |
|
|
1586 |
#' \item \dQuote{\code{processor}}: matrix generated by CODEX Processor |
|
|
1587 |
#' \item \dQuote{\code{inform}}: matrix generated by inForm |
|
|
1588 |
#' \item \dQuote{\code{qupath}}: matrix generated by QuPath |
|
|
1589 |
#' } |
|
|
1590 |
#' @param filter A pattern to filter features by; pass \code{NA} to skip feature filtering |
|
|
1591 |
#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the quantification level to read in |
|
|
1592 |
#' |
|
|
1593 |
#' @importFrom methods as |
|
|
1594 |
#' |
|
|
1595 |
#' @noRd |
|
|
1596 |
readPhenoCyclerMat <- function( |
|
|
1597 |
filename, |
|
|
1598 |
type = c('inform', 'processor', 'qupath'), |
|
|
1599 |
filter = 'DAPI|Blank|Empty', |
|
|
1600 |
inform.quant = c('mean', 'total', 'min', 'max', 'std') |
|
|
1601 |
) { |
|
|
1602 |
# Check arguments |
|
|
1603 |
if (!file.exists(filename)) { |
|
|
1604 |
stop("Can't find file: ", filename) |
|
|
1605 |
} |
|
|
1606 |
type <- tolower(x = type[1L]) |
|
|
1607 |
type <- match.arg(arg = type) |
|
|
1608 |
ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) |
|
|
1609 |
# Preload matrix |
|
|
1610 |
sep <- switch(EXPR = type, 'inform' = '\t', ',') |
|
|
1611 |
mtx <- data.table::fread( |
|
|
1612 |
file = filename, |
|
|
1613 |
sep = sep, |
|
|
1614 |
data.table = FALSE, |
|
|
1615 |
verbose = FALSE |
|
|
1616 |
) |
|
|
1617 |
# Assemble outputs |
|
|
1618 |
outs <- switch( |
|
|
1619 |
EXPR = type, |
|
|
1620 |
'processor' = { |
|
|
1621 |
# Create centroids data frame |
|
|
1622 |
centroids <- data.frame( |
|
|
1623 |
x = mtx[['x:x']], |
|
|
1624 |
y = mtx[['y:y']], |
|
|
1625 |
cell = as.character(x = mtx[['cell_id:cell_id']]), |
|
|
1626 |
stringsAsFactors = FALSE |
|
|
1627 |
) |
|
|
1628 |
rownames(x = mtx) <- as.character(x = mtx[['cell_id:cell_id']]) |
|
|
1629 |
# Create metadata data frame |
|
|
1630 |
md <- mtx[, !grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE] |
|
|
1631 |
colnames(x = md) <- vapply( |
|
|
1632 |
X = strsplit(x = colnames(x = md), split = ':'), |
|
|
1633 |
FUN = '[[', |
|
|
1634 |
FUN.VALUE = character(length = 1L), |
|
|
1635 |
2L |
|
|
1636 |
) |
|
|
1637 |
# Create expression matrix |
|
|
1638 |
mtx <- mtx[, grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE] |
|
|
1639 |
colnames(x = mtx) <- vapply( |
|
|
1640 |
X = strsplit(x = colnames(x = mtx), split = ':'), |
|
|
1641 |
FUN = '[[', |
|
|
1642 |
FUN.VALUE = character(length = 1L), |
|
|
1643 |
2L |
|
|
1644 |
) |
|
|
1645 |
if (!is.na(x = filter)) { |
|
|
1646 |
mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE] |
|
|
1647 |
} |
|
|
1648 |
mtx <- t(x = mtx) |
|
|
1649 |
if ((sum(mtx == 0) / length(x = mtx)) > ratio) { |
|
|
1650 |
# mtx <- as.sparse(x = mtx) |
|
|
1651 |
mtx <- methods::as(mtx, 'CsparseMatrix') |
|
|
1652 |
} |
|
|
1653 |
list(matrix = mtx, centroids = centroids, metadata = md) |
|
|
1654 |
}, |
|
|
1655 |
'inform' = { |
|
|
1656 |
inform.quant <- tolower(x = inform.quant[1L]) |
|
|
1657 |
inform.quant <- match.arg(arg = inform.quant) |
|
|
1658 |
expr.key <- c( |
|
|
1659 |
mean = 'Mean', |
|
|
1660 |
total = 'Total', |
|
|
1661 |
min = 'Min', |
|
|
1662 |
max = 'Max', |
|
|
1663 |
std = 'Std Dev' |
|
|
1664 |
)[inform.quant] |
|
|
1665 |
expr.pattern <- '\\(Normalized Counts, Total Weighting\\)' |
|
|
1666 |
rownames(x = mtx) <- mtx[['Cell ID']] |
|
|
1667 |
mtx <- mtx[, setdiff(x = colnames(x = mtx), y = 'Cell ID'), drop = FALSE] |
|
|
1668 |
# Create centroids |
|
|
1669 |
centroids <- data.frame( |
|
|
1670 |
x = mtx[['Cell X Position']], |
|
|
1671 |
y = mtx[['Cell Y Position']], |
|
|
1672 |
cell = rownames(x = mtx), |
|
|
1673 |
stringsAsFactors = FALSE |
|
|
1674 |
) |
|
|
1675 |
# Create metadata |
|
|
1676 |
cols <- setdiff( |
|
|
1677 |
x = grep( |
|
|
1678 |
pattern = expr.pattern, |
|
|
1679 |
x = colnames(x = mtx), |
|
|
1680 |
value = TRUE, |
|
|
1681 |
invert = TRUE |
|
|
1682 |
), |
|
|
1683 |
y = paste('Cell', c('X', 'Y'), 'Position') |
|
|
1684 |
) |
|
|
1685 |
md <- mtx[, cols, drop = FALSE] |
|
|
1686 |
# Create expression matrices |
|
|
1687 |
exprs <- data.frame( |
|
|
1688 |
cols = grep( |
|
|
1689 |
pattern = paste(expr.key, expr.pattern), |
|
|
1690 |
x = colnames(x = mtx), |
|
|
1691 |
value = TRUE |
|
|
1692 |
) |
|
|
1693 |
) |
|
|
1694 |
exprs$feature <- vapply( |
|
|
1695 |
X = trimws(x = gsub( |
|
|
1696 |
pattern = paste(expr.key, expr.pattern), |
|
|
1697 |
replacement = '', |
|
|
1698 |
x = exprs$cols |
|
|
1699 |
)), |
|
|
1700 |
FUN = function(x) { |
|
|
1701 |
x <- unlist(x = strsplit(x = x, split = ' ')) |
|
|
1702 |
x <- x[length(x = x)] |
|
|
1703 |
return(gsub(pattern = '\\(|\\)', replacement = '', x = x)) |
|
|
1704 |
}, |
|
|
1705 |
FUN.VALUE = character(length = 1L) |
|
|
1706 |
) |
|
|
1707 |
exprs$class <- tolower(x = vapply( |
|
|
1708 |
X = strsplit(x = exprs$cols, split = ' '), |
|
|
1709 |
FUN = '[[', |
|
|
1710 |
FUN.VALUE = character(length = 1L), |
|
|
1711 |
1L |
|
|
1712 |
)) |
|
|
1713 |
classes <- unique(x = exprs$class) |
|
|
1714 |
outs <- vector( |
|
|
1715 |
mode = 'list', |
|
|
1716 |
length = length(x = classes) + 2L |
|
|
1717 |
) |
|
|
1718 |
names(x = outs) <- c( |
|
|
1719 |
'matrix', |
|
|
1720 |
'centroids', |
|
|
1721 |
'metadata', |
|
|
1722 |
setdiff(x = classes, y = 'entire') |
|
|
1723 |
) |
|
|
1724 |
outs$centroids <- centroids |
|
|
1725 |
outs$metadata <- md |
|
|
1726 |
for (i in classes) { |
|
|
1727 |
df <- exprs[exprs$class == i, , drop = FALSE] |
|
|
1728 |
expr <- mtx[, df$cols] |
|
|
1729 |
colnames(x = expr) <- df$feature |
|
|
1730 |
if (!is.na(x = filter)) { |
|
|
1731 |
expr <- expr[, !grepl(pattern = filter, x = colnames(x = expr)), drop = FALSE] |
|
|
1732 |
} |
|
|
1733 |
expr <- t(x = expr) |
|
|
1734 |
if ((sum(expr == 0, na.rm = TRUE) / length(x = expr)) > ratio) { |
|
|
1735 |
# expr <- as.sparse(x = expr) |
|
|
1736 |
expr <- methods::as(expr, 'CsparseMatrix') |
|
|
1737 |
} |
|
|
1738 |
outs[[switch(EXPR = i, 'entire' = 'matrix', i)]] <- expr |
|
|
1739 |
} |
|
|
1740 |
outs |
|
|
1741 |
}, |
|
|
1742 |
'qupath' = { |
|
|
1743 |
rownames(x = mtx) <- as.character(x = seq_len(length.out = nrow(x = mtx))) |
|
|
1744 |
# Create centroids |
|
|
1745 |
xpos <- sort( |
|
|
1746 |
x = grep(pattern = 'Centroid X', x = colnames(x = mtx), value = TRUE), |
|
|
1747 |
decreasing = TRUE |
|
|
1748 |
)[1L] |
|
|
1749 |
ypos <- sort( |
|
|
1750 |
x = grep(pattern = 'Centroid Y', x = colnames(x = mtx), value = TRUE), |
|
|
1751 |
decreasing = TRUE |
|
|
1752 |
)[1L] |
|
|
1753 |
centroids <- data.frame( |
|
|
1754 |
x = mtx[[xpos]], |
|
|
1755 |
y = mtx[[ypos]], |
|
|
1756 |
cell = rownames(x = mtx), |
|
|
1757 |
stringsAsFactors = FALSE |
|
|
1758 |
) |
|
|
1759 |
# Create metadata |
|
|
1760 |
cols <- setdiff( |
|
|
1761 |
x = grep( |
|
|
1762 |
pattern = 'Cell: Mean', |
|
|
1763 |
x = colnames(x = mtx), |
|
|
1764 |
ignore.case = TRUE, |
|
|
1765 |
value = TRUE, |
|
|
1766 |
invert = TRUE |
|
|
1767 |
), |
|
|
1768 |
y = c(xpos, ypos) |
|
|
1769 |
) |
|
|
1770 |
md <- mtx[, cols, drop = FALSE] |
|
|
1771 |
# Create expression matrix |
|
|
1772 |
idx <- which(x = grepl( |
|
|
1773 |
pattern = 'Cell: Mean', |
|
|
1774 |
x = colnames(x = mtx), |
|
|
1775 |
ignore.case = TRUE |
|
|
1776 |
)) |
|
|
1777 |
mtx <- mtx[, idx, drop = FALSE] |
|
|
1778 |
colnames(x = mtx) <- vapply( |
|
|
1779 |
X = strsplit(x = colnames(x = mtx), split = ':'), |
|
|
1780 |
FUN = '[[', |
|
|
1781 |
FUN.VALUE = character(length = 1L), |
|
|
1782 |
1L |
|
|
1783 |
) |
|
|
1784 |
if (!is.na(x = filter)) { |
|
|
1785 |
mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE] |
|
|
1786 |
} |
|
|
1787 |
mtx <- t(x = mtx) |
|
|
1788 |
if ((sum(mtx == 0) / length(x = mtx)) > ratio) { |
|
|
1789 |
# mtx <- as.sparse(x = mtx) |
|
|
1790 |
mtx <- methods::as(mtx, 'CsparseMatrix') |
|
|
1791 |
} |
|
|
1792 |
list(matrix = mtx, centroids = centroids, metadata = md) |
|
|
1793 |
}, |
|
|
1794 |
stop("Unknown matrix type: ", type) |
|
|
1795 |
) |
|
|
1796 |
return(outs) |
|
|
1797 |
} |
|
|
1798 |
|
|
|
1799 |
#### |
|
|
1800 |
# Non-Commercial #### |
|
|
1801 |
#### |
|
|
1802 |
|
|
|
1803 |
#### |
|
|
1804 |
## OpenST #### |
|
|
1805 |
#### |
|
|
1806 |
|
|
|
1807 |
#' importOpenST |
|
|
1808 |
#' |
|
|
1809 |
#' Importing OpenST data |
|
|
1810 |
#' |
|
|
1811 |
#' @param h5ad.path path to h5ad file of STOmics output |
|
|
1812 |
#' @param assay_name the assay name |
|
|
1813 |
#' @param sample_name the name of the sample |
|
|
1814 |
#' @param image_name the image name of the Visium assay, Default: main |
|
|
1815 |
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E |
|
|
1816 |
#' @param verbose verbose |
|
|
1817 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1818 |
#' |
|
|
1819 |
#' @importFrom methods as |
|
|
1820 |
#' @importFrom Matrix t |
|
|
1821 |
#' |
|
|
1822 |
#' @export |
|
|
1823 |
importOpenST <- function(h5ad.path, assay_name = "OpenST", sample_name = NULL, image_name = "main", channel_name = "H&E", verbose = TRUE, ...) |
|
|
1824 |
{ |
|
|
1825 |
# check package |
|
|
1826 |
if(!requireNamespace('anndataR')) |
|
|
1827 |
stop("Please install anndataR package!: devtools::install_github('scverse/anndataR')") |
|
|
1828 |
|
|
|
1829 |
# get h5ad data |
|
|
1830 |
stdata <- anndataR::read_h5ad(h5ad.path, to = "HDF5AnnData") |
|
|
1831 |
|
|
|
1832 |
# observation and feature names |
|
|
1833 |
obs_names <- stdata$obs_names |
|
|
1834 |
var_names <- stdata$var_names |
|
|
1835 |
|
|
|
1836 |
# raw counts |
|
|
1837 |
rawdata <- Matrix::t(stdata$X) |
|
|
1838 |
rownames(rawdata) <- var_names |
|
|
1839 |
colnames(rawdata) <- obs_names |
|
|
1840 |
rawdata <- methods::as(rawdata, 'CsparseMatrix') |
|
|
1841 |
|
|
|
1842 |
# metadata |
|
|
1843 |
metadata <- stdata$obs |
|
|
1844 |
rownames(metadata) <- obs_names |
|
|
1845 |
|
|
|
1846 |
# coordinates |
|
|
1847 |
coords <- stdata$obsm$spatial_3d_aligned |
|
|
1848 |
rownames(coords) <- obs_names |
|
|
1849 |
zlocation <- unique(coords[,3]) |
|
|
1850 |
|
|
|
1851 |
# get individual sections as voltron data |
|
|
1852 |
sections <- unique(metadata$n_section) |
|
|
1853 |
zlocation <- zlocation[order(sections)] |
|
|
1854 |
connectivity <- data.frame(Var1 = rep(seq_len(length(sections)), length(sections)), |
|
|
1855 |
Var2 = rep(seq_len(length(sections)), each = length(sections))) |
|
|
1856 |
sections <- sections[order(sections)] |
|
|
1857 |
vr_data_list <- list() |
|
|
1858 |
if(verbose) |
|
|
1859 |
message("Creating Layers ...") |
|
|
1860 |
for(i in seq_len(length(sections))){ |
|
|
1861 |
ind <- metadata$n_section == sections[i] |
|
|
1862 |
spatialpoints <- rownames(metadata[metadata$n_section == sections[i],]) |
|
|
1863 |
cur_data <- rawdata[,spatialpoints] |
|
|
1864 |
cur_metadata <- metadata[spatialpoints,] |
|
|
1865 |
cur_coords <- coords[ind,c(1,2)] |
|
|
1866 |
rownames(cur_coords) <- spatialpoints |
|
|
1867 |
vr_data_list[[i]] <- formVoltRon(data = cur_data, metadata = cur_metadata, coords = cur_coords, |
|
|
1868 |
main.assay = assay_name, sample_name = paste0("Section", sections[i]), |
|
|
1869 |
image_name = image_name, main_channel = channel_name, feature_name = "RNA", ...) |
|
|
1870 |
} |
|
|
1871 |
|
|
|
1872 |
# create VoltRon |
|
|
1873 |
sample_name <- ifelse(is.null(sample_name), "Sample", sample_name) |
|
|
1874 |
vr_data <- mergeVoltRon(vr_data_list[[1]], vr_data_list[-1], samples = sample_name) |
|
|
1875 |
|
|
|
1876 |
# set zlocations and adjacency of layer in the vrBlock |
|
|
1877 |
vr_data <- addBlockConnectivity(vr_data, connectivity = connectivity, zlocation = zlocation, sample = sample_name) |
|
|
1878 |
|
|
|
1879 |
# return |
|
|
1880 |
vr_data |
|
|
1881 |
} |
|
|
1882 |
|
|
|
1883 |
#### |
|
|
1884 |
## DBIT-Seq #### |
|
|
1885 |
#### |
|
|
1886 |
|
|
|
1887 |
#' importDBITSeq |
|
|
1888 |
#' |
|
|
1889 |
#' Importing DBIT-Seq data |
|
|
1890 |
#' |
|
|
1891 |
#' @param path.rna path to rna count matrix |
|
|
1892 |
#' @param path.prot path to protein count matrix |
|
|
1893 |
#' @param size the size of the in situ pixel (defualt is 10 (micron)) |
|
|
1894 |
#' @param assay_name the assay name |
|
|
1895 |
#' @param sample_name the name of the sample |
|
|
1896 |
#' @param image_name the image name of the Visium assay, Default: main |
|
|
1897 |
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E |
|
|
1898 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1899 |
#' |
|
|
1900 |
#' @importFrom utils read.table |
|
|
1901 |
#' |
|
|
1902 |
#' @export |
|
|
1903 |
importDBITSeq <- function(path.rna, path.prot = NULL, size = 10, assay_name = "DBIT-Seq", sample_name = NULL, image_name = "main", channel_name = "H&E", ...) |
|
|
1904 |
{ |
|
|
1905 |
# count matrix RNA |
|
|
1906 |
rnadata <- utils::read.table(path.rna, header = TRUE, sep = "\t", row.names = 1) |
|
|
1907 |
rnadata <- t(as.matrix(rnadata)) |
|
|
1908 |
|
|
|
1909 |
# count matrix Protein |
|
|
1910 |
protdata <- utils::read.table(path.prot, header = TRUE, sep = "\t", row.names = 1) |
|
|
1911 |
protdata <- t(as.matrix(protdata)) |
|
|
1912 |
|
|
|
1913 |
# coords |
|
|
1914 |
coords <- sapply(colnames(rnadata), function(x) as.numeric(strsplit(x, split = "x")[[1]])) |
|
|
1915 |
coords <- t(coords) |
|
|
1916 |
colnames(coords) <- c("x", "y") |
|
|
1917 |
|
|
|
1918 |
# get DBIT-Seq parameters |
|
|
1919 |
params <- list( |
|
|
1920 |
spot.radius = size/2, |
|
|
1921 |
vis.spot.radius = size, |
|
|
1922 |
nearestpost.distance = size*2*sqrt(2)) |
|
|
1923 |
coords <- coords*size*3/2 |
|
|
1924 |
|
|
|
1925 |
# make voltron object |
|
|
1926 |
object <- formVoltRon(data = rnadata, coords = coords, image = NULL, assay.type = "spot", params = params, image_name = "main", |
|
|
1927 |
main.assay = assay_name, sample_name = sample_name, feature_name = "RNA", ...) |
|
|
1928 |
|
|
|
1929 |
# add protein assay |
|
|
1930 |
if(!is.null(path.prot)){ |
|
|
1931 |
|
|
|
1932 |
# # create protein assay |
|
|
1933 |
# new_assay <- formAssay(data = protdata, |
|
|
1934 |
# coords = coords, |
|
|
1935 |
# type = "spot") |
|
|
1936 |
# new_assay@image <- object[["Assay1"]]@image |
|
|
1937 |
# sample.metadata <- SampleMetadata(object) |
|
|
1938 |
|
|
|
1939 |
# # add new assay |
|
|
1940 |
# object <- addAssay(object, |
|
|
1941 |
# assay = new_assay, |
|
|
1942 |
# metadata = Metadata(object), |
|
|
1943 |
# assay_name = paste(assay_name, "Prot", sep = "-"), |
|
|
1944 |
# sample = sample.metadata["Assay1", "Sample"], |
|
|
1945 |
# layer = sample.metadata["Assay1", "Layer"]) |
|
|
1946 |
# |
|
|
1947 |
# # add connectivity of spatial points across assays |
|
|
1948 |
# connectivity <- cbind(vrSpatialPoints(object, assay = "Assay1"), |
|
|
1949 |
# vrSpatialPoints(object, assay = "Assay2")) |
|
|
1950 |
# object <- addConnectivity(object, |
|
|
1951 |
# connectivity = connectivity, |
|
|
1952 |
# sample = sample.metadata["Assay1", "Sample"], |
|
|
1953 |
# layer = sample.metadata["Assay1", "Layer"]) |
|
|
1954 |
|
|
|
1955 |
# add negative probe assay as new feature set |
|
|
1956 |
object <- addFeature(object, assay = assay_name, data = protdata, feature_name = "Protein") |
|
|
1957 |
|
|
|
1958 |
} |
|
|
1959 |
|
|
|
1960 |
# return |
|
|
1961 |
return(object) |
|
|
1962 |
} |
|
|
1963 |
|
|
|
1964 |
#### |
|
|
1965 |
# Image Data #### |
|
|
1966 |
#### |
|
|
1967 |
|
|
|
1968 |
#' importImageData |
|
|
1969 |
#' |
|
|
1970 |
#' import an image as VoltRon object |
|
|
1971 |
#' |
|
|
1972 |
#' @param image a single or a list of image paths or magick-image objects |
|
|
1973 |
#' @param tile.size the size of tiles |
|
|
1974 |
#' @param segments Either a list of segments or a GeoJSON file. This will result in a second assay in the VoltRon object to be created |
|
|
1975 |
#' @param image_name the image name of the Image assay, Default: main |
|
|
1976 |
#' @param channel_names the channel names of the images if multiple images are provided |
|
|
1977 |
#' @param ... additional parameters passed to \link{formVoltRon} |
|
|
1978 |
#' |
|
|
1979 |
#' @importFrom magick image_read image_info |
|
|
1980 |
#' @importFrom data.table data.table |
|
|
1981 |
#' |
|
|
1982 |
#' @examples |
|
|
1983 |
#' # single image |
|
|
1984 |
#' imgfile <- system.file("extdata", "DAPI.tif", package = "VoltRon") |
|
|
1985 |
#' vrdata <- importImageData(imgfile, image_name = "main") |
|
|
1986 |
#' |
|
|
1987 |
#' # multiple images |
|
|
1988 |
#' imgfile <- c(system.file("extdata", "DAPI.tif", package = "VoltRon"), |
|
|
1989 |
#' system.file("extdata", "DAPI.tif", package = "VoltRon")) |
|
|
1990 |
#' vrdata <- importImageData(imgfile, image_name = "main", channel_name = c("DAPI", "DAPI2")) |
|
|
1991 |
#' |
|
|
1992 |
#' @export |
|
|
1993 |
importImageData <- function(image, tile.size = 10, segments = NULL, image_name = "main", channel_names = NULL, ...){ |
|
|
1994 |
|
|
|
1995 |
# images and channel names |
|
|
1996 |
if(!is.null(channel_names)){ |
|
|
1997 |
if(length(image) != length(channel_names)) |
|
|
1998 |
stop("Provided channel names should of the same length as the images!") |
|
|
1999 |
if(any(!is.character(channel_names))) |
|
|
2000 |
stop("Invalid channel names!") |
|
|
2001 |
} |
|
|
2002 |
|
|
|
2003 |
# get image |
|
|
2004 |
if(!is.list(image)){} |
|
|
2005 |
image <- as.list(image) |
|
|
2006 |
image <- sapply(image, function(img){ |
|
|
2007 |
if(!inherits(img, "magick-image")){ |
|
|
2008 |
if(!is.character(img)){ |
|
|
2009 |
stop("image should either be a magick-image object or a file.path") |
|
|
2010 |
} else{ |
|
|
2011 |
if(file.exists(img)){ |
|
|
2012 |
img <- magick::image_read(img) |
|
|
2013 |
} else { |
|
|
2014 |
stop(img, " is not found!") |
|
|
2015 |
} |
|
|
2016 |
} |
|
|
2017 |
} |
|
|
2018 |
img |
|
|
2019 |
}, USE.NAMES = TRUE, simplify = FALSE) |
|
|
2020 |
|
|
|
2021 |
# channel names |
|
|
2022 |
if(!is.null(channel_names)){ |
|
|
2023 |
names(image) <- channel_names |
|
|
2024 |
} |
|
|
2025 |
|
|
|
2026 |
# check image size |
|
|
2027 |
imageinfo <- vapply(image, function(img) { |
|
|
2028 |
info <- magick::image_info(img) |
|
|
2029 |
c(info$width, info$height) |
|
|
2030 |
}, numeric(2)) |
|
|
2031 |
unique_width <- unique(imageinfo[1,]) |
|
|
2032 |
unique_height <- unique(imageinfo[2,]) |
|
|
2033 |
if(length(unique_width) == 1 && length(unique_height) == 1){ |
|
|
2034 |
imageinfo <- list(width = imageinfo[1,1], height = imageinfo[2,1]) |
|
|
2035 |
} |
|
|
2036 |
|
|
|
2037 |
# coordinates |
|
|
2038 |
even_odd_correction <- (!tile.size%%2)*(0.5) |
|
|
2039 |
x_coords <- seq((tile.size/2) + even_odd_correction, imageinfo$width, tile.size)[seq_len(imageinfo$width %/% tile.size)] |
|
|
2040 |
y_coords <- seq((tile.size/2) + even_odd_correction, imageinfo$height, tile.size)[seq_len(imageinfo$height %/% tile.size)] |
|
|
2041 |
y_coords <- rev(y_coords) |
|
|
2042 |
coords <- as.matrix(expand.grid(x_coords, y_coords)) |
|
|
2043 |
colnames(coords) <- c("x", "y") |
|
|
2044 |
rownames(coords) <- paste0("tile", seq_len(nrow(coords))) |
|
|
2045 |
|
|
|
2046 |
# metadata |
|
|
2047 |
metadata <- data.table::data.table(id = rownames(coords)) |
|
|
2048 |
|
|
|
2049 |
# create voltron object with tiles |
|
|
2050 |
object <- formVoltRon(data = NULL, metadata = metadata, image = image, coords, main.assay = "ImageData", assay.type = "tile", params = list(tile.size = tile.size), image_name = image_name, ...) |
|
|
2051 |
|
|
|
2052 |
# check if segments are defined |
|
|
2053 |
if(is.null(segments)){ |
|
|
2054 |
return(object) |
|
|
2055 |
} else { |
|
|
2056 |
|
|
|
2057 |
# check if segments are paths |
|
|
2058 |
if(inherits(segments, "character")){ |
|
|
2059 |
if(grepl(".geojson$", segments)){ |
|
|
2060 |
segments <- generateSegments(geojson.file = segments) |
|
|
2061 |
} else { |
|
|
2062 |
stop("Only lists or GeoJSON files are accepted as segments input!") |
|
|
2063 |
} |
|
|
2064 |
} |
|
|
2065 |
|
|
|
2066 |
# make coordinates out of segments |
|
|
2067 |
coords <- t(vapply(segments, function(dat){ |
|
|
2068 |
apply(dat[,c("x", "y")], 2, mean) |
|
|
2069 |
}, numeric(2))) |
|
|
2070 |
rownames(coords) <- names(segments) |
|
|
2071 |
|
|
|
2072 |
# make segment assay |
|
|
2073 |
assay <- formAssay(coords = coords, segments = segments, image = image, type = "ROI", main_image = image_name) |
|
|
2074 |
|
|
|
2075 |
# add segments as assay |
|
|
2076 |
sample_metadata <- SampleMetadata(object) |
|
|
2077 |
object <- addAssay(object, |
|
|
2078 |
assay = assay, |
|
|
2079 |
metadata = data.frame(row.names = paste0(names(segments), "_Assay2")), |
|
|
2080 |
assay_name = "ROIAnnotation", |
|
|
2081 |
sample = sample_metadata$Sample, |
|
|
2082 |
layer = sample_metadata$Layer) |
|
|
2083 |
|
|
|
2084 |
# return |
|
|
2085 |
return(object) |
|
|
2086 |
} |
|
|
2087 |
} |
|
|
2088 |
|
|
|
2089 |
|
|
|
2090 |
#' generateSegments |
|
|
2091 |
#' |
|
|
2092 |
#' The function to import segments from a geojson file |
|
|
2093 |
#' |
|
|
2094 |
#' @param geojson.file the GeoJSON file, typically generated by QuPath software |
|
|
2095 |
#' |
|
|
2096 |
#' @importFrom rjson fromJSON |
|
|
2097 |
#' @importFrom dplyr tibble |
|
|
2098 |
#' |
|
|
2099 |
#' @export |
|
|
2100 |
generateSegments <- function(geojson.file){ |
|
|
2101 |
|
|
|
2102 |
# get segments |
|
|
2103 |
if(inherits(geojson.file, "character")){ |
|
|
2104 |
if(file.exists(geojson.file)){ |
|
|
2105 |
segments <- rjson::fromJSON(file = geojson.file) |
|
|
2106 |
} else { |
|
|
2107 |
stop("geojson.file doesn't exist!") |
|
|
2108 |
} |
|
|
2109 |
} else { |
|
|
2110 |
stop("geojson.file should be the path to the GeoJSON file!") |
|
|
2111 |
} |
|
|
2112 |
|
|
|
2113 |
# parse polygons as segments/ROIs |
|
|
2114 |
segments <- lapply(segments, function(x){ |
|
|
2115 |
type <- x$geometry$type |
|
|
2116 |
poly <- x$geometry$coordinates |
|
|
2117 |
if(grepl("Polygon", type)){ |
|
|
2118 |
poly <- as.data.frame(matrix(unlist(poly[[1]]), ncol = 2, byrow = TRUE)) |
|
|
2119 |
} |
|
|
2120 |
colnames(poly) <- c("x", "y") |
|
|
2121 |
dplyr::tibble(poly) |
|
|
2122 |
}) |
|
|
2123 |
|
|
|
2124 |
# attach names to segments |
|
|
2125 |
segments <- mapply(function(x, sgt){ |
|
|
2126 |
dplyr::tibble(data.frame(id = x, sgt)) |
|
|
2127 |
}, seq_len(length(segments)), segments, SIMPLIFY = FALSE) |
|
|
2128 |
|
|
|
2129 |
# generate ROI names |
|
|
2130 |
names(segments) <- paste0("ROI", seq_len(length(segments))) |
|
|
2131 |
|
|
|
2132 |
# return |
|
|
2133 |
return(segments) |
|
|
2134 |
} |
|
|
2135 |
|
|
|
2136 |
#' generateGeoJSON |
|
|
2137 |
#' |
|
|
2138 |
#' generating geojson files from segments |
|
|
2139 |
#' |
|
|
2140 |
#' @param segments the segments, typically from \link{vrSegments}. |
|
|
2141 |
#' @param file the GeoJSON file, typically to be used by QuPath software. |
|
|
2142 |
#' |
|
|
2143 |
#' @importFrom rjson fromJSON |
|
|
2144 |
#' |
|
|
2145 |
#' @export |
|
|
2146 |
generateGeoJSON <- function(segments, file){ |
|
|
2147 |
|
|
|
2148 |
if(!requireNamespace('geojsonR')) |
|
|
2149 |
stop("Please install geojsonR package for using geojsonR functions") |
|
|
2150 |
|
|
|
2151 |
# get segments |
|
|
2152 |
if(!inherits(file, "character")){ |
|
|
2153 |
stop("file should be the path to the GeoJSON file!") |
|
|
2154 |
} |
|
|
2155 |
|
|
|
2156 |
# reshape segments |
|
|
2157 |
segments <- mapply(function(id, sgt){ |
|
|
2158 |
poly <- na.omit(as.matrix(sgt[,c("x", "y")])) |
|
|
2159 |
poly <- rbind(poly, poly[1,,drop = FALSE]) |
|
|
2160 |
poly <- as.list(data.frame(t(poly))) |
|
|
2161 |
names(poly) <- NULL |
|
|
2162 |
init <- geojsonR::TO_GeoJson$new() |
|
|
2163 |
geometry <- init$Polygon(list(poly), stringify = TRUE) |
|
|
2164 |
feature <- list(type = "Feature", |
|
|
2165 |
id = id, |
|
|
2166 |
geometry = geometry[!names(geometry) %in% "json_dump"], |
|
|
2167 |
properties = list(objectType = "annotation")) |
|
|
2168 |
feature |
|
|
2169 |
}, names(segments), segments, SIMPLIFY = FALSE, USE.NAMES = FALSE) |
|
|
2170 |
|
|
|
2171 |
# save as json |
|
|
2172 |
segments <- rjson::toJSON(segments) |
|
|
2173 |
write(segments, file = file) |
|
|
2174 |
} |