Diff of /R/import.R [000000] .. [413088]

Switch to unified view

a b/R/import.R
1
####
2
# 10X Genomics ####
3
####
4
5
####
6
## Xenium ####
7
####
8
9
#' importXenium
10
#'
11
#' Importing Xenium data
12
#'
13
#' @param dir.path path to Xenium output folder
14
#' @param selected_assay selected assay from Xenium
15
#' @param assay_name the assay name of the SR object
16
#' @param sample_name the name of the sample
17
#' @param use_image if TRUE, the DAPI image will be used.
18
#' @param morphology_image the name of the lowred morphology image. Default: morphology_lowres.tif
19
#' @param resolution_level the level of resolution within Xenium OME-TIFF image, see \link{generateXeniumImage}. Default: 7 (553x402)
20
#' @param overwrite_resolution if TRUE, the image "file.name" will be generated again although it exists at "dir.path"
21
#' @param image_name the image name of the Xenium assay, Default: main
22
#' @param channel_name the channel name of the image of the Xenium assay, Default: DAPI
23
#' @param import_molecules if TRUE, molecule assay will be created along with cell assay.
24
#' @param verbose verbose
25
#' @param ... additional parameters passed to \link{formVoltRon}
26
#'
27
#' @importFrom magick image_read image_info
28
#' @importFrom utils read.csv
29
#' @importFrom data.table fread
30
#' @importFrom ids random_id
31
#'
32
#' @export
33
#'
34
importXenium <- function (dir.path, selected_assay = "Gene Expression", assay_name = "Xenium", sample_name = NULL, use_image = TRUE, 
35
                          morphology_image = "morphology_lowres.tif", resolution_level = 7, overwrite_resolution = FALSE, 
36
                          image_name = "main", channel_name = "DAPI", import_molecules = FALSE, verbose = TRUE, ...)
37
{
38
  # cell assay
39
  if(verbose)
40
    message("Creating cell level assay ...")
41
42
  # raw counts
43
  datafile <- paste0(dir.path, "/cell_feature_matrix.h5")
44
  if(file.exists(datafile)){
45
    rawdata <- import10Xh5(filename = datafile)
46
    if(any(names(rawdata) %in% selected_assay)){
47
      rawdata <- as.matrix(rawdata[[selected_assay]])
48
    } else {
49
      stop("There are no assays called ", selected_assay, " in the h5 file!")
50
    }
51
  } else {
52
    stop("There are no files named 'cell_feature_matrix.h5' in the path")
53
  }
54
55
  # image
56
  if(use_image){
57
    generateXeniumImage(dir.path, file.name = morphology_image, resolution_level = resolution_level, overwrite_resolution = overwrite_resolution,
58
                        verbose = verbose)
59
    image_file <- paste0(dir.path, "/", morphology_image)
60
    if(file.exists(image_file)){
61
      image <-  image_read(image_file)
62
    } else {
63
      stop("There are no spatial image files in the path")
64
    }
65
    # scale the xenium image instructed by 10x Genomics help page
66
    scaleparam <- 0.2125*(2^(resolution_level-1))
67
  } else {
68
    image <- NULL
69
  }
70
71
  # coordinates
72
  coord_file <- paste0(dir.path, "/cells.csv.gz")
73
  if(file.exists(coord_file)){
74
    Xenium_coords <- utils::read.csv(file = coord_file)
75
    coords <- as.matrix(Xenium_coords[,c("x_centroid", "y_centroid")])
76
    colnames(coords) <- c("x","y")
77
    if(use_image) {
78
      coords <- coords/scaleparam
79
      imageinfo <- unlist(magick::image_info(image)[c("height")])
80
      range_coords <- c(0,imageinfo)
81
    } else {
82
      range_coords <- range(coords[,2])
83
    }
84
    coords[,2] <- range_coords[2] - coords[,2] + range_coords[1]
85
  } else {
86
    stop("There are no file named 'cells.csv.gz' in the path")
87
  }
88
89
  # segments
90
  segments_file <- paste0(dir.path, "/cell_boundaries.csv.gz")
91
  if(file.exists(segments_file)){
92
    segments <- as.data.frame(data.table::fread(segments_file))
93
    segments <- segments[,c("cell_id", "vertex_x", "vertex_y")]
94
    colnames(segments) <- c("cell_id", "x", "y")
95
    if(use_image){
96
      segments[,c("x","y")] <- segments[,c("x","y")]/scaleparam
97
    }
98
    segments[,"y"] <- range_coords[2] - segments[,"y"]  + range_coords[1]
99
    segments <- segments %>% dplyr::group_split(cell_id)
100
    segments <- as.list(segments)
101
    names(segments) <- rownames(coords)
102
  } else {
103
    stop("There are no file named 'cell_boundaries.csv.gz' in the path")
104
  }
105
106
  # create VoltRon object for cells
107
  cell_object <- formVoltRon(rawdata, metadata = NULL, image = image, coords, segments = segments, main.assay = assay_name, 
108
                             assay.type = "cell", image_name = image_name, main_channel = channel_name, sample_name = sample_name, 
109
                             feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...)
110
111
  # molecule assay
112
  if(!import_molecules){
113
    return(cell_object)
114
  } else {
115
    if(verbose)
116
      message("Creating molecule level assay ...")
117
    # transcripts
118
    transcripts_file <- paste0(dir.path, "/transcripts.csv.gz")
119
    if(!file.exists(transcripts_file)){
120
      stop("There are no file named 'transcripts.csv.gz' in the path")
121
    } else {
122
      # get subcellur data components
123
      subcellular_data <- data.table::fread(transcripts_file)
124
      subcellular_data <- subcellular_data[,c("transcript_id", colnames(subcellular_data)[!colnames(subcellular_data) %in% "transcript_id"]), with = FALSE]
125
      colnames(subcellular_data)[colnames(subcellular_data)=="transcript_id"] <- "id"
126
      colnames(subcellular_data)[colnames(subcellular_data)=="feature_name"] <- "gene"
127
      subcellular_data <- subcellular_data[subcellular_data$qv >= 20, ]
128
129
      # coordinates
130
      coords <- as.matrix(subcellular_data[,c("x_location", "y_location", "z_location")])
131
      colnames(coords) <- c("x", "y", "z")
132
      if(use_image){
133
        coords[,c("x", "y")] <- coords[,c("x", "y")]/scaleparam
134
      }
135
      coords[,"y"] <- range_coords[2] - coords[,"y"]  + range_coords[1]
136
137
      # metadata
138
      # mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell_id", "transcript_id", "x_location", "y_location")], with = FALSE]
139
      mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell_id", "transcript_id", "x_location", "y_location", "z_location")], with = FALSE]
140
      set.seed(nrow(mol_metadata$id))
141
      mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE))
142
      mol_metadata$assay_id <- "Assay1"
143
      mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")]
144
145
      # coord names
146
      rownames(coords) <- mol_metadata$id
147
148
      # create VoltRon assay for molecules
149
      mol_assay <- formAssay(coords = coords, image = image, type = "molecule", main_image = image_name, main_channel = channel_name)
150
151
      # merge assays in one section
152
      if(verbose)
153
        message("Merging assays ...")
154
      sample.metadata <- SampleMetadata(cell_object)
155
      object <- addAssay(cell_object,
156
                         assay = mol_assay,
157
                         metadata = mol_metadata,
158
                         assay_name = paste0(assay_name, "_mol"),
159
                         sample = sample.metadata["Assay1", "Sample"],
160
                         layer = sample.metadata["Assay1", "Layer"])
161
162
      # connectivity
163
      connectivity <- data.table::data.table(transcript_id = mol_metadata$id, cell_id = subcellular_data[["cell_id"]])
164
      if(is.numeric(connectivity$cell_id)){
165
        connectivity <- subset(connectivity, cell_id != -1)
166
        connectivity[["cell_id"]] <- vrSpatialPoints(cell_object)[connectivity[["cell_id"]]]
167
      } else {
168
        connectivity <- subset(connectivity, cell_id != "UNASSIGNED")
169
        connectivity$cell_assay_id <- "_Assay1"
170
        connectivity[, cell_id:=do.call(paste0,.SD), .SDcols=c("cell_id", "cell_assay_id")]
171
        connectivity$cell_assay_id <- NULL
172
      }
173
174
      # add connectivity of spatial points across assays
175
      object <- addLayerConnectivity(object,
176
                                connectivity = connectivity,
177
                                sample = sample.metadata["Assay1", "Sample"],
178
                                layer = sample.metadata["Assay1", "Layer"])
179
180
      # return
181
      return(object)
182
    }
183
  }
184
}
185
186
#' generateXeniumImage
187
#'
188
#' Generate a low resolution DAPI image of the Xenium experiment
189
#'
190
#' @param dir.path Xenium output folder
191
#' @param increase.contrast increase the contrast of the image before writing
192
#' @param resolution_level the level of resolution within Xenium OME-TIFF image. Default: 7 (553x402)
193
#' @param overwrite_resolution if TRUE, the image "file.name" will be generated again although it exists at "dir.path"
194
#' @param output.path The path to the new morphology image created if the image should be saved to a location other than Xenium output folder.
195
#' @param file.name the name of the lowred morphology image. Default: morphology_lowres.tif
196
#' @param verbose verbose
197
#' @param ... additional parameters passed to the \link{writeImage} function
198
#'
199
#' @importFrom EBImage writeImage
200
#'
201
#' @details
202
#' The Xenium morphology_mip.ome.tif file that is found under the outs folder comes is an hyperstack of different resolutions of the DAPI image.
203
#' \link{generateXeniumImage} allows extracting only one of these layers by specifying the \code{resolution} parameter (Default: 7 for 553x402) among 1 to 8.
204
#' Lower incides of resolutions have higher higher resolutions, e.g. 1 for 35416x25778. Note that you may need to allocate larger memory of Java to import
205
#' higher resolution images.
206
#'
207
#' @export
208
#'
209
generateXeniumImage <- function(dir.path, increase.contrast = TRUE, resolution_level = 7, overwrite_resolution = FALSE, 
210
                                output.path = NULL, file.name = "morphology_lowres.tif", verbose = TRUE, ...) {
211
  
212
  # file path to either Xenium output folder or specified folder
213
  file.path <- paste0(dir.path, "/", file.name)
214
  output.file <- paste0(output.path, "/", file.name)
215
  
216
  # check if the file exists in either Xenium output folder, or the specified location
217
  if((file.exists(file.path) | file.exists(paste0(output.file))) & !overwrite_resolution){
218
    if(verbose)
219
      message(file.name, " already exists!")
220
  } else {
221
    if (!requireNamespace('RBioFormats'))
222
      stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')")
223
    if(dir.exists(paste0(dir.path, "/morphology_focus"))){
224
      if(verbose)
225
        message("Loading morphology_focus_0000.ome.tif ...")
226
      morphology_image_lowres <- RBioFormats::read.image(paste0(dir.path, "/morphology_focus/morphology_focus_0000.ome.tif"),
227
                                                         resolution = resolution_level,
228
                                                         subset=list(C=1))
229
    } else if(file.exists(paste0(dir.path, "/morphology_mip.ome.tif"))) {
230
      if(verbose)
231
        message("Loading morphology_mip.ome.tif ...")
232
      morphology_image_lowres <- RBioFormats::read.image(paste0(dir.path, "/morphology_mip.ome.tif"), resolution = resolution_level)
233
    }
234
    
235
    # pick a resolution level
236
    image_info <- morphology_image_lowres@metadata$coreMetadata
237
    if(verbose)
238
      message("  Image Resolution (X:", image_info$sizeX, " Y:", image_info$sizeY, ") ...")
239
    
240
    # increase contrast using EBImage
241
    if(increase.contrast) {
242
      if(verbose)
243
        message("  Increasing Contrast ...")
244
      morphology_image_lowres <- (morphology_image_lowres/max(morphology_image_lowres))
245
    }
246
    
247
    # write to the same folder
248
    if(verbose)
249
      message("  Writing Tiff File ...")
250
    if(is.null(output.path)){
251
      EBImage::writeImage(morphology_image_lowres, file = file.path, ...)
252
    } else {
253
      EBImage::writeImage(morphology_image_lowres, file = output.file, ...)
254
    }
255
  }
256
  invisible()
257
}
258
259
####
260
## Visium ####
261
####
262
263
#' importVisium
264
#'
265
#' Importing Visium data
266
#'
267
#' @param dir.path path to Visium output folder
268
#' @param selected_assay selected assay from Visium
269
#' @param assay_name the assay name
270
#' @param sample_name the name of the sample
271
#' @param image_name the image name of the Visium assay, Default: main
272
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E
273
#' @param inTissue if TRUE, only barcodes that are in the tissue will be kept (default: TRUE)
274
#' @param resolution_level the level of resolution of Visium image: "lowres" (default) or "hires"
275
#' @param ... additional parameters passed to \link{formVoltRon}
276
#'
277
#' @importFrom magick image_read image_info
278
#' @importFrom rjson fromJSON
279
#' @importFrom utils read.csv
280
#'
281
#' @export
282
#'
283
importVisium <- function(dir.path, selected_assay = "Gene Expression", assay_name = "Visium", sample_name = NULL, image_name = "main", channel_name = "H&E", inTissue = TRUE, resolution_level = "lowres", ...)
284
{
285
  # raw counts
286
  listoffiles <- list.files(dir.path)
287
  datafile <- listoffiles[grepl("filtered_feature_bc_matrix.h5", listoffiles)][1]
288
  datafile <- paste0(dir.path, "/", datafile)
289
  if(file.exists(datafile)){
290
    rawdata <- import10Xh5(filename = datafile)
291
    if(any(names(rawdata) %in% selected_assay)){
292
      rawdata <- as.matrix(rawdata[[selected_assay]])
293
    } else {
294
      stop("There are no assays called ", selected_assay, " in the h5 file!")
295
    }
296
  } else {
297
    stop("There are no files named 'filtered_feature_bc_matrix.h5' in the path")
298
  }
299
300
  # resolution
301
  if(!resolution_level %in% c("lowres","hires"))
302
    stop("resolution_level should be either 'lowres' or 'hires'!")
303
304
  # image
305
  image_file <- paste0(dir.path, paste0("/spatial/tissue_", resolution_level, "_image.png"))
306
  if(file.exists(image_file)){
307
    image <-  magick::image_read(image_file)
308
    info <- image_info(image)
309
  } else {
310
    stop("There are no spatial image files in the path")
311
  }
312
313
  # coordinates
314
  coords_file <- list.files(paste0(dir.path, "/spatial/"), full.names = TRUE)
315
  coords_file <- coords_file[grepl("tissue_positions",coords_file)]
316
  if(length(coords_file) == 1){
317
    if(grepl("tissue_positions_list.csv", coords_file)) {
318
      coords <- utils::read.csv(file = coords_file, header = FALSE)
319
      colnames(coords) <- c("barcode", "in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres")
320
    } else {
321
      coords <- utils::read.csv(file = coords_file, header = TRUE)
322
    }
323
  } else if(length(coords_file) > 1) {
324
    stop("There are more than 1 position files in the path")
325
  } else {
326
    stop("There are no files named 'tissue_positions.csv' in the path")
327
  }
328
329
  if(inTissue){
330
    coords <- coords[coords$in_tissue==1,]
331
    rawdata <- rawdata[,colnames(rawdata) %in% coords$barcode]
332
  }
333
  coords <- coords[match(colnames(rawdata), coords$barcode),]
334
  spotID <- coords$barcode
335
  coords <- as.matrix(coords[,c("pxl_col_in_fullres", "pxl_row_in_fullres")], )
336
  colnames(coords) <- c("x", "y")
337
  rownames(coords) <- spotID
338
339
  # scale coordinates
340
  scale_file <- paste0(dir.path, "/spatial/scalefactors_json.json")
341
  if(file.exists(scale_file)){
342
    scalefactors <- rjson::fromJSON(file = scale_file)
343
    scales <- scalefactors[[paste0("tissue_", resolution_level, "_scalef")]]
344
    params <- list(
345
      nearestpost.distance = 200*scales, # distance to nearest spot
346
      spot.radius = scalefactors$spot_diameter_fullres*scales,
347
      vis.spot.radius = scalefactors$spot_diameter_fullres*scales*2,
348
      spot.type = "circle")
349
    coords <- coords*scales
350
    coords[,2] <- info$height - coords[,2]
351
  } else {
352
    stop("There are no files named 'scalefactors_json.json' in the path")
353
  }
354
355
  # create VoltRon
356
  formVoltRon(rawdata, metadata = NULL, image, coords, main.assay = assay_name, params = params, assay.type = "spot", 
357
              image_name = image_name, main_channel = channel_name, sample_name = sample_name, 
358
              feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...)
359
}
360
361
#' importVisiumHD
362
#'
363
#' Importing VisiumHD data
364
#'
365
#' @param dir.path path to Visium output folder
366
#' @param bin.size bin size of the VisiumHD output (Exp: "2", "8" and "16")
367
#' @param selected_assay selected assay from Visium
368
#' @param assay_name the assay name
369
#' @param sample_name the name of the sample
370
#' @param image_name the image name of the Visium assay, Default: main
371
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E
372
#' @param inTissue if TRUE, only barcodes that are in the tissue will be kept (default: TRUE)
373
#' @param resolution_level the level of resolution of Visium image: "lowres" (default) or "hires"
374
#' @param ... additional parameters passed to \link{formVoltRon}
375
#'
376
#' @importFrom magick image_read image_info
377
#' @importFrom rjson fromJSON
378
#' @importFrom utils read.csv
379
#'
380
#' @export
381
#'
382
importVisiumHD <- function(dir.path, bin.size = "8", selected_assay = "Gene Expression", assay_name = "VisiumHD", sample_name = NULL, image_name = "main", channel_name = "H&E", inTissue = TRUE, resolution_level = "lowres", ...){
383
  
384
  # raw counts
385
  bin.size <- formatC(as.numeric(bin.size), width = 3, format = "d", flag = "0")
386
  dir.path <- paste0(dir.path, "binned_outputs/square_", bin.size, "um/")
387
  listoffiles <- list.files(dir.path)
388
  datafile <- listoffiles[grepl("filtered_feature_bc_matrix.h5", listoffiles)][1]
389
  datafile <- paste0(dir.path, "/", datafile)
390
  if(file.exists(datafile)){
391
    rawdata <- import10Xh5(filename = datafile)
392
    if(any(names(rawdata) %in% selected_assay)){
393
      rawdata <- as(rawdata[[selected_assay]], "CsparseMatrix")
394
    } else {
395
      stop("There are no assays called ", selected_assay, " in the h5 file!")
396
    }
397
  } else {
398
    stop("There are no files named 'filtered_feature_bc_matrix.h5' in the path")
399
  }
400
  
401
  # resolution
402
  if(!resolution_level %in% c("lowres","hires"))
403
    stop("resolution_level should be either 'lowres' or 'hires'!")
404
  
405
  # image
406
  image_file <- paste0(dir.path, paste0("/spatial/tissue_", resolution_level, "_image.png"))
407
  if(file.exists(image_file)){
408
    image <-  magick::image_read(image_file)
409
    info <- magick::image_info(image)
410
  } else {
411
    stop("There are no spatial image files in the path")
412
  }
413
  
414
  # coordinates
415
  if(!requireNamespace("arrow"))
416
    stop("Please install arrow package to import VisiumHD!: install.packages('arrow')")
417
  coords_file <- list.files(paste0(dir.path, "/spatial/"), full.names = TRUE)
418
  coords_file <- coords_file[grepl("tissue_positions",coords_file)]
419
  if(length(coords_file) == 1){
420
    coords <- data.table::as.data.table(arrow::read_parquet(coords_file, as_data_frame = FALSE))
421
  } else if(length(coords_file) > 1) {
422
    stop("There are more than 1 position files in the path")
423
  } else {
424
    stop("There are no files named 'tissue_positions.parquet' in the path")
425
  }
426
  if(inTissue){
427
    coords <- subset(coords, in_tissue == 1)
428
    rawdata <- rawdata[,colnames(rawdata) %in% coords$barcode]
429
  }
430
  coords <- coords[match(colnames(rawdata), coords$barcode),]
431
  spotID <- coords$barcode
432
  coords <- as.matrix(coords[,c("pxl_col_in_fullres", "pxl_row_in_fullres")], )
433
  colnames(coords) <- c("x", "y")
434
  rownames(coords) <- spotID
435
  
436
  # scale coordinates
437
  scale_file <- paste0(dir.path, "/spatial/scalefactors_json.json")
438
  if(file.exists(scale_file)){
439
    scalefactors <- rjson::fromJSON(file = scale_file)
440
    scales <- scalefactors[[paste0("tissue_", resolution_level, "_scalef")]]
441
    params <- list(
442
      nearestpost.distance = scalefactors$spot_diameter_fullres*scales*(3/sqrt(2)),
443
      spot.radius = scalefactors$spot_diameter_fullres*scales,
444
      vis.spot.radius = scalefactors$spot_diameter_fullres*scales,
445
      spot.type = "square")
446
    coords <- coords*scales
447
    coords[,2] <- info$height - coords[,2]
448
  } else {
449
    stop("There are no files named 'scalefactors_json.json' in the path")
450
  }
451
  
452
  # create VoltRon
453
  formVoltRon(rawdata, metadata = NULL, image, coords, main.assay = assay_name, params = params, assay.type = "spot", 
454
              image_name = image_name, main_channel = channel_name, sample_name = sample_name, 
455
              feature_name = ifelse(selected_assay == "Gene Expression", "RNA", "main"), ...)
456
}
457
458
####
459
## Auxiliary ####
460
####
461
462
#' import10Xh5
463
#'
464
#' import the sparse matrix from the H5 file
465
#'
466
#' @param filename the path to h5 file
467
#'
468
#' @importFrom Matrix sparseMatrix
469
#'
470
#' @export
471
import10Xh5 <- function(filename){
472
473
  # check package
474
  if(!requireNamespace("rhdf5")){
475
    stop("You have to install the rhdf5 package!: BiocManager::install('rhdf5')")
476
  }
477
  
478
  # check file
479
  if(file.exists(filename)){
480
    matrix.10X <- rhdf5::h5read(file = filename, name = "matrix")
481
  } else {
482
    stop("There are no files named ", filename," in the path")
483
  }
484
485
  # get data, barcodes and feature
486
  features <- matrix.10X[["features"]][["name"]]
487
  feature_type <- matrix.10X[["features"]][["feature_type"]]
488
  cells <- matrix.10X[["barcodes"]]
489
  mat <- matrix.10X[["data"]]
490
  indices <- matrix.10X[["indices"]]
491
  indptr <- matrix.10X[["indptr"]]
492
  sparse.mat <- sparseMatrix(i = indices + 1, p = indptr,
493
                             x = as.numeric(mat), dims = c(length(features), length(cells)),
494
                             repr = "T")
495
  colnames(sparse.mat) <- cells
496
  rownames(sparse.mat) <- make.unique(features)
497
498
  # separate feature types
499
  matrix.10X <- list()
500
  for(feat in unique(feature_type)){
501
    cur_features <- features[feature_type %in% feat]
502
    cur_mat <- sparse.mat[features %in% cur_features,]
503
    matrix.10X[[feat]] <- cur_mat
504
  }
505
506
  return(matrix.10X)
507
}
508
509
####
510
# Nanostring ####
511
####
512
513
####
514
## GeoMx ####
515
####
516
517
#' importGeoMx
518
#'
519
#' Import GeoMx data
520
#'
521
#' @param dcc.path path to the folder where the dcc files are found
522
#' @param pkc.file path to the pkc file
523
#' @param summarySegment the metadata csv (sep = ";") or excel file, if the file is an excel file, \code{summarySegmentSheetName} should be provided as well.
524
#' @param summarySegmentSheetName the sheet name of the excel file, \code{summarySegment}
525
#' @param assay_name the assay name, default: GeoMx
526
#' @param image the reference morphology image of the GeoMx assay
527
#' @param segment_polygons if TRUE, the ROI polygons are parsed from the OME.TIFF file
528
#' @param ome.tiff the OME.TIFF file of the GeoMx experiment if exists
529
#' @param resolution_level the level of resolution within GeoMx OME-TIFF image, Default: 3
530
#' @param image_name the image name of the Visium assay, Default: main
531
#' @param ... additional parameters passed to \link{formVoltRon}
532
#'
533
#' @importFrom dplyr %>% full_join
534
#' @importFrom utils read.csv
535
#' @importFrom magick image_info image_read
536
#'
537
#' @export
538
#'
539
importGeoMx <- function(dcc.path, pkc.file, summarySegment, summarySegmentSheetName, assay_name = "GeoMx",
540
                        image = NULL, segment_polygons = FALSE, ome.tiff = NULL, resolution_level = 3, image_name = "main", ...)
541
{
542
  # Get pkc file
543
  if(file.exists(pkc.file)){
544
    pkcdata <- readPKC(pkc.file)
545
  } else {
546
    stop("pkc file is not found!")
547
  }
548
549
  # Get dcc file
550
  if(file.exists(dcc.path)){
551
    dcc_files <- dir(dcc.path, pattern = ".dcc$", full.names = TRUE)
552
    if(length(dcc_files) == 0){
553
      stop("no dcc files are found under ", dcc.path)
554
    } else {
555
      dcc_files <- dcc_files[!grepl("A01.dcc$", dcc_files)]
556
      dcc_filenames <- dir(dcc.path, pattern = ".dcc$", full.names = FALSE)
557
      dcc_filenames <- dcc_filenames[!grepl("A01.dcc$", dcc_filenames)]
558
      dcc_filenames <- gsub(".dcc$", "", dcc_filenames)
559
      dcc_filenames <- gsub("-", "_", dcc_filenames)
560
      dccData <- sapply(dcc_files, readDCC, simplify = FALSE, USE.NAMES = FALSE)
561
      names(dccData) <- dcc_filenames
562
    }
563
  } else {
564
    stop("path to dcc files does not exist!")
565
  }
566
567
  # merge dcc files
568
  rawdata <- NULL
569
  for(i in seq_len(length(dccData))){
570
    cur_data <- dccData[[i]]$Code_Summary
571
    colnames(cur_data) <- c("RTS_ID", dcc_filenames[i])
572
    if(i == 1){
573
      rawdata <- cur_data
574
    } else {
575
      suppressMessages(rawdata <- rawdata %>% full_join(cur_data))
576
    }
577
  }
578
  rawdata[is.na(rawdata)] <- 0
579
  rownames(rawdata) <- rawdata$RTS_ID
580
  rawdata <- rawdata[,!colnames(rawdata) %in% "RTS_ID"]
581
582
  # get negative probes and targets
583
  # NegProbes <- pkcdata$RTS_ID[pkcdata$Target == "NegProbe-WTX"]
584
  NegProbes <- pkcdata$RTS_ID[grepl("NegProbe", pkcdata$Target)]
585
586
  # negative probes
587
  rawdata_neg <- rawdata[rownames(rawdata) %in% NegProbes, ]
588
  rownames(rawdata_neg) <- paste0(rownames(rawdata_neg), "_",
589
                                  pkcdata$Target[match(rownames(rawdata_neg), pkcdata$RTS_ID)])
590
  rawdata_neg <- as.matrix(rawdata_neg)
591
592
  # other probes
593
  rawdata <- rawdata[!rownames(rawdata) %in% NegProbes, ]
594
  rownames(rawdata) <- pkcdata$Target[match(rownames(rawdata), pkcdata$RTS_ID)]
595
  rawdata <- as.matrix(rawdata)
596
597
  # get segment summary
598
  if(file.exists(summarySegment)){
599
    if(grepl(".xls$|.xlsx$", summarySegment)){
600
      if (!requireNamespace('xlsx'))
601
        stop("Please install xlsx package for using the read.xlsx function!: install.packages('xlsx')")
602
      if(!is.null(summarySegmentSheetName)){
603
        segmentsummary <- xlsx::read.xlsx(summarySegment, sheetName = summarySegmentSheetName)
604
      } else {
605
        stop("Please provide 'summarySegmentSheetName' for the excel sheet name!")
606
      }
607
    } else if(grepl(".csv$", summarySegment)) {
608
      segmentsummary <- utils::read.csv(summarySegment, row.names = NULL, header = T, sep = ";")
609
    }
610
    rownames(segmentsummary) <- gsub(".dcc$", "", segmentsummary$Sample_ID)
611
    rownames(segmentsummary) <- gsub("-", "_", rownames(segmentsummary))
612
    if(all(dcc_filenames %in% rownames(segmentsummary))){
613
      segmentsummary <- segmentsummary[dcc_filenames, ]
614
    } else{
615
      stop("Some GeoMx dcc files are not represented in the segment summary file!")
616
    }
617
  } else {
618
    stop(summarySegment, " is not found!")
619
  }
620
621
  # get image
622
  if(!is.null(image)){
623
    if(file.exists(image)){
624
      image <- magick::image_read(image)
625
    } else {
626
      stop(image, " is not found!")
627
    }
628
  } else {
629
    stop("Please provide a image for the GeoMx data!")
630
  }
631
  geomx_image_info <- magick::image_info(image)
632
633
  # get coordinates
634
  coords <- segmentsummary[,c("X","Y")]
635
  colnames(coords) <- c("x", "y")
636
  rownames(coords) <- segmentsummary$ROI.name
637
  coords <- rescaleGeoMxPoints(coords, segmentsummary, geomx_image_info)
638
639
  # get ROI segments (polygons)
640
  segments <- list()
641
  if(!is.null(ome.tiff)){
642
643
    # get segments
644
    segments <- importGeoMxSegments(ome.tiff, segmentsummary, geomx_image_info)
645
    segments <- segments[rownames(coords)]
646
647
    # parse channels if ome.tiff is given
648
    channels <- importGeoMxChannels(ome.tiff, segmentsummary, geomx_image_info, resolution_level = resolution_level)
649
    image <- c(list(scanimage = image), channels)
650
  }
651
652
  # create VoltRon for non-negative probes
653
  object <- formVoltRon(rawdata, metadata = segmentsummary, image, coords, segments, main.assay = assay_name, assay.type = "ROI", 
654
                        feature_name = "RNA", image_name = image_name, ...)
655
  
656
  # add negative probe assay as new feature set
657
  object <- addFeature(object, assay = assay_name, data = rawdata_neg, feature_name = "NegProbe")
658
659
  # return
660
  return(object)
661
}
662
663
#' readPKC
664
#'
665
#' Read a NanoString Probe Kit Configuration (PKC) file. Adapted from \code{GeomxTools} package.
666
#'
667
#' @param file A character string containing the path to the PKC file.
668
#' @param default_pkc_vers Optional list of pkc file names to use as default if more than one pkc version of each module is provided.
669
#'
670
#' @importFrom rjson fromJSON
671
#'
672
#' @noRd
673
readPKC <- function (file, default_pkc_vers = NULL)
674
{
675
  pkc_json_list <- lapply(file, function(pkc_file) {
676
    rjson::fromJSON(file = pkc_file)
677
  })
678
  pkc_names <- unlist(lapply(file, function(pkc_file) {
679
    base_pkc_name <- gsub(".pkc", "", trimws(basename(pkc_file)))
680
    return(base_pkc_name)
681
  }))
682
  names(pkc_json_list) <- pkc_names
683
  pkc_modules <- basename(unlist(lapply(pkc_names, sub, pattern = "_[^_]+$",
684
                                        replacement = "")))
685
  names(pkc_modules) <- pkc_names
686
  header <- list(PKCFileName = sapply(pkc_json_list, function(list) list[["Name"]]),
687
                 PKCModule = pkc_modules,
688
                 PKCFileVersion = sapply(pkc_json_list, function(list) list[["Version"]]),
689
                 PKCFileDate = sapply(pkc_json_list, function(list) list[["Date"]]),
690
                 AnalyteType = sapply(pkc_json_list, function(list) list[["AnalyteType"]]),
691
                 MinArea = sapply(pkc_json_list, function(list) list[["MinArea"]]),
692
                 MinNuclei = sapply(pkc_json_list, function(list) list[["MinNuclei"]]))
693
694
  multi_idx <- duplicated(header[["PKCModule"]])
695
  multi_mods <- unique(header[["PKCModule"]][multi_idx])
696
  if (length(multi_mods) < 1) {
697
    if (!is.null(default_pkc_vers)) {
698
      warning("Only one version found per PKC module. ",
699
              "No PKCs need to be combined. ", "Therefore, no default PKC versions will be used.")
700
    }
701
  }
702
  else {
703
    use_pkc_names <- lapply(multi_mods, function(mod) {
704
      mod_idx <- header[["PKCModule"]] == mod
705
      max_vers <- as.numeric(as.character(max(as.numeric_version(header[["PKCFileVersion"]][mod_idx]))))
706
      max_name <- names(header[["PKCFileVersion"]][header[["PKCFileVersion"]] == max_vers])
707
      return(max_name)
708
    })
709
    names(use_pkc_names) <- multi_mods
710
    if (!is.null(default_pkc_vers)) {
711
      default_names <- unlist(lapply(default_pkc_vers, function(pkc_file) {
712
        base_pkc_name <- gsub(".pkc", "", trimws(basename(pkc_file)))
713
        return(base_pkc_name)
714
      }))
715
      default_mods <- unlist(lapply(default_names, sub, pattern = "_[^_]+$",
716
                                replacement = ""))
717
      dup_defaults <- default_names[duplicated(default_mods) |
718
                                      duplicated(default_mods, fromLast = TRUE)]
719
      if (!all(default_names %in% names(header[["PKCFileName"]]))) {
720
        removed_pkcs <- default_pkc_vers[!default_names %in%
721
                                           names(header[["PKCFileName"]])]
722
        stop("Could not match all default PKC versions with a PKC file name. ",
723
             "Check default file names match exactly to a PKC file name.\n",
724
             paste0("Unmatched default PKC versions: ",
725
                    removed_pkcs))
726
      }
727
      else if (length(dup_defaults) > 0) {
728
        stop("There should only be one default PKC version per module. ",
729
             "Ensure only one version per module in default PKCs list.\n",
730
             "Multiple default PKC version conflicts: ",
731
             paste(dup_defaults, collapse = ", "))
732
      }
733
      else {
734
        use_pkc_names[default_mods] <- default_names
735
      }
736
    }
737
  }
738
  rtsid_lookup_df <- generate_pkc_lookup(pkc_json_list)
739
  rtsid_lookup_df$Negative <- grepl("Negative", rtsid_lookup_df$CodeClass)
740
  rtsid_lookup_df$RTS_ID <- gsub("RNA", "RTS00", rtsid_lookup_df[["RTS_ID"]])
741
  # rtsid_lookup_df <- S4Vectors::DataFrame(rtsid_lookup_df)
742
  rtsid_lookup_df <- data.table::data.table(rtsid_lookup_df)
743
  if (length(multi_mods) > 0) {
744
    for (mod in names(use_pkc_names)) {
745
      mod_vers <- names(header[["PKCModule"]])[header[["PKCModule"]] ==
746
                                                 mod]
747
      mod_lookup <- rtsid_lookup_df[rtsid_lookup_df$Module %in%
748
                                      mod_vers, ]
749
      mod_tab <- table(mod_lookup$RTS_ID)
750
      remove_rts <- names(mod_tab[mod_tab != length(mod_vers)])
751
      if (length(remove_rts) > 0) {
752
        warning("The following probes were removed from analysis",
753
                " as they were not found in all PKC module versions used.\n",
754
                # paste(utils::capture.output(print(subset(rtsid_lookup_df,
755
                #                                   subset = RTS_ID %in% remove_rts))), collapse = "\n")
756
                )
757
        rtsid_lookup_df <- subset(rtsid_lookup_df, subset = !RTS_ID %in%
758
                                    remove_rts)
759
      }
760
      remove_vers <- mod_vers[mod_vers != use_pkc_names[mod]]
761
      rtsid_lookup_df <- subset(rtsid_lookup_df, subset = !Module %in%
762
                                  remove_vers)
763
      warning("The following PKC versions were removed from analysis",
764
              " as they were overridden by a newer PKC version or",
765
              " were overridden by a user-defined default PKC version.\n",
766
              paste(remove_vers, collapse = ", "))
767
      header <- lapply(header, function(elem) {
768
        elem[!names(elem) %in% remove_vers]
769
      })
770
    }
771
  }
772
  # S4Vectors::metadata(rtsid_lookup_df) <- header
773
  return(rtsid_lookup_df)
774
}
775
776
#' readDCC
777
#'
778
#' Read a NanoString GeoMx Digital Count Conversion (DCC) file.
779
#'
780
#' @param file A character string containing the path to the DCC file.
781
#'
782
#' @noRd
783
readDCC <- function(file)
784
{
785
  lines <- trimws(readLines(file))
786
  trimGalore <- grep("trimGalore", lines)
787
  if (length(trimGalore) > 0) {
788
    Raw <- grep("Raw", lines)
789
    lines <- lines[-c(trimGalore:(Raw - 1))]
790
  }
791
  lines <- gsub("SoftwareVersion,\"GeoMx_NGS_Pipeline_ ",
792
                "SoftwareVersion,", lines)
793
  lines <- gsub("SoftwareVersion,\"GeoMx_NGS_Pipeline_", "SoftwareVersion,",
794
                lines)
795
  lines <- gsub("SoftwareVersion,DRAGEN_GeoMx_", "SoftwareVersion,",
796
                lines)
797
  lines[grepl("SoftwareVersion", lines)] <- gsub("\"", "",
798
                                                 lines[grepl("SoftwareVersion", lines)])
799
  tags <- c("Header", "Scan_Attributes", "NGS_Processing_Attributes", "Code_Summary")
800
  output <- sapply(tags, function(tag) {
801
    bounds <- charmatch(sprintf(c("<%s>", "</%s>"), tag),
802
                        lines)
803
    if (anyNA(bounds) || bounds[1L] + 1L >= bounds[2L])
804
      lines[integer(0)]
805
    else lines[(bounds[1L] + 1L):(bounds[2L] - 1L)]
806
  }, simplify = FALSE)
807
  for (tag in c("Header", "Scan_Attributes", "NGS_Processing_Attributes")) {
808
    while (length(bad <- grep(",", output[[tag]], invert = TRUE)) >
809
           0L) {
810
      bad <- bad[1L]
811
      if (bad == 1L)
812
        stop(sprintf("%s section has malformed first line",
813
                     tag))
814
      fixed <- output[[tag]]
815
      fixed[bad - 1L] <- sprintf("%s %s", fixed[bad -
816
                                                  1L], fixed[bad])
817
      output[[tag]] <- fixed[-bad]
818
    }
819
    output[[tag]] <- strsplit(output[[tag]], split = ",")
820
    output[[tag]] <- structure(lapply(output[[tag]], function(x) if (length(x) ==
821
                                                                     1L)
822
      ""
823
      else x[2L]), names = lapply(output[[tag]], `[`, 1L),
824
      class = "data.frame", row.names = basename(file))
825
  }
826
  cols <- c("FileVersion", "SoftwareVersion")
827
  if (!(all(cols %in% colnames(output[["Header"]]))))
828
    stop("Header section must contain \"FileVersion\" and \"SoftwareVersion\"")
829
  output[["Header"]][, cols] <- lapply(output[["Header"]][,
830
                                                          cols], numeric_version)
831
  fileVersion <- output[["Header"]][1L, "FileVersion"]
832
  if (!(numeric_version(fileVersion) %in% numeric_version(c("0.01",
833
                                                            "0.02"))))
834
    stop("\"FileVersion\" in Header section must be 0.01 or 0.02")
835
  for (section in c("Header", "Scan_Attributes", "NGS_Processing_Attributes")) {
836
    valid <- .validDccSchema(output[[section]], fileVersion,
837
                             section)
838
    if (!isTRUE(valid))
839
      stop(valid)
840
  }
841
  output[["Header"]][["Date"]] <- as.Date(output[["Header"]][["Date"]],
842
                                          format = "%Y-%m-%d")
843
  cols <- c("Raw", "Trimmed", "Stitched", "Aligned", "umiQ30",
844
            "rtsQ30")
845
  output[["NGS_Processing_Attributes"]][, cols] <- lapply(output[["NGS_Processing_Attributes"]][,
846
                                                                                                cols], as.numeric)
847
  names(output[["Scan_Attributes"]])[names(output[["Scan_Attributes"]]) ==
848
                                       "ID"] <- "SampleID"
849
  output[["Code_Summary"]] <- paste0("RTS_ID,Count\n", paste(output[["Code_Summary"]],
850
                                                             collapse = "\n"))
851
  output[["Code_Summary"]] <- utils::read.csv(textConnection(output[["Code_Summary"]]),
852
                                              colClasses = c(RTS_ID = "character", Count = "numeric"))
853
  output[["Code_Summary"]][["Count"]] <- as.integer(round(output[["Code_Summary"]][["Count"]]))
854
  rn <- output[["Code_Summary"]][["RTS_ID"]]
855
  if ((ndups <- anyDuplicated(rn)) > 0L) {
856
    warning(sprintf("removed %d rows from \"Code_Summary\" due to duplicate rownames",
857
                    ndups))
858
    ok <- which(!duplicated(rn, fromLast = FALSE) & !duplicated(rn,
859
                                                                fromLast = TRUE))
860
    rn <- rn[ok]
861
    output[["Code_Summary"]] <- output[["Code_Summary"]][ok,
862
                                                         , drop = FALSE]
863
  }
864
  rownames(output[["Code_Summary"]]) <- rn
865
  output[["NGS_Processing_Attributes"]][, "DeduplicatedReads"] <- sum(output[["Code_Summary"]][["Count"]])
866
  return(output)
867
}
868
869
870
#' importGeoMxSegments
871
#'
872
#' Import ROI polygons from the OME.TIFF file
873
#'
874
#' @param ome.tiff the OME.TIFF file of the GeoMx Experiment
875
#' @param summary segmentation summary data frame
876
#' @param imageinfo image information
877
#'
878
#' @noRd
879
importGeoMxSegments <- function(ome.tiff, summary, imageinfo){
880
881
  # check file
882
  if(file.exists(ome.tiff)){
883
    if(grepl(".ome.tiff$|.ome.tif$", ome.tiff)){
884
      if (!requireNamespace('RBioFormats'))
885
        stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')")
886
      if (!requireNamespace('XML'))
887
        stop("Please install XML package to extract xml from the ome.tiff file!")
888
      omexml <- RBioFormats::read.omexml(ome.tiff)
889
    } else if(grepl(".xml$", ome.tiff)){
890
      omexml <- XML::xmlParse(file = ome.tiff)
891
    } else {
892
      stop("Please provide either an ome.tiff or .xml file!")
893
    }
894
    omexml <- XML::xmlToList(omexml, simplify = TRUE)
895
  } else {
896
    stop("There are no files named ", ome.tiff," in the path")
897
  }
898
899
  # get ROIs
900
  ROIs <- omexml[which(names(omexml) == "ROI")]
901
902
  # get masks for each ROI
903
  mask_lists <- list()
904
  for(i in seq_len(length(ROIs))){
905
    cur_ROI <- ROIs[[i]]
906
907
    # if the shape is a polygon
908
    if("Polygon" %in% names(cur_ROI$Union)){
909
      coords <- strsplit(cur_ROI$Union$Polygon, split = "\\n")
910
      coords <- strsplit(coords$Points, split = " ")[[1]]
911
      coords <- sapply(coords, function(x) as.numeric(strsplit(x, split = ",")[[1]]), USE.NAMES = FALSE)
912
      coords <- as.data.frame(t(coords))
913
      colnames(coords) <- c("x", "y")
914
      coords <- rescaleGeoMxPoints(coords, summary, imageinfo)
915
      coords <- data.frame(id = cur_ROI$Union$Label[["Text"]], coords)
916
      mask_lists[[cur_ROI$Union$Label[["Text"]]]] <- data.frame(coords)
917
918
    # if the shape is an ellipse
919
    } else if("Ellipse" %in% names(cur_ROI$Union)){
920
      coords <- as.numeric(cur_ROI$Union$Ellipse[c("X","Y", "RadiusX", "RadiusY")])
921
      coords <-  as.data.frame(matrix(coords, nrow = 1))
922
      colnames(coords) <- c("x", "y", "rx", "ry")
923
      coords[,c("x", "y")] <- rescaleGeoMxPoints(coords[,c("x", "y")], summary, imageinfo)
924
      coords$rx <- coords$rx * imageinfo$width/summary$Scan.Width[1]
925
      coords$ry <- coords$ry * imageinfo$height/summary$Scan.Height[1]
926
      coords <- data.frame(id = cur_ROI$Union$Label[["Text"]], coords)
927
      mask_lists[[cur_ROI$Union$Label[["Text"]]]] <- coords
928
    }
929
  }
930
931
  mask_lists
932
}
933
934
#' rescaleGeoMxROIs
935
#'
936
#' Rescale GeoMx point (center or polygon corners of ROI) coordinates
937
#'
938
#' @param pts coordinates of the cells from the Xenium assays
939
#' @param summary segmentation summary data frame
940
#' @param imageinfo image information
941
#'
942
#' @noRd
943
rescaleGeoMxPoints <- function(pts, summary, imageinfo){
944
945
  xRatio = imageinfo$width/summary$Scan.Width[1]
946
  yRatio = imageinfo$height/summary$Scan.Height[1]
947
  pts$x = (pts$x - summary$Scan.Offset.X[1]) * xRatio
948
  pts$y = (pts$y - summary$Scan.Offset.Y[1]) * yRatio
949
  pts$y = imageinfo$height - pts$y
950
  pts <- as.matrix(pts)
951
952
  # return
953
  return(pts)
954
}
955
956
#' importGeoMxChannels
957
#'
958
#' Rescale GeoMx channels with respect to the scan image
959
#'
960
#' @param ome.tiff the OME.TIFF file of the GeoMx Experiment
961
#' @param summary segmentation summary data frame
962
#' @param imageinfo image information
963
#' @param resolution_level the resolution level (1-7) of the image parsed from the OME.TIFF file
964
#'
965
#' @param RBioFormats read.image
966
#' @param EBImage as.Image
967
#' @param grDevices as.raster
968
#' @param magick image_read
969
#'
970
#' @noRd
971
importGeoMxChannels <- function(ome.tiff, summary, imageinfo, resolution_level){
972
973
  # check file
974
  if(file.exists(ome.tiff)){
975
    if(grepl(".ome.tiff$|.ome.tif$", ome.tiff)){
976
      if (!requireNamespace('RBioFormats'))
977
        stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')")
978
      if (!requireNamespace('XML'))
979
        stop("Please install XML package to extract xml from the ome.tiff file!")
980
      omexml <- RBioFormats::read.omexml(ome.tiff)
981
      omexml <- XML::xmlToList(omexml, simplify = TRUE)
982
    } else {
983
      warning("ome.tiff format not found!")
984
      return(NULL)
985
    }
986
  } else {
987
    stop("There are no files named ", ome.tiff," in the path")
988
  }
989
990
  # get all channels
991
  ome.tiff <- RBioFormats::read.image(ome.tiff, resolution = resolution_level)
992
993
  # get frame information
994
  nframes <- ome.tiff@metadata$coreMetadata$imageCount
995
  frames <- EBImage::getFrames(ome.tiff)
996
  frames <- lapply(frames, function(x){
997
    img <- magick::image_read(grDevices::as.raster(EBImage::as.Image(x)))
998
    rescaleGeoMxImage(img, summary, imageinfo, resolution_level = resolution_level)
999
  })
1000
1001
  # get channel names
1002
  omexml <- omexml$StructuredAnnotations[seq(from = 1, by = 2, length.out = nframes)]
1003
  channel_names <- lapply(omexml, function(x){
1004
    x$Value$ChannelInfo$BiologicalTarget
1005
  })
1006
  names(frames) <- channel_names
1007
1008
  # return frames
1009
  return(frames)
1010
}
1011
1012
#' rescaleGeoMxImage
1013
#'
1014
#' Rescale GeoMx channels with respect to the scan image
1015
#'
1016
#' @param img coordinates of the cells from the Xenium assays
1017
#' @param summary segmentation summary data frame
1018
#' @param imageinfo image information
1019
#' @param resolution_level the resolution level (1-7) of the image parsed from the OME.TIFF file
1020
#'
1021
#' @param magick image_crop
1022
#'
1023
#' @noRd
1024
rescaleGeoMxImage <- function(img, summary, imageinfo, resolution_level){
1025
1026
  # adjust offset and scan size to the resolution level
1027
  offset.x <- summary$Scan.Offset.X[1]/(2*(resolution_level-1))
1028
  offset.y <- summary$Scan.Offset.Y[1]/(2*(resolution_level-1))
1029
  scan.width <- summary$Scan.Width[1]/(2*(resolution_level-1))
1030
  scan.height <- summary$Scan.Height[1]/(2*(resolution_level-1))
1031
1032
  # crop image given adjusted offsets
1033
  new_img <- magick::image_crop(img, geometry = paste0(scan.width, "x", scan.height, "+", offset.x, "+", offset.y))
1034
1035
  # resize image
1036
  new_img <- magick::image_resize(new_img, geometry = paste0(imageinfo$width, "x", imageinfo$height))
1037
1038
  # return
1039
  return(new_img)
1040
}
1041
1042
####
1043
## CosMx ####
1044
####
1045
1046
#' importCosMx
1047
#'
1048
#' Import CosMx data
1049
#'
1050
#' @param tiledbURI the path to the tiledb folder
1051
#' @param assay_name the assay name, default: CosMx
1052
#' @param image the reference morphology image of the CosMx assay
1053
#' @param image_name the image name of the CosMx assay, Default: main
1054
#' @param ome.tiff the OME.TIFF file of the CosMx experiment if exists
1055
#' @param import_molecules if TRUE, molecule assay will be created along with cell assay.
1056
#' @param verbose verbose
1057
#' @param ... additional parameters passed to \link{formVoltRon}
1058
#'
1059
#' @importFrom data.table data.table
1060
#' @importFrom ids random_id
1061
#'
1062
#' @export
1063
#'
1064
importCosMx <- function(tiledbURI, assay_name = "CosMx",
1065
                        image = NULL, image_name = "main", ome.tiff = NULL, import_molecules = FALSE, verbose = TRUE, ...)
1066
{
1067
  # check tiledb and tiledbsc
1068
  if (!requireNamespace("tiledb", quietly = TRUE))
1069
    stop("Please install the tiledb package: \n
1070
         remotes::install_github('TileDB-Inc/TileDB-R', force = TRUE, ref = '0.17.0')")
1071
  if (!requireNamespace("tiledbsc", quietly = TRUE))
1072
    stop("Please install the tiledbsc package: \n
1073
         remotes::install_github('tiledb-inc/tiledbsc', force = TRUE, ref = '8157b7d54398b1f957832f37fff0b173d355530e')")
1074
1075
  # get tiledb
1076
  if(verbose)
1077
    message("Scanning TileDB array for cell data ...")
1078
  tiledb_scdataset <- tiledbsc::SOMACollection$new(uri = tiledbURI, verbose = FALSE)
1079
1080
  # raw counts
1081
  counts <- tiledb_scdataset$somas$RNA$X$members$counts$to_matrix(batch_mode = TRUE)
1082
  counts <- as.matrix(counts)
1083
1084
  # cell metadata
1085
  metadata <- tiledb_scdataset$somas$RNA$obs$to_dataframe()
1086
1087
  # coordinates
1088
  coords <- as.matrix(metadata[,c("x_slide_mm", "y_slide_mm")])
1089
  colnames(coords) <- c("x","y")
1090
1091
  # transcripts
1092
  if(import_molecules){
1093
    if(verbose)
1094
      message("Scanning TileDB array for molecule data ...")
1095
    subcellular <- tiledb::tiledb_array(
1096
      tiledb_scdataset$somas$RNA$obsm$members$transcriptCoords$uri,
1097
      return_as="data.table")[]
1098
    colnames(subcellular)[colnames(subcellular)=="target"] <- "gene"
1099
  }
1100
1101
  # get slides and construct VoltRon objects for each slides
1102
  slides <- unique(metadata$slide_ID_numeric)
1103
1104
  # for each slide create a VoltRon object with combined layers
1105
  vr_list <- list()
1106
  for(slide in slides){
1107
1108
    # cell assay
1109
    if(verbose)
1110
      message("Creating cell level assay for slide ", slide, " ...")
1111
1112
    # slide info
1113
    cur_coords <- coords[metadata$slide_ID_numeric == slide,]
1114
    cur_counts <- counts[,rownames(cur_coords)]
1115
    cur_metadata <- metadata[rownames(cur_coords),]
1116
1117
    # create VoltRon object
1118
    cell_object <- formVoltRon(data = cur_counts, metadata = cur_metadata, image = image, coords = cur_coords, 
1119
                               main.assay = assay_name, assay.type = "cell", image_name = image_name, main_featureset = "RNA", ...)
1120
    cell_object$Sample <- paste0("Slide", slide)
1121
1122
    # molecule assay
1123
    if(import_molecules){
1124
1125
      # get slide
1126
      if(verbose)
1127
        message("Creating molecule level assay for slide ", slide, " ...")
1128
      cur_subcellular <- subset(subcellular, slideID == slide)
1129
1130
      # coordinates
1131
      mol_coords <- as.matrix(cur_subcellular[,c("x_FOV_px", "y_FOV_px")])
1132
      colnames(mol_coords) <- c("x", "y")
1133
1134
      # get subcellular data components
1135
      mol_metadata <- cur_subcellular[,colnames(cur_subcellular)[!colnames(cur_subcellular) %in% c("CellId", "cell_id", "x_FOV_px", "y_FOV_px")], with = FALSE]
1136
      set.seed(nrow(mol_metadata))
1137
      mol_metadata$id <- rownames(mol_metadata)
1138
      mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE))
1139
      mol_metadata$assay_id <- "Assay1"
1140
      mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")]
1141
      
1142
      # coord names
1143
      rownames(mol_coords) <- mol_metadata$id
1144
1145
      # create VoltRon assay for molecules
1146
      mol_assay <- formAssay(coords = mol_coords, image = image, type = "molecule", main_image = image_name)
1147
1148
      # merge assays in one section
1149
      if(verbose)
1150
        message("Merging assays for slide ", slide, " ...")
1151
      sample.metadata <- SampleMetadata(cell_object)
1152
      cell_object <- addAssay(cell_object,
1153
                              assay = mol_assay,
1154
                              metadata = mol_metadata,
1155
                              assay_name = paste0(assay_name, "_mol"),
1156
                              sample = sample.metadata["Assay1", "Sample"],
1157
                              layer = sample.metadata["Assay1", "Layer"])
1158
    }
1159
    vr_list <- append(vr_list, cell_object)
1160
  }
1161
1162
  # return
1163
  if(verbose)
1164
    message("Merging slides ...")
1165
  vr <- merge(vr_list[[1]], vr_list[-1])
1166
}
1167
1168
#' generateCosMxImage
1169
#'
1170
#' Generates a low resolution Morphology image of the CosMx experiment
1171
#'
1172
#' @param dir.path CosMx output folder
1173
#' @param increase.contrast increase the contrast of the image before writing
1174
#' @param output.path The path to the new morphology image created if the image should be saved to a location other than Xenium output folder.
1175
#' @param verbose verbose
1176
#' @param ... additional parameters passed to the \link{writeImage} function
1177
#'
1178
#' @importFrom magick image_read image_contrast
1179
#' @importFrom EBImage writeImage
1180
#'
1181
#' @export
1182
generateCosMxImage <- function(dir.path, increase.contrast = TRUE, output.path = NULL, verbose = TRUE, ...) {
1183
1184
  # check package
1185
  if(!requireNamespace("reshape2")){
1186
    stop("You have to install the reshape2 package!: install.packages('reshape2')")
1187
  }
1188
  
1189
  # file path to either Xenium output folder or specified folder
1190
  file.path <- paste0(dir.path, "/CellComposite_lowres.tif")
1191
  output.file <- paste0(output.path, "/CellComposite_lowres.tif")
1192
1193
  # check if the file exists in either Xenium output folder, or the specified location
1194
  if(file.exists(file.path) | file.exists(paste0(output.file))){
1195
    if(verbose)
1196
      message("CellComposite_lowres.tif already exists! \n")
1197
    return(NULL)
1198
  }
1199
1200
  # FOV positions of CosMx
1201
  list_of_files <- list.files(dir.path)
1202
  fov_positions_path <- paste0(dir.path, "/", list_of_files[grepl("fov_positions_file.csv$",list_of_files)][1])
1203
  fov_positions <- read.csv(fov_positions_path)
1204
1205
  # manipulate fov positions matrix
1206
  if(verbose)
1207
    message("Getting FOV Positions \n")
1208
  relative_fov_positions <- fov_positions
1209
  x_min <- min(relative_fov_positions$x_global_px)
1210
  y_min <- min(relative_fov_positions$y_global_px)
1211
  x_gap <- diff(unique(fov_positions$x_global_px))[1]
1212
  y_gap <- diff(unique(fov_positions$y_global_px))[1]
1213
  relative_fov_positions[,c("x_global_px","y_global_px")] <- t(apply(relative_fov_positions[,c("x_global_px","y_global_px")], 1, function(cell){
1214
    c((cell[1]-x_min)/x_gap,(cell[2]-y_min)/y_gap)
1215
  }))
1216
  relative_fov_positions <- relative_fov_positions[order(relative_fov_positions$y_global_px, decreasing = TRUE),]
1217
1218
  # Combine Images of the FOV grid
1219
  if(verbose)
1220
    message("Loading FOV tif files \n")
1221
  image.dir.path <- paste0(dir.path,"/CellComposite/")
1222
  morphology_image_data <- NULL
1223
  for(i in relative_fov_positions$fov){
1224
    image_path <- paste0(image.dir.path, "CellComposite_F", str_pad(as.character(i), 3, pad = 0), ".jpg")
1225
    image_data <- magick::image_read(image_path) %>% magick::image_resize("x500") %>% magick::image_raster()
1226
    if(is.null(morphology_image_data))
1227
      dim_image <- apply(image_data[,seq_len(2)], 2, max)
1228
    scale_dim <- relative_fov_positions[i,2:3]*dim_image
1229
    image_data[,seq_len(2)] <- image_data[,seq_len(2)] +
1230
      rep(1, nrow(image_data)) %o% as.matrix(scale_dim)[1,]
1231
    morphology_image_data <- rbind(morphology_image_data, image_data)
1232
  }
1233
  morphology_image_data_array <- reshape2::acast(morphology_image_data, y ~ x)
1234
  morphology_image <- magick::image_read(morphology_image_data_array) %>% magick::image_resize("x800")
1235
1236
  # pick a resolution level
1237
  morphology_image_info <- image_info(morphology_image)
1238
  if(verbose)
1239
    message("Image Resolution (X:", morphology_image_info$width, " Y:", morphology_image_info$height, ") \n")
1240
1241
  # increase contrast using EBImage
1242
  if(increase.contrast) {
1243
    if(verbose)
1244
      message("Increasing Contrast \n")
1245
    morphology_image <- magick::image_contrast(morphology_image, sharpen = 1)
1246
  }
1247
1248
  # write to the same folder
1249
  if(verbose)
1250
    message("Writing Tiff File \n")
1251
  if(is.null(output.path)){
1252
    EBImage::writeImage(magick::as_EBImage(morphology_image), file = file.path, ...)
1253
  } else {
1254
    EBImage::writeImage(magick::as_EBImage(morphology_image), file = output.file, ...)
1255
  }
1256
1257
  return(NULL)
1258
}
1259
1260
####
1261
# Spatial Genomics ####
1262
####
1263
1264
####
1265
## GenePs ####
1266
####
1267
1268
#' importGenePS
1269
#'
1270
#' Importing GenePS data
1271
#' 
1272
#' @param dir.path path to Xenium output folder
1273
#' @param assay_name the assay name of the SR object
1274
#' @param sample_name the name of the sample
1275
#' @param use_image if TRUE, the DAPI image will be used.
1276
#' @param resolution_level the level of resolution within TIFF image. Default: 7 (971x638)
1277
#' @param image_name the image name of the Xenium assay, Default: main
1278
#' @param channel_name the channel name of the image of the Xenium assay, Default: DAPI
1279
#' @param import_molecules if TRUE, molecule assay will be created along with cell assay.
1280
#' @param verbose verbose
1281
#' @param ... additional parameters passed to \link{formVoltRon}
1282
#' 
1283
#' @importFrom magick image_read image_info
1284
#' @importFrom utils read.csv
1285
#' @importFrom data.table fread
1286
#' @importFrom ids random_id
1287
#' @importFrom grDevices as.raster
1288
#' @importFrom EBImage as.Image
1289
#'
1290
#' @export
1291
#'
1292
importGenePS <- function (dir.path, assay_name = "GenePS", sample_name = NULL, use_image = TRUE, 
1293
                          resolution_level = 7, image_name = "main", channel_name = "DAPI", import_molecules = FALSE, 
1294
                          verbose = TRUE, ...)
1295
{
1296
  # cell assay
1297
  if(verbose)
1298
     message("Creating cell level assay ...")
1299
  
1300
  # raw counts
1301
  DataOutputfiles <- list.files(paste0(dir.path, "DataOutput/"))
1302
  datafile <- DataOutputfiles[grepl("_cellxgene.csv", DataOutputfiles)]
1303
  if(length(datafile) > 0){
1304
    rawdata <- utils::read.csv(file = paste0(dir.path, "DataOutput/", datafile), row.names = NULL)
1305
    rawdata <- rawdata[,colnames(rawdata)[!colnames(rawdata) %in% "X"]]
1306
    rawdata <- t(rawdata)
1307
  } else {
1308
    stop("There are no files ending with '_cellxgene.csv' in the path")
1309
  }
1310
  
1311
  # image
1312
  if(use_image){
1313
    
1314
    # check RBioFormats
1315
    if (!requireNamespace('RBioFormats'))
1316
      stop("Please install RBioFormats package to extract xml from the ome.tiff file!: BiocManager::install('RBioFormats')")
1317
    
1318
    # check image
1319
    image_file <- paste0(dir.path, "/images/DAPI.tiff")
1320
    if(file.exists(image_file)){
1321
      image <- RBioFormats::read.image(image_file, resolution = resolution_level)
1322
      image <- EBImage::as.Image(image)
1323
      image <- grDevices::as.raster(image)
1324
      image <- magick::image_read(image)
1325
    } else {
1326
      stop("There are no image files in the path")
1327
    }
1328
    # scale the xenium image instructed by 10x Genomics help page
1329
    scaleparam <- ceiling(2^(resolution_level-1))
1330
  } else {
1331
    image <- NULL
1332
  }
1333
  
1334
  # coordinates
1335
  coordsfile <- DataOutputfiles[grepl("_cellcoordinate.csv", DataOutputfiles)]
1336
  if(length(coordsfile) > 0){
1337
    coords <- utils::read.csv(file = paste0(dir.path, "DataOutput/", coordsfile))
1338
    coords <- as.matrix(coords[,c("center_x", "center_y")])
1339
    colnames(coords) <- c("x", "y")
1340
    if(use_image) {
1341
      coords <- coords/scaleparam
1342
      imageinfo <- unlist(magick::image_info(image)[c("height")])
1343
      range_coords <- c(0,imageinfo)
1344
    } else {
1345
      range_coords <- range(coords[,2])
1346
    }
1347
    coords[,2] <- range_coords[2] - coords[,2] + range_coords[1]
1348
  } else {
1349
    stop("There are no files ending with '_cellcoordinate.csv' in the path")
1350
  }
1351
  
1352
  # create VoltRon object for cells
1353
  cell_object <- formVoltRon(rawdata, metadata = NULL, image = image, coords, main.assay = assay_name, 
1354
                             assay.type = "cell", image_name = image_name, main_channel = channel_name, sample_name = sample_name, 
1355
                             feature_name = "RNA", ...)
1356
  
1357
  # add molecules
1358
  if(!import_molecules){
1359
    return(cell_object)
1360
  } else {
1361
    if(verbose)
1362
      message("Creating molecule level assay ...")
1363
    transcripts_file <- DataOutputfiles[grepl("_transcriptlocation.csv", DataOutputfiles)]
1364
    if(length(transcripts_file) == 0){
1365
      stop("There are no files ending with '_transcriptlocation.csv' in the path")
1366
    } else {
1367
      
1368
      # get subcellur data components
1369
      subcellular_data <- data.table::fread(paste0(dir.path, "DataOutput/", transcripts_file))
1370
      subcellular_data$id <- seq_len(nrow(subcellular_data))
1371
      subcellular_data <- subcellular_data[,c("id", colnames(subcellular_data)[!colnames(subcellular_data) %in% "id"]), with = FALSE]
1372
      colnames(subcellular_data)[colnames(subcellular_data)=="name"] <- "gene"
1373
      
1374
      # coordinates
1375
      coords <- as.matrix(subcellular_data[,c("x", "y")])
1376
      colnames(coords) <- c("x", "y")
1377
      if(use_image){
1378
        coords <- coords/scaleparam
1379
      }
1380
      coords[,"y"] <- range_coords[2] - coords[,"y"]  + range_coords[1]
1381
      
1382
      # metadata
1383
      mol_metadata <- subcellular_data[,colnames(subcellular_data)[!colnames(subcellular_data) %in% c("cell", "x", "y")], with = FALSE]
1384
      set.seed(nrow(mol_metadata$id))
1385
      mol_metadata$postfix <- paste0("_", ids::random_id(bytes = 3, use_openssl = FALSE))
1386
      mol_metadata$assay_id <- "Assay1"
1387
      mol_metadata[, id:=do.call(paste0,.SD), .SDcols=c("id", "postfix")]
1388
      
1389
      # coord names
1390
      rownames(coords) <- mol_metadata$id
1391
1392
      # create VoltRon assay for molecules
1393
      mol_assay <- formAssay(coords = coords, image = image, type = "molecule", main_image = image_name, main_channel = channel_name)
1394
      
1395
      # merge assays in one section
1396
      if(verbose)
1397
        message("Merging assays ...")
1398
      sample.metadata <- SampleMetadata(cell_object)
1399
      object <- addAssay(cell_object,
1400
                         assay = mol_assay,
1401
                         metadata = mol_metadata,
1402
                         assay_name = paste0(assay_name, "_mol"),
1403
                         sample = sample.metadata["Assay1", "Sample"],
1404
                         layer = sample.metadata["Assay1", "Layer"])
1405
      
1406
      # connectivity
1407
      connectivity <- data.table::data.table(transcript_id = mol_metadata$id, cell_id = subcellular_data[["cell"]])
1408
      connectivity <- subset(connectivity, cell_id != 0)
1409
      connectivity[["cell_id"]] <- vrSpatialPoints(cell_object)[connectivity[["cell_id"]]]
1410
      
1411
      # add connectivity of spatial points across assays
1412
      object <- addLayerConnectivity(object,
1413
                                connectivity = connectivity,
1414
                                sample = sample.metadata["Assay1", "Sample"],
1415
                                layer = sample.metadata["Assay1", "Layer"])
1416
      
1417
      # return
1418
      return(object)
1419
    }
1420
  }
1421
}
1422
  
1423
####
1424
# BGI Genomics ####
1425
####
1426
1427
####
1428
## STOmics ####
1429
####
1430
1431
#' importSTOmics
1432
#'
1433
#' Importing STOmics (Stereo-Seq) data
1434
#'
1435
#' @param h5ad.path path to h5ad file of STOmics output
1436
#' @param assay_name the assay name
1437
#' @param sample_name the name of the sample
1438
#' @param image_name the image name of the Visium assay, Default: main
1439
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E
1440
#' @param ... additional parameters passed to \link{formVoltRon}
1441
#'
1442
#' @importFrom methods as
1443
#' @export
1444
#'
1445
importSTOmics <- function(h5ad.path, assay_name = "STOmics", sample_name = NULL, image_name = "main", channel_name = "H&E", ...)
1446
{
1447
  # check package
1448
  if(!requireNamespace('anndataR'))
1449
    stop("Please install anndataR package!: devtools::install_github('scverse/anndataR')")
1450
  
1451
  # get h5ad data
1452
  stdata <- anndataR::read_h5ad(h5ad.path, to = "HDF5AnnData")
1453
  
1454
  # observation and feature names
1455
  obs_names <- stdata$obs_names
1456
  var_names <- stdata$var_names
1457
  
1458
  # raw counts
1459
  rawdata <- Matrix::t(stdata$X)
1460
  rownames(rawdata) <- var_names
1461
  colnames(rawdata) <- obs_names
1462
  rawdata <- methods::as(rawdata, 'CsparseMatrix')
1463
  
1464
  # metadata
1465
  metadata <- stdata$obs
1466
  rownames(metadata) <- obs_names
1467
  
1468
  # coordinates
1469
  coords <- stdata$obsm$spatial
1470
  rownames(coords) <- obs_names
1471
  
1472
  # scale coordinates
1473
  binsize <- stdata$uns$bin_size
1474
  params <- list(
1475
    spot.radius = 0.5 + (binsize-1),
1476
    vis.spot.radius = 0.5 + (binsize-1))
1477
  
1478
  # create VoltRon
1479
  formVoltRon(rawdata, metadata = metadata, coords = coords, main.assay = assay_name, params = params, assay.type = "spot", 
1480
              image_name = image_name, main_channel = channel_name, sample_name = sample_name, feature_name = "RNA", ...)
1481
}
1482
1483
####
1484
# Akoya ####
1485
####
1486
1487
####
1488
## PhenoCycler ####
1489
####
1490
1491
#' importPhenoCycler
1492
#'
1493
#' Importing PhenoCycler data
1494
#' 
1495
#' @param dir.path path to PhenoCycler output folder
1496
#' @param assay_name the assay name of the SR object
1497
#' @param sample_name the name of the sample
1498
#' @param image_name the image name of the Xenium assay, Default: main
1499
#' @param type Specify which type matrix is being provided.
1500
#' \itemize{
1501
#'  \item \dQuote{\code{processor}}: matrix generated by CODEX Processor
1502
#'  \item \dQuote{\code{inform}}: matrix generated by inForm
1503
#'  \item \dQuote{\code{qupath}}: matrix generated by QuPath
1504
#' }
1505
#' @param filter A pattern to filter features by; pass \code{NA} to skip feature filtering
1506
#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the quantification level to read in 
1507
#' @param verbose verbose
1508
#' @param ... additional parameters passed to \link{formVoltRon} 
1509
#' 
1510
#' @importFrom magick image_info image_read
1511
#'
1512
#' @export
1513
importPhenoCycler <- function(dir.path, assay_name = "PhenoCycler", sample_name = NULL, image_name = "main", 
1514
                              type = c('inform', 'processor', 'qupath'), filter = 'DAPI|Blank|Empty', 
1515
                              inform.quant = c('mean', 'total', 'min', 'max', 'std'), verbose = TRUE, ...){
1516
  
1517
  # raw counts, metadata and coordinates 
1518
  listoffiles <- list.files(paste0(dir.path, "/processed/segm/segm-1/fcs/compensated/"), full.names = TRUE)
1519
  datafile <- listoffiles[grepl("_compensated.csv$", listoffiles)][1]
1520
  if(file.exists(datafile)){
1521
    rawdata <- readPhenoCyclerMat(filename = datafile, type = type, filter = filter, inform.quant = inform.quant)
1522
  } else {
1523
    stop("There are no files named ending with '_compensated.csv' in the processed/segm/segm-1/fcs/compensated/ subfolder")
1524
  }
1525
  
1526
  # cell id 
1527
  cellid <- paste0("cell", colnames(rawdata$matrix))
1528
  
1529
  # coordinates
1530
  coords <- rawdata$centroids[,c("x", "y")]
1531
  rownames(coords) <- cellid
1532
  coords <- as.matrix(coords)
1533
  
1534
  # metadata
1535
  metadata <- rawdata$metadata
1536
  rownames(metadata) <- cellid
1537
  
1538
  # data
1539
  rawdata <- rawdata$matrix
1540
  colnames(rawdata) <- cellid
1541
  rownames(rawdata) <- gsub("(\t|\r|\n)", "", rownames(rawdata))
1542
  
1543
  # images 
1544
  image_dir <- paste0(dir.path, "/processed/stitched/reg001/")
1545
  list_files <- list.files(image_dir)
1546
  if(!dir.exists(image_dir)){
1547
    if(verbose)
1548
      message("There are no images of channels!")
1549
    image_list <- NULL
1550
  } else {
1551
    if(!any(grepl(".tif$", list_files))){
1552
      stop("The folder doesnt have any images associated with channels!")
1553
    } else{
1554
      image_channel_names <- sapply(list_files, function(x) {
1555
        name <- strsplit(x, split = "_")[[1]]
1556
        name <- gsub(".tif$", "", name[length(name)])
1557
        name <- make.unique(name)
1558
        return(name)
1559
      })
1560
      image_channel_names <- c(image_channel_names[grepl("DAPI", image_channel_names)][1], 
1561
                               image_channel_names[image_channel_names %in% rownames(rawdata)])
1562
      list_files <- names(image_channel_names)
1563
      image_list <- lapply(list_files, function(x){
1564
        magick::image_read(paste0(image_dir, "/", x))
1565
      })
1566
      names(image_list) <- image_channel_names   
1567
      coords[,2] <- magick::image_info(image_list[[1]])$height - coords[,2]
1568
    }
1569
  }
1570
  
1571
  # voltron object
1572
  object <- formVoltRon(data = rawdata, metadata = metadata, image = image_list, coords = coords, assay.type = "cell", 
1573
                        sample_name = sample_name, main.assay = assay_name, image_name = image_name, feature_name = "RNA", ...)
1574
  
1575
  # return
1576
  object
1577
}
1578
1579
#' readPhenoCyclerMat
1580
#' 
1581
#' Read and Load Akoya CODEX data, adapted from the \code{ReadAkoya} function \code{Seurat} package
1582
#'
1583
#' @param filename Path to matrix generated by upstream processing.
1584
#' @param type Specify which type matrix is being provided.
1585
#' \itemize{
1586
#'  \item \dQuote{\code{processor}}: matrix generated by CODEX Processor
1587
#'  \item \dQuote{\code{inform}}: matrix generated by inForm
1588
#'  \item \dQuote{\code{qupath}}: matrix generated by QuPath
1589
#' }
1590
#' @param filter A pattern to filter features by; pass \code{NA} to skip feature filtering
1591
#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the quantification level to read in
1592
#' 
1593
#' @importFrom methods as
1594
#' 
1595
#' @noRd
1596
readPhenoCyclerMat <- function(
1597
    filename,
1598
    type = c('inform', 'processor', 'qupath'),
1599
    filter = 'DAPI|Blank|Empty',
1600
    inform.quant = c('mean', 'total', 'min', 'max', 'std')
1601
) {
1602
  # Check arguments
1603
  if (!file.exists(filename)) {
1604
    stop("Can't find file: ", filename)
1605
  }
1606
  type <- tolower(x = type[1L])
1607
  type <- match.arg(arg = type)
1608
  ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4)
1609
  # Preload matrix
1610
  sep <- switch(EXPR = type, 'inform' = '\t', ',')
1611
  mtx <- data.table::fread(
1612
    file = filename,
1613
    sep = sep,
1614
    data.table = FALSE,
1615
    verbose = FALSE
1616
  )
1617
  # Assemble outputs
1618
  outs <- switch(
1619
    EXPR = type,
1620
    'processor' = {
1621
      # Create centroids data frame
1622
      centroids <- data.frame(
1623
        x = mtx[['x:x']],
1624
        y = mtx[['y:y']],
1625
        cell = as.character(x = mtx[['cell_id:cell_id']]),
1626
        stringsAsFactors = FALSE
1627
      )
1628
      rownames(x = mtx) <- as.character(x = mtx[['cell_id:cell_id']])
1629
      # Create metadata data frame
1630
      md <- mtx[, !grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE]
1631
      colnames(x = md) <- vapply(
1632
        X = strsplit(x = colnames(x = md), split = ':'),
1633
        FUN = '[[',
1634
        FUN.VALUE = character(length = 1L),
1635
        2L
1636
      )
1637
      # Create expression matrix
1638
      mtx <- mtx[, grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE]
1639
      colnames(x = mtx) <- vapply(
1640
        X = strsplit(x = colnames(x = mtx), split = ':'),
1641
        FUN = '[[',
1642
        FUN.VALUE = character(length = 1L),
1643
        2L
1644
      )
1645
      if (!is.na(x = filter)) {
1646
        mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE]
1647
      }
1648
      mtx <- t(x = mtx)
1649
      if ((sum(mtx == 0) / length(x = mtx)) > ratio) {
1650
        # mtx <- as.sparse(x = mtx)
1651
        mtx <- methods::as(mtx, 'CsparseMatrix')
1652
      }
1653
      list(matrix = mtx, centroids = centroids, metadata = md)
1654
    },
1655
    'inform' = {
1656
      inform.quant <- tolower(x = inform.quant[1L])
1657
      inform.quant <- match.arg(arg = inform.quant)
1658
      expr.key <- c(
1659
        mean = 'Mean',
1660
        total = 'Total',
1661
        min = 'Min',
1662
        max = 'Max',
1663
        std = 'Std Dev'
1664
      )[inform.quant]
1665
      expr.pattern <- '\\(Normalized Counts, Total Weighting\\)'
1666
      rownames(x = mtx) <- mtx[['Cell ID']]
1667
      mtx <- mtx[, setdiff(x = colnames(x = mtx), y = 'Cell ID'), drop = FALSE]
1668
      # Create centroids
1669
      centroids <- data.frame(
1670
        x = mtx[['Cell X Position']],
1671
        y = mtx[['Cell Y Position']],
1672
        cell  = rownames(x = mtx),
1673
        stringsAsFactors = FALSE
1674
      )
1675
      # Create metadata
1676
      cols <- setdiff(
1677
        x = grep(
1678
          pattern = expr.pattern,
1679
          x = colnames(x = mtx),
1680
          value = TRUE,
1681
          invert = TRUE
1682
        ),
1683
        y = paste('Cell', c('X', 'Y'), 'Position')
1684
      )
1685
      md <- mtx[, cols, drop = FALSE]
1686
      # Create expression matrices
1687
      exprs <- data.frame(
1688
        cols = grep(
1689
          pattern = paste(expr.key, expr.pattern),
1690
          x = colnames(x = mtx),
1691
          value = TRUE
1692
        )
1693
      )
1694
      exprs$feature <- vapply(
1695
        X = trimws(x = gsub(
1696
          pattern = paste(expr.key, expr.pattern),
1697
          replacement = '',
1698
          x = exprs$cols
1699
        )),
1700
        FUN = function(x) {
1701
          x <- unlist(x = strsplit(x = x, split = ' '))
1702
          x <- x[length(x = x)]
1703
          return(gsub(pattern = '\\(|\\)', replacement = '', x = x))
1704
        },
1705
        FUN.VALUE = character(length = 1L)
1706
      )
1707
      exprs$class <- tolower(x = vapply(
1708
        X = strsplit(x = exprs$cols, split = ' '),
1709
        FUN = '[[',
1710
        FUN.VALUE = character(length = 1L),
1711
        1L
1712
      ))
1713
      classes <- unique(x = exprs$class)
1714
      outs <- vector(
1715
        mode = 'list',
1716
        length = length(x = classes) + 2L
1717
      )
1718
      names(x = outs) <- c(
1719
        'matrix',
1720
        'centroids',
1721
        'metadata',
1722
        setdiff(x = classes, y = 'entire')
1723
      )
1724
      outs$centroids <- centroids
1725
      outs$metadata <- md
1726
      for (i in classes) {
1727
        df <- exprs[exprs$class == i, , drop = FALSE]
1728
        expr <- mtx[, df$cols]
1729
        colnames(x = expr) <- df$feature
1730
        if (!is.na(x = filter)) {
1731
          expr <- expr[, !grepl(pattern = filter, x = colnames(x = expr)), drop = FALSE]
1732
        }
1733
        expr <- t(x = expr)
1734
        if ((sum(expr == 0, na.rm = TRUE) / length(x = expr)) > ratio) {
1735
          # expr <- as.sparse(x = expr)
1736
          expr <- methods::as(expr, 'CsparseMatrix')
1737
        }
1738
        outs[[switch(EXPR = i, 'entire' = 'matrix', i)]] <- expr
1739
      }
1740
      outs
1741
    },
1742
    'qupath' = {
1743
      rownames(x = mtx) <- as.character(x = seq_len(length.out = nrow(x = mtx)))
1744
      # Create centroids
1745
      xpos <- sort(
1746
        x = grep(pattern = 'Centroid X', x = colnames(x = mtx), value = TRUE),
1747
        decreasing = TRUE
1748
      )[1L]
1749
      ypos <- sort(
1750
        x = grep(pattern = 'Centroid Y', x = colnames(x = mtx), value = TRUE),
1751
        decreasing = TRUE
1752
      )[1L]
1753
      centroids <- data.frame(
1754
        x = mtx[[xpos]],
1755
        y = mtx[[ypos]],
1756
        cell = rownames(x = mtx),
1757
        stringsAsFactors = FALSE
1758
      )
1759
      # Create metadata
1760
      cols <- setdiff(
1761
        x = grep(
1762
          pattern = 'Cell: Mean',
1763
          x = colnames(x = mtx),
1764
          ignore.case = TRUE,
1765
          value = TRUE,
1766
          invert = TRUE
1767
        ),
1768
        y = c(xpos, ypos)
1769
      )
1770
      md <- mtx[, cols, drop = FALSE]
1771
      # Create expression matrix
1772
      idx <- which(x = grepl(
1773
        pattern = 'Cell: Mean',
1774
        x = colnames(x = mtx),
1775
        ignore.case = TRUE
1776
      ))
1777
      mtx <- mtx[, idx, drop = FALSE]
1778
      colnames(x = mtx) <- vapply(
1779
        X = strsplit(x = colnames(x = mtx), split = ':'),
1780
        FUN = '[[',
1781
        FUN.VALUE = character(length = 1L),
1782
        1L
1783
      )
1784
      if (!is.na(x = filter)) {
1785
        mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE]
1786
      }
1787
      mtx <- t(x = mtx)
1788
      if ((sum(mtx == 0) / length(x = mtx)) > ratio) {
1789
        # mtx <- as.sparse(x = mtx)
1790
        mtx <- methods::as(mtx, 'CsparseMatrix')
1791
      }
1792
      list(matrix = mtx, centroids = centroids, metadata = md)
1793
    },
1794
    stop("Unknown matrix type: ", type)
1795
  )
1796
  return(outs)
1797
}
1798
1799
####
1800
# Non-Commercial ####
1801
####
1802
1803
####
1804
## OpenST ####
1805
####
1806
1807
#' importOpenST
1808
#'
1809
#' Importing OpenST data
1810
#'
1811
#' @param h5ad.path path to h5ad file of STOmics output
1812
#' @param assay_name the assay name
1813
#' @param sample_name the name of the sample
1814
#' @param image_name the image name of the Visium assay, Default: main
1815
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E
1816
#' @param verbose verbose
1817
#' @param ... additional parameters passed to \link{formVoltRon}
1818
#'
1819
#' @importFrom methods as
1820
#' @importFrom Matrix t
1821
#' 
1822
#' @export
1823
importOpenST <- function(h5ad.path, assay_name = "OpenST", sample_name = NULL, image_name = "main", channel_name = "H&E", verbose = TRUE, ...)
1824
{
1825
  # check package
1826
  if(!requireNamespace('anndataR'))
1827
    stop("Please install anndataR package!: devtools::install_github('scverse/anndataR')")
1828
  
1829
  # get h5ad data
1830
  stdata <- anndataR::read_h5ad(h5ad.path, to = "HDF5AnnData")
1831
  
1832
  # observation and feature names
1833
  obs_names <- stdata$obs_names
1834
  var_names <- stdata$var_names
1835
  
1836
  # raw counts
1837
  rawdata <- Matrix::t(stdata$X)
1838
  rownames(rawdata) <- var_names
1839
  colnames(rawdata) <- obs_names
1840
  rawdata <- methods::as(rawdata, 'CsparseMatrix')
1841
  
1842
  # metadata
1843
  metadata <- stdata$obs
1844
  rownames(metadata) <- obs_names
1845
  
1846
  # coordinates
1847
  coords <- stdata$obsm$spatial_3d_aligned
1848
  rownames(coords) <- obs_names
1849
  zlocation <- unique(coords[,3])
1850
  
1851
  # get individual sections as voltron data
1852
  sections <- unique(metadata$n_section)
1853
  zlocation <- zlocation[order(sections)]
1854
  connectivity <- data.frame(Var1 = rep(seq_len(length(sections)), length(sections)), 
1855
                             Var2 = rep(seq_len(length(sections)), each = length(sections)))
1856
  sections <- sections[order(sections)]
1857
  vr_data_list <- list()
1858
  if(verbose)
1859
    message("Creating Layers ...")
1860
  for(i in seq_len(length(sections))){
1861
    ind <- metadata$n_section == sections[i]
1862
    spatialpoints <- rownames(metadata[metadata$n_section == sections[i],])
1863
    cur_data <- rawdata[,spatialpoints]
1864
    cur_metadata <- metadata[spatialpoints,]
1865
    cur_coords <- coords[ind,c(1,2)]
1866
    rownames(cur_coords) <- spatialpoints
1867
    vr_data_list[[i]] <- formVoltRon(data = cur_data, metadata = cur_metadata, coords = cur_coords, 
1868
                                     main.assay = assay_name, sample_name = paste0("Section", sections[i]),
1869
                                     image_name = image_name, main_channel = channel_name, feature_name = "RNA", ...)
1870
  }
1871
  
1872
  # create VoltRon
1873
  sample_name <- ifelse(is.null(sample_name), "Sample", sample_name)
1874
  vr_data <- mergeVoltRon(vr_data_list[[1]], vr_data_list[-1], samples = sample_name)
1875
  
1876
  # set zlocations and adjacency of layer in the vrBlock
1877
  vr_data <- addBlockConnectivity(vr_data, connectivity = connectivity, zlocation = zlocation, sample = sample_name)
1878
  
1879
  # return
1880
  vr_data
1881
}
1882
1883
####
1884
## DBIT-Seq ####
1885
####
1886
 
1887
#' importDBITSeq
1888
#'
1889
#' Importing DBIT-Seq data
1890
#'
1891
#' @param path.rna path to rna count matrix
1892
#' @param path.prot path to protein count matrix
1893
#' @param size the size of the in situ pixel (defualt is 10 (micron))
1894
#' @param assay_name the assay name
1895
#' @param sample_name the name of the sample
1896
#' @param image_name the image name of the Visium assay, Default: main
1897
#' @param channel_name the channel name of the image of the Visium assay, Default: H&E
1898
#' @param ... additional parameters passed to \link{formVoltRon}
1899
#'
1900
#' @importFrom utils read.table
1901
#'
1902
#' @export
1903
importDBITSeq <- function(path.rna, path.prot = NULL, size = 10, assay_name = "DBIT-Seq", sample_name = NULL, image_name = "main", channel_name = "H&E", ...)
1904
{
1905
  # count matrix RNA
1906
  rnadata <- utils::read.table(path.rna, header = TRUE, sep = "\t", row.names = 1)
1907
  rnadata <- t(as.matrix(rnadata))
1908
  
1909
  # count matrix Protein
1910
  protdata <- utils::read.table(path.prot, header = TRUE, sep = "\t", row.names = 1)
1911
  protdata <- t(as.matrix(protdata))
1912
  
1913
  # coords 
1914
  coords <- sapply(colnames(rnadata), function(x) as.numeric(strsplit(x, split = "x")[[1]]))
1915
  coords <- t(coords)
1916
  colnames(coords) <- c("x", "y")
1917
  
1918
  # get DBIT-Seq parameters
1919
  params <- list(
1920
    spot.radius = size/2,
1921
    vis.spot.radius = size,
1922
    nearestpost.distance = size*2*sqrt(2))
1923
  coords <- coords*size*3/2
1924
  
1925
  # make voltron object
1926
  object <- formVoltRon(data = rnadata, coords = coords, image = NULL, assay.type = "spot", params = params, image_name = "main", 
1927
                        main.assay = assay_name, sample_name = sample_name, feature_name = "RNA", ...)
1928
  
1929
  # add protein assay
1930
  if(!is.null(path.prot)){
1931
   
1932
    # # create protein assay
1933
    # new_assay <- formAssay(data = protdata,
1934
    #                        coords = coords,
1935
    #                        type = "spot")
1936
    # new_assay@image <- object[["Assay1"]]@image
1937
    # sample.metadata <- SampleMetadata(object)
1938
    
1939
    # # add new assay
1940
    # object <- addAssay(object,
1941
    #                    assay = new_assay,
1942
    #                    metadata = Metadata(object),
1943
    #                    assay_name = paste(assay_name, "Prot", sep = "-"),
1944
    #                    sample = sample.metadata["Assay1", "Sample"],
1945
    #                    layer = sample.metadata["Assay1", "Layer"])
1946
    # 
1947
    # # add connectivity of spatial points across assays
1948
    # connectivity <- cbind(vrSpatialPoints(object, assay = "Assay1"),
1949
    #                       vrSpatialPoints(object, assay = "Assay2"))
1950
    # object <- addConnectivity(object,
1951
    #                           connectivity = connectivity,
1952
    #                           sample = sample.metadata["Assay1", "Sample"],
1953
    #                           layer = sample.metadata["Assay1", "Layer"])
1954
    
1955
    # add negative probe assay as new feature set
1956
    object <- addFeature(object, assay = assay_name, data = protdata, feature_name = "Protein")
1957
1958
  }
1959
  
1960
  # return 
1961
  return(object)
1962
}
1963
1964
####
1965
# Image Data ####
1966
####
1967
1968
#' importImageData
1969
#'
1970
#' import an image as VoltRon object
1971
#'
1972
#' @param image a single or a list of image paths or magick-image objects
1973
#' @param tile.size the size of tiles
1974
#' @param segments Either a list of segments or a GeoJSON file. This will result in a second assay in the VoltRon object to be created
1975
#' @param image_name the image name of the Image assay, Default: main
1976
#' @param channel_names the channel names of the images if multiple images are provided
1977
#' @param ... additional parameters passed to \link{formVoltRon}
1978
#'
1979
#' @importFrom magick image_read image_info
1980
#' @importFrom data.table data.table
1981
#'
1982
#' @examples
1983
#' # single image
1984
#' imgfile <- system.file("extdata", "DAPI.tif", package = "VoltRon")
1985
#' vrdata <- importImageData(imgfile, image_name = "main")
1986
#' 
1987
#' # multiple images
1988
#' imgfile <- c(system.file("extdata", "DAPI.tif", package = "VoltRon"), 
1989
#'              system.file("extdata", "DAPI.tif", package = "VoltRon"))
1990
#' vrdata <- importImageData(imgfile, image_name = "main", channel_name = c("DAPI", "DAPI2"))
1991
#' 
1992
#' @export
1993
importImageData <- function(image, tile.size = 10, segments = NULL, image_name = "main", channel_names = NULL, ...){
1994
  
1995
  # images and channel names
1996
  if(!is.null(channel_names)){
1997
    if(length(image) != length(channel_names))
1998
      stop("Provided channel names should of the same length as the images!")
1999
    if(any(!is.character(channel_names)))
2000
      stop("Invalid channel names!")  
2001
  }
2002
  
2003
  # get image
2004
  if(!is.list(image)){}
2005
    image <- as.list(image)
2006
  image <- sapply(image, function(img){
2007
    if(!inherits(img, "magick-image")){
2008
      if(!is.character(img)){
2009
        stop("image should either be a magick-image object or a file.path")
2010
      } else{
2011
        if(file.exists(img)){
2012
          img <- magick::image_read(img)
2013
        } else {
2014
          stop(img, " is not found!")
2015
        }
2016
      }
2017
    }
2018
    img
2019
  }, USE.NAMES = TRUE, simplify = FALSE)
2020
  
2021
  # channel names
2022
  if(!is.null(channel_names)){
2023
    names(image) <- channel_names
2024
  }
2025
2026
  # check image size
2027
  imageinfo <- vapply(image, function(img) {
2028
    info <- magick::image_info(img)
2029
    c(info$width, info$height)
2030
  }, numeric(2))
2031
  unique_width <- unique(imageinfo[1,])
2032
  unique_height <- unique(imageinfo[2,])
2033
  if(length(unique_width) == 1 && length(unique_height) == 1){
2034
    imageinfo <- list(width = imageinfo[1,1], height = imageinfo[2,1])
2035
  }
2036
2037
  # coordinates
2038
  even_odd_correction <- (!tile.size%%2)*(0.5)
2039
  x_coords <- seq((tile.size/2) + even_odd_correction, imageinfo$width, tile.size)[seq_len(imageinfo$width %/% tile.size)]
2040
  y_coords <- seq((tile.size/2) + even_odd_correction, imageinfo$height, tile.size)[seq_len(imageinfo$height %/% tile.size)]
2041
  y_coords <- rev(y_coords)
2042
  coords <- as.matrix(expand.grid(x_coords, y_coords))
2043
  colnames(coords) <- c("x", "y")
2044
  rownames(coords) <- paste0("tile", seq_len(nrow(coords)))
2045
2046
  # metadata
2047
  metadata <- data.table::data.table(id = rownames(coords))
2048
2049
  # create voltron object with tiles
2050
  object <- formVoltRon(data = NULL, metadata = metadata, image = image, coords, main.assay = "ImageData", assay.type = "tile", params = list(tile.size = tile.size), image_name = image_name, ...)
2051
  
2052
  # check if segments are defined
2053
  if(is.null(segments)){
2054
    return(object)
2055
  } else {
2056
    
2057
    # check if segments are paths
2058
    if(inherits(segments, "character")){
2059
      if(grepl(".geojson$", segments)){
2060
        segments <- generateSegments(geojson.file = segments)
2061
      } else {
2062
        stop("Only lists or GeoJSON files are accepted as segments input!")
2063
      }
2064
    }
2065
    
2066
    # make coordinates out of segments
2067
    coords <- t(vapply(segments, function(dat){
2068
      apply(dat[,c("x", "y")], 2, mean)
2069
    }, numeric(2)))
2070
    rownames(coords) <- names(segments)
2071
    
2072
    # make segment assay
2073
    assay <- formAssay(coords = coords, segments = segments, image = image, type = "ROI", main_image = image_name) 
2074
    
2075
    # add segments as assay
2076
    sample_metadata <- SampleMetadata(object)
2077
    object <- addAssay(object,
2078
                       assay = assay,
2079
                       metadata = data.frame(row.names = paste0(names(segments), "_Assay2")),
2080
                       assay_name = "ROIAnnotation",
2081
                       sample = sample_metadata$Sample, 
2082
                       layer = sample_metadata$Layer)
2083
    
2084
    # return
2085
    return(object)
2086
  }
2087
}
2088
2089
2090
#' generateSegments
2091
#' 
2092
#' The function to import segments from a geojson file
2093
#'
2094
#' @param geojson.file the GeoJSON file, typically generated by QuPath software
2095
#'
2096
#' @importFrom rjson fromJSON
2097
#' @importFrom dplyr tibble
2098
#' 
2099
#' @export
2100
generateSegments <- function(geojson.file){
2101
  
2102
  # get segments
2103
  if(inherits(geojson.file, "character")){
2104
    if(file.exists(geojson.file)){
2105
      segments <- rjson::fromJSON(file = geojson.file)
2106
    } else {
2107
      stop("geojson.file doesn't exist!")
2108
    }
2109
  } else {
2110
    stop("geojson.file should be the path to the GeoJSON file!")
2111
  }
2112
  
2113
  # parse polygons as segments/ROIs
2114
  segments <- lapply(segments, function(x){
2115
    type <- x$geometry$type
2116
    poly <- x$geometry$coordinates
2117
    if(grepl("Polygon", type)){
2118
      poly <- as.data.frame(matrix(unlist(poly[[1]]), ncol = 2, byrow = TRUE))
2119
    }
2120
    colnames(poly) <- c("x", "y")
2121
    dplyr::tibble(poly)
2122
  })
2123
  
2124
  # attach names to segments
2125
  segments <- mapply(function(x, sgt){
2126
    dplyr::tibble(data.frame(id = x, sgt))
2127
  }, seq_len(length(segments)), segments, SIMPLIFY = FALSE)
2128
  
2129
  # generate ROI names
2130
  names(segments) <- paste0("ROI", seq_len(length(segments)))
2131
2132
  # return
2133
  return(segments)
2134
}
2135
2136
#' generateGeoJSON
2137
#' 
2138
#' generating geojson files from segments
2139
#'
2140
#' @param segments the segments, typically from \link{vrSegments}.
2141
#' @param file the GeoJSON file, typically to be used by QuPath software.
2142
#'
2143
#' @importFrom rjson fromJSON
2144
#' 
2145
#' @export
2146
generateGeoJSON <- function(segments, file){
2147
  
2148
  if(!requireNamespace('geojsonR'))
2149
    stop("Please install geojsonR package for using geojsonR functions")
2150
  
2151
  # get segments
2152
  if(!inherits(file, "character")){
2153
    stop("file should be the path to the GeoJSON file!")
2154
  } 
2155
  
2156
  # reshape segments
2157
  segments <- mapply(function(id, sgt){
2158
    poly <- na.omit(as.matrix(sgt[,c("x", "y")]))
2159
    poly <- rbind(poly, poly[1,,drop = FALSE])
2160
    poly <- as.list(data.frame(t(poly)))
2161
    names(poly) <- NULL
2162
    init <- geojsonR::TO_GeoJson$new()
2163
    geometry <- init$Polygon(list(poly), stringify = TRUE)
2164
    feature <- list(type = "Feature", 
2165
                    id = id, 
2166
                    geometry = geometry[!names(geometry) %in% "json_dump"],
2167
                    properties = list(objectType = "annotation"))
2168
    feature
2169
  }, names(segments), segments, SIMPLIFY = FALSE, USE.NAMES = FALSE)
2170
2171
  # save as json
2172
  segments <- rjson::toJSON(segments)
2173
  write(segments, file = file)
2174
}