Diff of /R/integration.R [000000] .. [413088]

Switch to unified view

a b/R/integration.R
1
####
2
# Data Transfer ####
3
####
4
5
#' transferData
6
#'
7
#' transfer data across assays
8
#'
9
#' @param object a VoltRon object
10
#' @param from the name or class of assay whose data transfered to the second assay
11
#' @param to the name or class of target assay where data is transfered to
12
#' @param features the set of features from \link{vrFeatures} or metadata columns from \link{Metadata} that are transferred. 
13
#' Only one metadata feature can be transfered at a time.
14
#' @param new_feature_name the name of the new feature set created from the source assay defined in \code{from} argument.
15
#'
16
#' @export
17
transferData <- function(object, from = NULL, to = NULL, features = NULL, new_feature_name = NULL){
18
19
  # assay list
20
  assaytypes <- c("ROI", "spot", "cell", "molecule", "tile")
21
  
22
  # get Assay IDs from Names and IDs
23
  from <- vrAssayNames(object, assay = from)
24
  to <- vrAssayNames(object, assay = to)
25
  
26
  # check assay names
27
  if(length(from) > 1 | length(to) > 1){
28
    stop("For now, label transfer can only be accomplished across two assays")
29
  }
30
  
31
  # check if assays are in the same block
32
  sample.metadata <- SampleMetadata(object)
33
  from_assayclass <- sample.metadata[from, "Assay"]
34
  to_assayclass <- sample.metadata[to, "Assay"]
35
  samples <- sample.metadata[c(from, to), "Sample"]
36
  
37
  if(length(unique(samples)) > 1)
38
    stop("Selected assays have to be within the same sample block!")
39
  
40
  # get assay types
41
  to_object_type <- vrAssayTypes(object[[to]])
42
  from_object_type <- vrAssayTypes(object[[from]])
43
  
44
  if(which(assaytypes == to_object_type) > which(assaytypes == from_object_type) && from_object_type == "ROI"){
45
    return(transferLabels(object = object, from = from, to = to, features = features))
46
  } else {
47
    return(transferFeatureData(object = object, from = from, to = to, features = features, new_feature_name = new_feature_name))
48
  }
49
}
50
51
#' transferFeatureData
52
#'
53
#' transfer feature data across assays
54
#'
55
#' @param object a VoltRon object
56
#' @param from The ID of assay whose data transfer to the second assay
57
#' @param to The ID of the target assay where data is transfered to
58
#' @param features The set of data or metadata features that are transfered. Only one metadata feature can be transfered at a time.
59
#' @param new_feature_name the name of the new feature set created from the source assay defined in \code{from}.
60
#'
61
#' @noRd
62
transferFeatureData <- function(object, from = NULL, to = NULL, features = NULL, new_feature_name = NULL){
63
64
  # get assays and metadata
65
  from_object <- object[[from]]
66
  from_metadata <- Metadata(object, assay = from)
67
  to_object <- object[[to]]
68
  to_metadata <- Metadata(object, assay = to)
69
70
  # get assay types
71
  to_object_type <- vrAssayTypes(to_object)
72
  from_object_type <- vrAssayTypes(from_object)
73
74
  # get transfer data type
75
  if(to_object_type == "spot"){
76
    if(from_object_type == "cell"){
77
      new_assay <- getSpotsFromCells(from_object, from_metadata, to_object, features = features)
78
    }
79
  } else if(to_object_type == "cell"){
80
    if(from_object_type == "tile"){
81
      new_assay <- getCellsFromTiles(from_object, from_metadata, to_object, features = features)
82
    } else if(from_object_type == "spot"){
83
      new_assay <- getCellsFromSpots(from_object, from_metadata, to_object, features = features)
84
    }
85
  } else if(to_object_type == "ROI"){
86
    if(from_object_type == "cell"){
87
      new_assay <- getROIsFromCells(from_object, from_metadata, to_object, features = features)
88
    }
89
  }
90
91
  # add new feature set
92
  if(is.null(new_feature_name)){
93
    new_feature_name <- paste(vrMainFeatureType(object, assay = from)$Feature, "pseudo", sep = "_")
94
  }
95
  object <- addFeature(object, assay = to, data = new_assay, feature_name = new_feature_name)
96
97
  # return
98
  object
99
}
100
101
#' transferLabels
102
#'
103
#' transfer labels across assays
104
#'
105
#' @param object a VoltRon object
106
#' @param from The ID of assay whose data transfer to the second assay
107
#' @param to The ID of the target assay where data is transfered to
108
#' @param features The set of data or metadata features that are transferred. Only one metadata feature can be transferred at a time.
109
#'
110
#' @noRd
111
transferLabels <- function(object, from = NULL, to = NULL, features = NULL){
112
  
113
  # get assays and metadata
114
  from_object <- object[[from]]
115
  from_metadata <- Metadata(object, assay = from)
116
  to_object <- object[[to]]
117
  to_metadata <- Metadata(object, assay = to)
118
  
119
  # get assay types
120
  to_object_type <- vrAssayTypes(to_object)
121
  from_object_type <- vrAssayTypes(from_object)
122
  
123
  # get transfer data type
124
  if(from_object_type == "ROI" & to_object_type != "ROI"){
125
    
126
    # transfer labels
127
    transferedLabelsMetadata <- transferLabelsFromROI(from_object, from_metadata, to_object, to_metadata, features = features)
128
129
    # update metadata
130
    # metadata <- Metadata(object, assay = to)
131
    # entities <- vrSpatialPoints(to_object)
132
    # if(is.numeric(value)){
133
    #   metadata[[features]] <- as.numeric(NA)
134
    # } else {
135
    #   metadata[[features]] <- ""
136
    # }
137
    # if(is.null(rownames(metadata))){
138
    #   metadata[[features]][match(entities, as.vector(metadata$id))] <- value
139
    # } else {
140
    #   metadata[entities,][[features]] <- value
141
    # }
142
    # 
143
    # # transfer labels
144
    # Metadata(object, assay = to) <- metadata
145
    object <- addMetadata(object, assay = to, value = transferedLabelsMetadata[[features]], label = features)
146
  }
147
    
148
  # return
149
  object
150
}
151
152
153
#' getSpotsFromCells
154
#'
155
#' Generate Psuedo counts per spots and insert to as a separate image assay to the Visium object
156
#'
157
#' @param from_object The vrAssay object whose data transfer to the second assay
158
#' @param from_metadata the metadata associated with \code{from_object}
159
#' @param to_object The ID of the target vrAssay object where data is transfered to
160
#' @param features the name of the metadata feature to transfer, if NULL, the rawdata will be transfered
161
#'
162
#' @importFrom dplyr %>% right_join
163
#' @importFrom stats aggregate
164
#' @importFrom magick image_data
165
#'
166
#' @noRd
167
#'
168
getSpotsFromCells <- function(from_object, from_metadata = NULL, to_object, features = NULL) {
169
170
  # get the spot radius of Visium spots
171
  Vis_spotradius <- vrAssayParams(to_object, param = "spot.radius")
172
173
  # get cell and spot coordinates
174
  message("Cell to Spot Distances \n")
175
  coords_spots <- vrCoordinates(to_object)
176
  coords_cells <- vrCoordinates(from_object)
177
178
  # get distances from cells to spots
179
  # alldist <- flexclust::dist2(coords_cells, coords_spots)
180
  # cell_to_spot <- FNN::get.knnx(coords_spots, coords_cells, k = 1)
181
  cell_to_spot <- knn_annoy(coords_spots, coords_cells, k = 1)
182
  names(cell_to_spot) <- c("nn.index", "nn.dist")
183
  cell_to_spot_nnid <- vrSpatialPoints(to_object)[cell_to_spot$nn.index[,1]]
184
  names(cell_to_spot_nnid) <- rownames(coords_cells)
185
  cell_to_spot_nndist <- cell_to_spot$nn.dist[,1]
186
  names(cell_to_spot_nndist) <- rownames(coords_cells)
187
  cell_to_spot_nnid <- cell_to_spot_nnid[cell_to_spot_nndist < Vis_spotradius]
188
189
  # find associated spot for each cell
190
  message("Find associated spots for each cell \n")
191
  cell_to_spot_id <- names(cell_to_spot_nnid)
192
193
  # get data
194
  if(is.null(features)){
195
    raw_counts <- vrData(from_object, norm = FALSE)
196
  } else {
197
    data_features <- features[features %in% vrFeatures(from_object)]
198
    metadata_features <- features[features %in% colnames(from_metadata)]
199
    if(length(data_features) > 0){
200
      if(length(metadata_features) > 0){
201
        stop("Data and metadata features cannot be transfered in the same time!")
202
      } else {
203
        raw_counts <- vrData(from_object, norm = FALSE)
204
        raw_counts <- raw_counts[features,]
205
        message("There are ", length(setdiff(features, data_features)), " unknown features!")
206
      }
207
    } else {
208
      if(length(metadata_features) > 1){
209
        stop("Only one metadata column can be transfered at a time")
210
      } else if(length(metadata_features) == 1) {
211
        raw_counts <- from_metadata[,metadata_features, drop = FALSE]
212
        rownames_raw_counts <- rownames(raw_counts)
213
        raw_counts <- dummy_cols(raw_counts, remove_first_dummy = FALSE)
214
        raw_counts <- raw_counts[,-1]
215
        raw_counts <- t(raw_counts)
216
        colnames(raw_counts) <- rownames_raw_counts
217
        rownames(raw_counts) <- gsub(paste0("^", metadata_features, "_"), "", rownames(raw_counts))
218
      } else {
219
        stop("Features cannot be found in data and metadata!")
220
      }
221
    }
222
  }
223
  raw_counts <- raw_counts[,cell_to_spot_id, drop = FALSE]
224
225
  # pool cell counts to Spots
226
  message("Aggregating cell profiles in spots \n")
227
  aggregate_raw_counts <- stats::aggregate(t(as.matrix(raw_counts)), list(cell_to_spot_nnid), sum)
228
  aggregate_raw_counts <- data.frame(barcodes = vrSpatialPoints(to_object)) %>% dplyr::right_join(aggregate_raw_counts, by = c("barcodes" = "Group.1"))
229
  rownames(aggregate_raw_counts) <- aggregate_raw_counts$barcodes
230
  aggregate_raw_counts <- t(aggregate_raw_counts[,-1])
231
  aggregate_raw_counts[is.na(aggregate_raw_counts)] <- 0
232
233
  # return
234
  return(aggregate_raw_counts)
235
}
236
237
#' getSpotsFromCells
238
#'
239
#' Generate Psuedo counts per spots and insert to as a separate image assay to the Visium object
240
#'
241
#' @param from_object The vrAssay object whose data transfer to the second assay
242
#' @param from_metadata the metadata associated with \code{from_object}
243
#' @param to_object The ID of the target vrAssay object where data is transfered to
244
#' @param features the name of the metadata feature to transfer, if NULL, the rawdata will be transfered
245
#'
246
#' @importFrom dplyr %>% right_join
247
#' @importFrom stats aggregate
248
#' @importFrom magick image_data
249
#'
250
#' @noRd
251
#'
252
getCellsFromSpots <- function(from_object, from_metadata = NULL, to_object, features = NULL) {
253
  
254
  # get the spot radius of Visium spots
255
  radius <- vrAssayParams(from_object, param = "nearestpost.distance")/2
256
  
257
  # get cell and spot coordinates
258
  message("Spot to Cell Distances \n")
259
  coords_spots <- vrCoordinates(from_object)
260
  coords_cells <- vrCoordinates(to_object)
261
  
262
  # get distances from cells to spots
263
  spot_to_cell <- knn_annoy(coords_spots, coords_cells, k = 1)
264
  names(spot_to_cell) <- c("nn.index", "nn.dist")
265
  nnindex <- spot_to_cell$nn.index[,1]
266
  names_nnindex <- names(nnindex)
267
  nnindex <- vrSpatialPoints(from_object)[nnindex]
268
  names(nnindex) <- names_nnindex
269
  nndist <- spot_to_cell$nn.dist[,1]
270
  nnindex <- nnindex[nndist < radius]
271
272
  # find associated spot for each cell
273
  message("Find associated spot for each cell \n")
274
275
  # get data
276
  if(is.null(features)){
277
    raw_counts <- vrData(from_object, norm = FALSE)
278
  } else {
279
    data_features <- features[features %in% vrFeatures(from_object)]
280
    metadata_features <- features[features %in% colnames(from_metadata)]
281
    if(length(data_features) > 0){
282
      if(length(metadata_features) > 0){
283
        stop("Data and metadata features cannot be transfered in the same time!")
284
      } else {
285
        raw_counts <- vrData(from_object, norm = FALSE)
286
        raw_counts <- raw_counts[features,]
287
        message("There are ", length(setdiff(features, data_features)), " unknown features!")
288
      }
289
    } else {
290
      if(length(metadata_features) > 1){
291
        stop("Only one metadata column can be transfered at a time")
292
      } else if(length(metadata_features) == 1) {
293
        raw_counts <- from_metadata[,metadata_features, drop = FALSE]
294
        rownames_raw_counts <- rownames(raw_counts)
295
        raw_counts <- dummy_cols(raw_counts, remove_first_dummy = FALSE)
296
        raw_counts <- raw_counts[,-1]
297
        raw_counts <- t(raw_counts)
298
        colnames(raw_counts) <- rownames_raw_counts
299
        rownames(raw_counts) <- gsub(paste0("^", metadata_features, "_"), "", rownames(raw_counts))
300
      } else {
301
        stop("Features cannot be found in data and metadata!")
302
      }
303
    }
304
  }
305
  raw_counts <- raw_counts[,nnindex, drop = FALSE]
306
  colnames(raw_counts) <- names(nnindex)
307
  
308
  # return
309
  return(raw_counts)
310
}
311
312
#' getROIsFromCells
313
#'
314
#' Generate Psuedo counts per ROIs and insert to as a separate image assay to the Visium object
315
#'
316
#' @param from_object The vrAssay object whose data transfer to the second assay
317
#' @param from_metadata the metadata associated with \code{from_object}
318
#' @param to_object The ID of the target vrAssay object where data is transfered to
319
#' @param features the name of the metadata feature to transfer, if NULL, the rawdata will be transfered
320
#'
321
#' @importFrom dplyr %>% right_join
322
#' @importFrom stats aggregate
323
#' @importFrom magick image_data
324
#'
325
#' @noRd
326
#'
327
getROIsFromCells <- function(from_object, from_metadata = NULL, to_object, features = NULL) {
328
329
  # get cell and ROIs coordinates
330
  message("Cell to ROI Distances \n")
331
  segments_rois <- vrSegments(to_object)
332
  coords_cells <- vrCoordinates(from_object)
333
  
334
  # find associated spot for each cell
335
  message("Find associated ROIs for each cell \n")
336
  cell_to_roi_id <- NULL
337
  cell_to_roi_labelid <- NULL
338
  names_segments_rois <- names(segments_rois)
339
  for(i in seq_len(length(segments_rois))){
340
    cur_segt <- segments_rois[[i]]
341
    if(ncol(cur_segt) > 3){
342
      in.list <- point.in.circle(coords_cells[,1], coords_cells[,2], cur_segt$x, cur_segt$y, cur_segt$rx)
343
    } else {
344
      in.list <- sp::point.in.polygon(coords_cells[,1], coords_cells[,2], cur_segt$x, cur_segt$y)
345
    }
346
    in.list.cells <- rownames(coords_cells)[!!in.list]
347
    cell_to_roi_id <- c(cell_to_roi_id, in.list.cells)
348
    cell_to_roi_labelid <- c(cell_to_roi_labelid, rep(names_segments_rois[i], length(in.list.cells)))
349
  }
350
  
351
  # get data
352
  if(is.null(features)){
353
    raw_counts <- vrData(from_object, norm = FALSE)
354
  } else {
355
    data_features <- features[features %in% vrFeatures(from_object)]
356
    metadata_features <- features[features %in% colnames(from_metadata)]
357
    if(length(data_features) > 0){
358
      if(length(metadata_features) > 0){
359
        stop("Data and metadata features cannot be transfered in the same time!")
360
      } else {
361
        raw_counts <- vrData(from_object, norm = FALSE)
362
        raw_counts <- raw_counts[features,]
363
        message("There are ", length(setdiff(features, data_features)), " unknown features!")
364
      }
365
    } else {
366
      if(length(metadata_features) > 1){
367
        stop("Only one metadata column can be transfered at a time")
368
      } else if(length(metadata_features) == 1) {
369
        raw_counts <- from_metadata[,metadata_features, drop = FALSE]
370
        rownames_raw_counts <- rownames(raw_counts)
371
        raw_counts <- dummy_cols(raw_counts, remove_first_dummy = FALSE)
372
        raw_counts <- raw_counts[,-1]
373
        raw_counts <- t(raw_counts)
374
        colnames(raw_counts) <- rownames_raw_counts
375
        rownames(raw_counts) <- gsub(paste0("^", metadata_features, "_"), "", rownames(raw_counts))
376
      } else {
377
        stop("Features cannot be found in data and metadata!")
378
      }
379
    }
380
  }
381
  raw_counts <- raw_counts[,cell_to_roi_id, drop = FALSE]
382
  
383
  # pool cell counts to Spots
384
  message("Aggregating cell profiles in spots \n")
385
  aggregate_raw_counts <- stats::aggregate(t(as.matrix(raw_counts)), list(cell_to_roi_labelid), sum)
386
  aggregate_raw_counts <- data.frame(barcodes = vrSpatialPoints(to_object)) %>% dplyr::right_join(aggregate_raw_counts, by = c("barcodes" = "Group.1"))
387
  rownames(aggregate_raw_counts) <- aggregate_raw_counts$barcodes
388
  aggregate_raw_counts <- t(aggregate_raw_counts[,-1])
389
  aggregate_raw_counts[is.na(aggregate_raw_counts)] <- 0
390
  
391
  # return
392
  return(aggregate_raw_counts)
393
}
394
395
getCellsFromTiles <- function(from_object, from_metadata = NULL, to_object, features = NULL, k = 5) {
396
397
  # get cell and spot coordinates
398
  message("Tile to Cell Distances \n")
399
  coords_cells <- vrCoordinates(to_object)
400
  coords_tiles <- vrCoordinates(from_object)
401
402
  # get distances from cells to spots
403
  # tile_to_cell <- FNN::get.knnx(coords_tiles, coords_cells, k = k)
404
  tile_to_cell <- knn_annoy(coords_tiles, coords_cells, k = k)
405
  names(tile_to_cell) <- c("nn.index", "nn.dist")
406
  tile_to_cell_nnid <- data.frame(id = rownames(coords_cells), tile_to_cell$nn.index)
407
  # tile_to_cell_nnid <- reshape2::melt(tile_to_cell_nnid, id.vars = "id")
408
  tile_to_cell_nnid <- data.table::melt(tile_to_cell_nnid, id.vars = "id")
409
  tile_id <- vrSpatialPoints(from_object)[tile_to_cell_nnid$value]
410
  tile_to_cell_nnid <- tile_to_cell_nnid$id
411
412
  # get data
413
  raw_counts <- vrData(from_object, norm = FALSE)
414
  raw_counts <- raw_counts[,tile_id]
415
416
  # pool cell counts to Spots
417
  message("Aggregating tile profiles in cells \n")
418
  aggregate_raw_counts <- stats::aggregate(t(as.matrix(raw_counts)), list(tile_to_cell_nnid), mean)
419
  aggregate_raw_counts <- data.frame(barcodes = vrSpatialPoints(to_object)) %>% dplyr::right_join(aggregate_raw_counts, by = c("barcodes" = "Group.1"))
420
  rownames(aggregate_raw_counts) <- aggregate_raw_counts$barcodes
421
  aggregate_raw_counts <- t(aggregate_raw_counts[,-1])
422
  aggregate_raw_counts[is.na(aggregate_raw_counts)] <- 0
423
424
  # return
425
  return(aggregate_raw_counts)
426
}
427
428
transferLabelsFromROI <- function(from_object, from_metadata = NULL, to_object, to_metadata = NULL, features = NULL) {
429
  
430
  # get ROI and other coordinates
431
  segments_roi <- vrSegments(from_object)
432
  coords <- vrCoordinates(to_object)
433
  spatialpoints <- rownames(coords)
434
  
435
  # check if all features are in from_metadata
436
  if(!all(features %in% colnames(from_metadata))){
437
    stop("Some features are not found in the ROI metadata!")
438
  }
439
  
440
  # annotate points in the to object
441
  for(feat in features){
442
    
443
    # get from metadata labels
444
    feat_labels <- from_metadata[,feat]
445
    
446
    # get to metadata
447
    new_label <- rep("undefined", length(spatialpoints))
448
    names(new_label) <- spatialpoints
449
    
450
    for(i in seq_len(length(segments_roi))){
451
      cur_poly <- segments_roi[[i]][,c("x","y")]
452
      in.list <- sp::point.in.polygon(coords[,1], coords[,2], cur_poly[,1], cur_poly[,2])
453
      new_label[rownames(coords)[!!in.list]] <- feat_labels[i]
454
    }
455
    
456
    to_metadata[[feat]] <- new_label
457
  }
458
459
  
460
  # return label
461
  return(to_metadata)
462
}
463
464
####
465
# Embedding ####
466
####
467
468
getJointPCA <- function(object, assay.1 = NULL, assay.2 = NULL, dims = 30, seed = 1, ...){
469
470
  # get assay names
471
  assay <- assay.1
472
  assay_names <- vrAssayNames(object, assay = assay)
473
474
  # if there are features of a VoltRon object, then get variable features too
475
  assay_features <- vrFeatures(object, assay = assay)
476
  if(length(assay_features) > 0) {
477
    features <- getVariableFeatures(object, assay = assay)
478
    vrMainAssay(object) <- object@sample.metadata[assay, "Assay"]
479
    object_subset <- subsetVoltRon(object, features = features)
480
    vrMainAssay(object_subset) <- vrMainAssay(object)
481
    if(dims > length(features)){
482
      message("Requested more PC dimensions than existing features: dims = length(features) now!")
483
      dims <- length(features)
484
    }
485
  } else {
486
    object_subset <- object
487
  }
488
489
  # set seed
490
  set.seed(seed)
491
  
492
  # get PCA embedding
493
  normdata.1 <- vrData(object_subset, assay = assay, norm = TRUE)
494
  scale.data <- apply(normdata.1, 1, scale)
495
  pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
496
  pr.data.1 <- pr.data$x
497
  colnames(pr.data.1) <- paste0("PC", seq_len(dims))
498
  rownames(pr.data.1) <- colnames(normdata.1)
499
  normdata.1 <- normdata.1/(pr.data$sdev[1]^2)
500
501
  # get assay names
502
  assay <- assay.2
503
  assay_names <- vrAssayNames(object, assay = assay)
504
505
  # if there are features of a VoltRon object, then get variable features too
506
  assay_features <- vrFeatures(object, assay = assay)
507
  if(length(assay_features) > 0) {
508
    features <- getVariableFeatures(object, assay = assay)
509
    vrMainAssay(object) <- object@sample.metadata[assay, "Assay"]
510
    object_subset <- subsetVoltRon(object, features = features)
511
    vrMainAssay(object_subset) <- vrMainAssay(object)
512
    if(dims > length(features)){
513
      message("Requested more PC dimensions than existing features: dims = length(features) now!")
514
      dims <- length(features)
515
    }
516
  } else {
517
    object_subset <- object
518
  }
519
520
  # get PCA embedding
521
  normdata.2 <- vrData(object_subset, assay = assay, norm = TRUE)
522
  scale.data <- apply(normdata.2, 1, scale)
523
  pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
524
  pr.data.2 <- pr.data$x
525
  colnames(pr.data.2) <- paste0("PC", seq_len(dims))
526
  rownames(pr.data.2) <- colnames(normdata.2)
527
  normdata.2 <- normdata.2/(pr.data$sdev[1]^2)
528
529
  # get joint PCA
530
  normdata <- rbind(normdata.1, normdata.2)
531
  scale.data <- apply(normdata, 1, scale)
532
  pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
533
  prsdev <- pr.data$sdev
534
  pr.data <- pr.data$x
535
  colnames(pr.data) <- paste0("PC", seq_len(dims))
536
  rownames(pr.data) <- colnames(normdata)
537
  normdata <- normdata/(prsdev[1]^2)
538
539
  # set Embeddings
540
  vrEmbeddings(object, type = "pca_joint", assay = assay.1, ...) <- pr.data
541
  vrEmbeddings(object, type = "pca_joint", assay = assay.2, ...) <- pr.data
542
543
  # return
544
  return(object)
545
}