Diff of /R/processing.R [000000] .. [413088]

#' @include allgenerics.R
#'
NULL

####
# Normalization ####
####

normalizeDataVoltRon <- function(object, assay = NULL, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
  
  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)
  
  # normalize assays
  for(assy in assay_names){
    cur_assay <- object[[assy]]
    object[[assy]] <- normalizeData(cur_assay, method = method, desiredQuantile = desiredQuantile, scale = scale, sizefactor = sizefactor, feat_type = feat_type)
  }
  
  # return
  return(object)
}

#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param method the normalization method: "LogNorm", "Q3Norm", "LogQ3Norm", "CLR" or "hyper.arcsine"
#' @param desiredQuantile the quantile of the data if "Q3Norm" or "LogQ3Norm" is selected as \code{method}.
#' @param scale the scale parameter for the hyperbolic arcsine transformation
#' @param sizefactor the size factor if \code{method} is selected as "LogNorm"
#' @param feat_type the feature set type
#'
#' @rdname normalizeData
#' @method normalizeData VoltRon
#'
#' @export
setMethod("normalizeData", "VoltRon", normalizeDataVoltRon)

normalizeDatavrAssay <- function(object, method = "LogNorm", desiredQuantile = 0.9, scale = 0.2, sizefactor = 10000, feat_type = NULL) {
  
  # get raw data
  rawdata <- vrData(object, feat_type = feat_type, norm = FALSE)
  
  if(!is.numeric(desiredQuantile)){
    stop("desiredQuantile should be numeric")
  } else {
    if(findInterval(desiredQuantile, c(0,1)) != 1L){
      stop("desiredQuantile should be between [0,1]")
    }
  }
  
  # normalization method
  if(method == "LogNorm"){
    normdata <- LogNorm(rawdata, colSums(rawdata), sizefactor)
  } else if(method == "Q3Norm") {
    # rawdata[rawdata==0] <- 1
    qs <- getColQuantiles(rawdata, desiredQuantile)
    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
  } else if(method == "LogQ3Norm") {
    # rawdata[rawdata==0] <- 1
    qs <- getColQuantiles(rawdata, desiredQuantile)
    normdata <- getDivideSweep(rawdata, qs / exp(mean(log(qs))))
    normdata <- log(normdata + 1)
  } else if(method == "CLR") {
    normdata <- getDivideSweep(rawdata, colSums(rawdata))
    normdata <- apply(normdata, 2, function(x) {
      log1p(x = x / (exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE) / length(x = x))))
    })
  } else if(method == "hyper.arcsine") {
    normdata <- asinh(rawdata/scale)
  } else {
    stop('Please select one of these methods: "LogNorm", "Q3Norm", "LogQ3Norm", "CLR" or "hyper.arcsine"')
  }
  
  # store normalized data
  catch_connect1 <- try(slot(object, name = "data"), silent = TRUE)
  catch_connect2 <- try(slot(object, name = "rawdata"), silent = TRUE)
  if(!is(catch_connect1, 'try-error') && !methods::is(catch_connect1,'error')){
    if(is.null(feat_type))
      feat_type <- vrMainFeatureType(object)
    object@data[[paste0(feat_type, "_norm")]] <- normdata
  } else if(!is(catch_connect2, 'try-error') && !methods::is(catch_connect2,'error')){
    object@normdata <- normdata
  }
  
  # return
  return(object)
}

#' @rdname normalizeData
#' @method normalizeData vrAssay
#'
#' @importFrom stats quantile
#'
#' @export
setMethod("normalizeData", "vrAssay", normalizeDatavrAssay)

#' @rdname normalizeData
#' @method normalizeData vrAssayV2
#'
#' @importFrom stats quantile
#'
#' @export
setMethod("normalizeData", "vrAssayV2", normalizeDatavrAssay)

LogNorm <- function(rawdata, coldepth, sizefactor){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    normdata <- BPCells::t(BPCells::t(rawdata)/coldepth)
    normdata <- BPCells::log1p_slow(normdata*sizefactor)
  } else {
    normdata <- sweep(rawdata, 2L, coldepth, FUN = "/")
    normdata <- log(normdata*sizefactor + 1)
  }
  return(normdata)
}
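
## Illustration (not run) of the dense branch above: each count is divided by
## its column (cell/spot) depth, rescaled by `sizefactor`, and log1p-transformed.
## The toy matrix below is illustrative only.
# mat <- matrix(c(1, 3, 0, 6), nrow = 2)                    # 2 features x 2 cells
# sweep(mat, 2L, colSums(mat), FUN = "/")                   # depth-normalized fractions
# log(sweep(mat, 2L, colSums(mat), FUN = "/") * 10000 + 1)  # LogNorm result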

getDivideSweep <- function(rawdata, divisor){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    return(BPCells::t(BPCells::t(rawdata)/divisor))
  } else {
    return(sweep(rawdata, 2L, divisor, FUN = "/"))
  }
}

####
# Features ####
####

getFeaturesVoltRon <- function(object, assay = NULL, max.count = 1, n = 3000){
  
  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)
  
  # get features for all assays
  for(assy in assay_names){
    object[[assy]] <- getFeatures(object[[assy]], max.count = max.count, n = n)
  }
  
  # return
  return(object)
}

#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param max.count maximum count (across spatial points) for low count filtering
#' @param n the number of top variable features
#'
#' @rdname getFeatures
#'
#' @export
setMethod("getFeatures", "VoltRon", getFeaturesVoltRon)

getFeaturesvrAssay <- function(object, max.count = 1, n = 3000){
  
  # get data and coordinates
  rawdata <- vrData(object, norm = FALSE)
  coords <- vrCoordinates(object)
  features <- vrFeatures(object)
  
  # eliminate genes with low counts
  # keep.genes <- which(apply(rawdata,1,max) > max.count)
  keep.genes <- getMaxCount(rawdata, max.count)
  
  # vst estimation
  # vst_data <- data.frame(mean = Matrix::rowMeans(rawdata), var = apply(rawdata, 1, stats::var))
  vst_data <- getVstData(rawdata)
  loess_data <- vst_data[keep.genes,]
  loess_results <- stats::loess(var~mean, loess_data, span = 0.3)
  vst_data$adj_var <- 0
  vst_data$rank <- 0
  vst_data[keep.genes,]$adj_var <- stats::predict(loess_results)
  vst_data[keep.genes,]$rank <- order(order(vst_data$adj_var[keep.genes], decreasing = TRUE))
  
  # set feature data
  vrFeatureData(object) <- vst_data
  
  # return
  return(object)
}

#' @rdname getFeatures
#'
#' @importFrom stats loess predict var
#' @importFrom Matrix rowMeans
#'
#' @export
setMethod("getFeatures", "vrAssay", getFeaturesvrAssay)

#' @rdname getFeatures
#'
#' @importFrom stats loess predict var
#' @importFrom Matrix rowMeans
#'
#' @export
setMethod("getFeatures", "vrAssayV2", getFeaturesvrAssay)

getVstData <- function(rawdata){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    mean_data <- BPCells::rowMeans(rawdata)
    # sample variance per row: (sum(x^2) - n*mean^2)/(n - 1), with n = ncol(rawdata)
    var_data <- BPCells::rowSums(rawdata^2)
    var_data <- (var_data - ncol(rawdata)*mean_data^2)/(ncol(rawdata)-1)
    # var_data <- BPCells::matrix_stats(rawdata, row_stats="variance")
  } else {
    mean_data <- Matrix::rowMeans(rawdata)
    var_data <- apply(rawdata, 1, stats::var)
  }
  vst_data <- data.frame(mean = mean_data, var = var_data)
  return(vst_data)
}
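
## Quick check (not run) of the row-variance identity used above, on a dense
## toy matrix (illustrative only):
# m <- matrix(rpois(20, 5), nrow = 4)
# n <- ncol(m)
# all.equal((rowSums(m^2) - n*rowMeans(m)^2)/(n - 1), apply(m, 1, var))  # TRUE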

getMaxCount <- function(rawdata, max.count){
  if(inherits(rawdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    rawdata <- rawdata > max.count
    keep.genes <- which(BPCells::rowSums(rawdata) > 0)
  } else {
    keep.genes <- which(apply(rawdata,1,max) > max.count)
  }
  return(keep.genes)
}

#' getVariableFeatures
#'
#' get shared variable features across multiple assays
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param n the number of variable features to select
#' @param ... additional arguments passed to \link{vrFeatureData}
#'
#' @importFrom dplyr full_join
#' @importFrom utils head
#'
#' @export
getVariableFeatures <- function(object, assay = NULL, n = 3000, ...){

  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)

  # collect feature ranks across assays
  ranks <- NULL
  for(assy in assay_names){
    feature_data <- vrFeatureData(object[[assy]], ...)
    # if(nrow(feature_data) > 0){
    if(!is.null(feature_data)) {
      if(nrow(feature_data) > 0){
        feature_data$gene <- rownames(feature_data)
      }
    } else {
      feature_data <- data.frame(gene = vrFeatures(object[[assy]]), rank = NA)
    }
    if(is.null(ranks)){
      ranks <- feature_data[,c("gene", "rank")]
    } else {
      ranks <- ranks %>% full_join(feature_data[,c("gene", "rank")], by = c("gene" = "gene"))
    }
  }

  # get geometric mean of ranks, i.e. rank product statistic
  rownames_ranks <- ranks$gene
  ranks <- ranks[,!colnames(ranks) %in% "gene", drop = FALSE]
  ranks <- apply(ranks, 1, function(x) exp(mean(log(x))))
  # names(ranks) <- rownames(feature_data)
  names(ranks) <- rownames_ranks
  ranks <- ranks[ranks != 0]

  # get selected features
  if(length(ranks[!is.na(ranks)]) > 0){
    selected_features <- names(utils::head(sort(ranks, decreasing = FALSE), n))
  } else {
    selected_features <- vrFeatures(object, assay = assay)
  }

  # return
  return(selected_features)
}
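
## Usage sketch (not run): select features shared across assays by rank product.
## `vr_obj` is an illustrative placeholder for a VoltRon object on which
## getFeatures() has already been run.
# selected <- getVariableFeatures(vr_obj, n = 3000)
# head(selected)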

####
# vrEmbeddings ####
####

#' getPCA
#'
#' calculate PCA of the VoltRon objects
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param features the selected features for PCA reduction
#' @param dims the number of dimensions extracted from PCA
#' @param type the key name for the embedding, default: pca
#' @param overwrite Whether the existing embedding with name 'type' should be overwritten in \link{vrEmbeddings}
#' @param seed seed
#'
#' @importFrom irlba irlba
#'
#' @export
getPCA <- function(object, assay = NULL, features = NULL, dims = 30, type = "pca", overwrite = FALSE, seed = 1){

  # get assay names
  assay_names <- vrAssayNames(object, assay = assay)

  # get shared features and subset
  assay_features <- vrFeatures(object, assay = assay)

  # if the VoltRon object has features, get variable features and subset to them
  if(length(assay_features) > 0) {
    if(is.null(features))
      features <- getVariableFeatures(object, assay = assay)
    object_subset <- subsetVoltRon(object, features = features)
    vrMainAssay(object_subset) <- vrMainAssay(object)

    # adjust the number of extracted dimensions
    if(dims > length(features)){
      message("Requested more PC dimensions than available features, setting dims = length(features)!")
      dims <- length(features)
    }

  # if there are no features in the VoltRon object, use the object as is
  } else {
    object_subset <- object
  }

  # get data
  normdata <- vrData(object_subset, assay = assay, norm = TRUE)

  # get PCA embedding
  set.seed(seed)
  if(inherits(normdata, "IterableMatrix")){
    if(!requireNamespace("BPCells"))
      stop("You have to install BPCells!")
    svd <- BPCells::svds(normdata, k=dims)
    pr.data <- BPCells::multiply_cols(svd$v, svd$d)
  } else {
    scale.data <- apply(normdata, 1, scale)
    pr.data <- irlba::prcomp_irlba(scale.data, n=dims, center=colMeans(scale.data))
    pr.data <- pr.data$x
  }
  
  # change colnames
  colnames(pr.data) <- paste0("PC", seq_len(dims))
  rownames(pr.data) <- colnames(normdata)

  # set Embeddings
  vrEmbeddings(object, assay = assay, type = type, overwrite = overwrite) <- pr.data

  # return
  return(object)
}
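
## Usage sketch (not run): compute a 30-dimensional PCA embedding.
## `vr_obj` is an illustrative placeholder for a normalized VoltRon object
## with feature statistics from getFeatures().
# vr_obj <- getPCA(vr_obj, dims = 30, type = "pca")
# head(vrEmbeddings(vr_obj, type = "pca"))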

#' getUMAP
#'
#' calculate UMAP of the VoltRon objects
#'
#' @param object a VoltRon object
#' @param assay assay name (exp: Assay1) or assay class (exp: Visium, Xenium), see \link{SampleMetadata}.
#' if NULL, the default assay will be used, see \link{vrMainAssay}.
#' @param data.type the type of data used to calculate UMAP from: "pca" (default), "raw" or "norm"
#' @param dims the dimensions of the embedding (e.g. pca) used to calculate the UMAP from
#' @param umap.key the name of the umap embedding, default: umap
#' @param overwrite Whether the existing embedding with name 'umap.key' should be overwritten in \link{vrEmbeddings}
#' @param seed seed
#'
#' @importFrom uwot umap
#' @importFrom Matrix t
#'
#' @export
#'
getUMAP <- function(object, assay = NULL, data.type = "pca", dims = seq_len(30), umap.key = "umap", overwrite = FALSE, seed = 1){

  # get data
  if(data.type %in% c("raw", "norm")){
    data <- vrData(object, assay = assay, norm = (data.type == "norm"))
    data <- as.matrix(as(Matrix::t(data),"dgCMatrix"))
  } else{
    embedding_names <- vrEmbeddingNames(object)
    if(data.type %in% embedding_names) {
      data <- vrEmbeddings(object, assay = assay, type = data.type, dims = dims)
    } else {
      stop("Please provide a data type that is either 'raw', 'norm' or an existing embedding name (e.g. 'pca'), see vrEmbeddingNames")
    }
  }

  # get umap
  set.seed(seed)
  umap_data <- uwot::umap(data)
  colnames(umap_data) <- c("x", "y")
  vrEmbeddings(object, assay = assay, type = umap.key, overwrite = overwrite) <- umap_data

  # return
  return(object)
}
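
## Usage sketch (not run): UMAP on the first 30 PCs computed by getPCA().
## `vr_obj` is an illustrative placeholder for a VoltRon object that already
## has a "pca" embedding.
# vr_obj <- getUMAP(vr_obj, data.type = "pca", dims = seq_len(30), umap.key = "umap")
# head(vrEmbeddings(vr_obj, type = "umap"))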

####
# Image Processing ####
####

#' split_into_tiles
#'
#' split image raster data into tiles
#'
#' @param image_data image raster data
#' @param tile_size tile size
#'
#' @noRd
split_into_tiles <- function(image_data, tile_size = 10) {
  n_rows <- nrow(image_data)
  n_cols <- ncol(image_data)

  # Calculate the number of tiles in rows and columns
  n_row_tiles <- n_rows %/% tile_size
  n_col_tiles <- n_cols %/% tile_size

  # Initialize an empty list to store tiles
  tiles <- list()

  # Loop through the image data matrix to extract tiles
  for (i in seq_len(n_row_tiles)) {
    for (j in seq_len(n_col_tiles)) {
      # Calculate the indices for the current tile
      start_row <- (i - 1) * tile_size + 1
      end_row <- i * tile_size
      start_col <- (j - 1) * tile_size + 1
      end_col <- j * tile_size

      # Extract the current tile from the image data matrix
      tile <- image_data[start_row:end_row, start_col:end_col]

      # Store the tile in the list
      tiles[[length(tiles) + 1]] <- tile
    }
  }

  # Return the list of tiles
  return(tiles)
}
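
## Illustration (not run): a 4x4 matrix split into four 2x2 tiles, returned in
## row-major tile order. The toy matrix is illustrative only.
# m <- matrix(1:16, nrow = 4, byrow = TRUE)
# tiles <- split_into_tiles(m, tile_size = 2)
# length(tiles)  # 4
# tiles[[1]]     # rows 1-2, columns 1-2 of m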