Diff of /utils.R [000000] .. [fe0e8b]

Switch to unified view

a b/utils.R
1
#############################
2
## Commonly-used functions ##
3
#############################
4
5
load_SingleCellExperiment <- function(file, normalise = FALSE, features = NULL, cells = NULL, remove_non_expressed_genes = FALSE) {
6
  library(SingleCellExperiment); library(scran); library(scater);
7
  sce <- readRDS(file)
8
  if (!is.null(cells)) sce <- sce[,cells]
9
  if (!is.null(features)) sce <- sce[features,]
10
  if (remove_non_expressed_genes) sce <- sce[which(Matrix::rowSums(counts(sce))>15),]
11
  if (normalise) sce <- logNormCounts(sce)
12
  
13
  return(sce)
14
}
15
16
load_Seurat <- function(file, assay = "RNA", normalise = FALSE, features = NULL, cells = NULL, remove_non_expressed_genes = FALSE, ...) {
17
  library(Seurat)
18
  seurat <- readRDS(file)
19
  # if (assay%in%Seurat::Assays(seurat)) seurat <- seurat[[assay]]
20
  if (!is.null(cells)) seurat <- seurat[,cells]
21
  if (!is.null(features)) seurat <- seurat[features,]
22
  if (normalise) {
23
    seurat <- NormalizeData(seurat, normalization.method = "LogNormalize")
24
    seurat <- ScaleData(seurat, ...)
25
  }
26
  if (remove_non_expressed_genes) seurat <- seurat[which(Matrix::rowMeans(seurat@assays[[assay]]@counts)>1e-4),]
27
  return(seurat)
28
}
29
30
matrix.please<-function(x) {
31
  m<-as.matrix(x[,-1])
32
  rownames(m)<-x[[1]]
33
  m
34
}
35
36
#'  method=1: The TF-IDF implementation used by Stuart & Butler et al. 2019. This computes \eqn{\log(TF \times IDF)}.
37
#'  method=2: The TF-IDF implementation used by Cusanovich & Hill et al. 2018. This computes \eqn{TF \times (\log(IDF))}.
38
#'  method=3: The log-TF method used by Andrew Hill. This computes \eqn{\log(TF) \times \log(IDF)}.
39
tfidf <- function(mtx, method = 1, scale.factor = 1e4) {
40
  npeaks <- colSums(mtx)
41
  if (any(npeaks == 0)) {
42
    warning("Some cells contain 0 total counts")
43
  }
44
45
  tf <- Matrix::tcrossprod(mtx, y = Matrix::Diagonal(x=1/npeaks))
46
47
  rsums <- rowSums(mtx)
48
  if (any(rsums == 0)) {
49
    warning("Some features contain 0 total counts")
50
  }
51
  idf <- ncol(mtx) / rsums
52
53
  if (method == 2) {
54
    idf <- log(1 + idf)
55
  } else if (method == 3) {
56
    tf <- log1p(tf * scale.factor)
57
    idf <- log(1 + idf)
58
  }
59
  mtx.tfidf <- Matrix::Diagonal(n = length(idf), x = idf) %*% tf
60
61
  if (method == 1) {
62
    mtx.tfidf <- log1p(mtx.tfidf * scale.factor)
63
  }
64
  colnames(mtx.tfidf) <- colnames(mtx)
65
  rownames(mtx.tfidf) <- rownames(mtx)
66
67
  # set NA values to 0
68
  mtx.tfidf[is.na(mtx.tfidf)] <- 0
69
70
  return(mtx.tfidf)
71
}
72
73
pdist <- function(tmat){
74
  # @param tmat A non-negative matrix with samples by features
75
  # @reference http://r.789695.n4.nabble.com/dist-function-in-R-is-very-slow-td4738317.html
76
  mtm <- Matrix::tcrossprod(tmat)
77
  sq <- rowSums(tmat^2)
78
  out0 <- outer(sq, sq, "+") - 2 * mtm
79
  out0[out0 < 0] <- 0
80
  
81
  sqrt(out0)
82
}
83
84
smoother_aggregate_nearest_nb <- function(mat, D, k){
85
  # @param mat A matrix in a shape of #genes x #samples.
86
  # @param D A predefined distance matrix in a shape of #samples x #samples.
87
  # @param k An integer to choose \code{k} nearest samples (self-inclusive) to
88
  #  aggregate based on the distance matrix \code{D}.
89
  denoised_mat <- sapply(seq_len(ncol(mat)), function(cid){
90
    nb_cid <- head(order(D[cid, ]), k)
91
    closest_mat <- mat[, nb_cid, drop=FALSE]
92
    # return(Matrix::rowSums(closest_mat))
93
    return(Matrix::rowMeans(closest_mat))
94
  })
95
  dimnames(denoised_mat) <- dimnames(mat)
96
  return(denoised_mat)
97
}
98
99
100
# TO-FINISH.....
101
smoother_aggregate_nearest_nb_parallel <- function(mat, D, k, cores=1){
102
  # @param mat A matrix in a shape of #genes x #samples.
103
  # @param D A predefined distance matrix in a shape of #samples x #samples.
104
  # @param k An integer to choose \code{k} nearest samples (self-inclusive) to
105
  #  aggregate based on the distance matrix \code{D}.
106
107
  # library(future)
108
  library(future.apply)
109
  plan("multiprocess", workers = ncores)
110
111
112
  # sapply(seq_len(ncol(mat)), function(cid){
113
  future_sapply(seq_len(ncol(mat)), function(cid){
114
    nb_cid <- head(order(D[cid, ]), k)
115
    closest_mat <- mat[, nb_cid, drop=FALSE]
116
    # return(Matrix::rowSums(closest_mat))
117
    return(Matrix::rowMeans(closest_mat))
118
  })
119
}
120
121
# regress_covariates <- function(mtx, vars.to.regress) {
122
#   data <- scale(t(logcounts(sce_filt)), center = T, scale = F)
123
#   data_regressed <- apply(data, 2, function(x) {
124
#     lm.out <- lm(formula=expr~covariate, data=data.frame(expr=x, covariate=factor(sce_filt$stage)));
125
#     residuals <- lm.out[["residuals"]]+lm.out[["coefficients"]][1]
126
#   })
127
# }
128
129
# Remove unwanted effects from a matrix
130
#
131
# @parm mtx An expression matrix to regress the effects of covariates out
132
# of should be the complete expression matrix in genes x cells
133
# @param covariates A matrix or data.frame of latent variables, should be cells
134
# x covariates, the colnames should be the variables to regress
135
# @param features_idx An integer vector representing the indices of the
136
# genes to run regression on
137
# @param model.use Model to use, one of 'linear', 'poisson', or 'negbinom'; pass
138
# NULL to simply return mtx
139
# @param verbose Display a progress bar
140
#' @importFrom stats as.formula lm
141
#' @importFrom utils txtProgressBar setTxtProgressBar
142
#
143
RegressOutMatrix_parallel <- function(mtx, covariates = NULL, features_idx = NULL, split.by = NULL, block.size = 1000, min.cells.to.block = 3000, ncores = 1, verbose = TRUE) {
144
  
145
  library(future)
146
  library(future.apply)
147
  plan("multiprocess", workers = ncores)
148
  
149
  # Check features_idx
150
  if (is.null(features_idx)) {
151
    features_idx <- 1:nrow(mtx)
152
  }
153
  if (is.character(features_idx)) {
154
    features_idx <- intersect(features_idx, rownames(mtx))
155
    if (length(features_idx) == 0) {
156
      stop("Cannot use features that are beyond the scope of mtx")
157
    }
158
  } else if (max(features_idx) > nrow(mtx)) {
159
    stop("Cannot use features that are beyond the scope of mtx")
160
  }
161
  
162
  # Check dataset dimensions
163
  if (nrow(covariates) != ncol(mtx)) {
164
    stop("Uneven number of cells between latent data and expression data")
165
  }
166
  
167
  # Subset
168
  mtx <- mtx[features_idx,]
169
  mtx.dimnames <- dimnames(mtx)
170
  
171
  # Define chunck points
172
  chunk.points <- ChunkPoints(dsize = nrow(mtx), csize = block.size)
173
  
174
  # Define cell splitting
175
  split.cells <- split(colnames(mtx), f = split.by %||% TRUE)
176
177
   if (nbrOfWorkers() > 1) {
178
179
    # Define chuncks
180
      chunks <- expand.grid(
181
        names(split.cells),
182
        1:ncol(chunk.points),
183
        stringsAsFactors = FALSE
184
      )
185
186
      # Run RegressOutMatrix in parallel
187
      mtx.resid <- future_lapply(
188
        X = 1:nrow(chunks),
189
        FUN = function(i) {
190
          row <- chunks[i, ]
191
          group <- row[[1]]
192
          index <- as.numeric(row[[2]])
193
          return(RegressOutMatrix(
194
            mtx = mtx[chunk.points[1, index]:chunk.points[2, index], split.cells[[group]], drop = FALSE],
195
            covariates = covariates[split.cells[[group]], , drop = FALSE],
196
            # features_idx = features_idx[chunk.points[1, index]:chunk.points[2, index]],
197
            verbose = FALSE
198
          ))
199
        }
200
      )
201
202
      # Merge splitted cells
203
      if (length(split.cells) > 1) {
204
        merge.indices <- lapply(
205
          X = 1:length(x = split.cells),
206
          FUN = seq.int,
207
          to = length(mtx.resid),
208
          by = length(split.cells)
209
        )
210
        mtx.resid <- lapply(
211
          X = merge.indices,
212
          FUN = function(x) {
213
            return(do.call( 'rbind', mtx.resid[x]))
214
          }
215
        )
216
        mtx.resid <- do.call('cbind', mtx.resid)
217
      } else {
218
        mtx.resid <- do.call( 'rbind', mtx.resid)
219
      }
220
    } else {
221
      
222
      mtx.resid <- lapply(
223
        X = names(split.cells),
224
        FUN = function(x) {
225
          if (verbose && length(split.cells) > 1) {
226
            message("Regressing out variables from split ", x)
227
          }
228
          return(RegressOutMatrix(
229
            mtx = mtx[, split.cells[[x]], drop = FALSE],
230
            covariates = covariates[split.cells[[x]], , drop = FALSE],
231
            features_idx = features_idx,
232
            verbose = verbose
233
          ))
234
        }
235
      )
236
      mtx.resid <- do.call('cbind', mtx.resid)
237
    }
238
    # dimnames(mtx.resid) <- dimnames(mtx)
239
    return(mtx.resid)
240
  }
241
242
RegressOutMatrix <- function(mtx, covariates = NULL, features_idx = NULL, verbose = TRUE) {
243
  
244
  # Check features_idx
245
  if (is.null(features_idx)) {
246
    features_idx <- 1:nrow(mtx)
247
  }
248
  if (is.character(features_idx)) {
249
    features_idx <- intersect(features_idx, rownames(mtx))
250
    if (length(features_idx) == 0) {
251
      stop("Cannot use features that are beyond the scope of mtx")
252
    }
253
  } else if (max(features_idx) > nrow(mtx)) {
254
    stop("Cannot use features that are beyond the scope of mtx")
255
  }
256
  
257
  # Check dataset dimensions
258
  if (nrow(covariates) != ncol(mtx)) {
259
    stop("Uneven number of cells between latent data and expression data")
260
  }
261
  
262
  # Subset
263
  mtx <- mtx[features_idx,]
264
  mtx.dimnames <- dimnames(mtx)
265
  
266
  # Create formula for regression
267
  vars.to.regress <- colnames(covariates)
268
  fmla <- paste('GENE ~', paste(vars.to.regress, collapse = '+')) %>% as.formula
269
270
  # In this code, we'll repeatedly regress different Y against the same X
271
  # (covariates) in order to calculate residuals.  Rather that repeatedly
272
  # call lm to do this, we'll avoid recalculating the QR decomposition for the
273
  # covariates matrix each time by reusing it after calculating it once
274
  regression.mat <- cbind(covariates, mtx[1,])
275
  colnames(regression.mat) <- c(colnames(covariates), "GENE")
276
  qr <- lm(fmla, data = regression.mat, qr = TRUE)$qr
277
  rm(regression.mat)
278
279
  # Make results matrix
280
  data.resid <- matrix(
281
    nrow = nrow(mtx),
282
    ncol = ncol(mtx)
283
  )
284
285
  if (verbose) pb <- txtProgressBar(char = '=', style = 3, file = stderr())
286
287
  # Extract residuals from each feature by using the pre-computed QR decomposition
288
  for (i in 1:length(features_idx)) {
289
    regression.mat <- cbind(covariates, mtx[features_idx[i], ])
290
    colnames(regression.mat) <- c(vars.to.regress, 'GENE')
291
    regression.mat <- qr.resid(qr = qr, y = mtx[features_idx[i],])  # The function qr.resid returns the residuals when fitting y to the matrix with QR decomposition.
292
    data.resid[i, ] <- regression.mat
293
    if (verbose) {
294
      setTxtProgressBar(pb = pb, value = i / length(features_idx))
295
    }
296
  }
297
298
  if (verbose) close(con = pb)
299
300
  dimnames(data.resid) <- mtx.dimnames
301
  
302
  return(data.resid)
303
}
304
305
306
307
# Generate chunk points
308
#
309
# @param dsize How big is the data being chunked
310
# @param csize How big should each chunk be
311
#
312
# @return A matrix where each column is a chunk, row 1 is start points, row 2 is end points
313
#
314
ChunkPoints <- function(dsize, csize) {
315
  return(vapply(
316
    X = 1L:ceiling(dsize / csize),
317
    FUN = function(i) {
318
      return(c(
319
        start = (csize * (i - 1L)) + 1L,
320
        end = min(csize * i, dsize)
321
      ))
322
    },
323
    FUN.VALUE = numeric(length = 2L)
324
  ))
325
}
326
327
328
"%ni%" <- Negate("%in%")
329
330
ggplot_theme_NoAxes <- function() {
331
  theme(
332
    axis.title = element_blank(),
333
    axis.line = element_blank(),
334
    axis.text = element_blank(),
335
    axis.ticks = element_blank()
336
  )
337
}
338
339
minmax.normalisation <- function(x)
340
{
341
    return((x-min(x,na.rm=T)) /(max(x,na.rm=T)-min(x,na.rm=T)))
342
}
343
344
getmode <- function(v, dist) {
345
  tab <- table(v)
346
  #if tie, break to shortest distance
347
  if(sum(tab == max(tab)) > 1){
348
    tied <- names(tab)[tab == max(tab)]
349
    sub  <- dist[v %in% tied]
350
    names(sub) <- v[v %in% tied]
351
    return(names(sub)[which.min(sub)])
352
  } else {
353
    return(names(tab)[which.max(tab)])
354
  }
355
}
356
357
GRangesToString <- function(grange, sep = c("-", "-")) {
358
  regions <- paste0(
359
    as.character(x = seqnames(x = grange)),
360
    sep[[1]],
361
    start(x = grange),
362
    sep[[2]],
363
    end(x = grange)
364
  )
365
  return(regions)
366
}
367
368
369
## sparse -> matrix
370
dropNA2matrix <- function(x) {
371
  if(!is(x, "dsparseMatrix")) stop("x needs to be a subclass of dsparseMatrix!")
372
  
373
  x <- as(x, "dgCMatrix")
374
  
375
  ## remember true NAs
376
  nas <- Matrix::which(is.na(x), arr.ind=TRUE)
377
  
378
  x@x[x@x==0] <- NA
379
  zeros <- Matrix::which(is.na(x), arr.ind=TRUE)
380
  x <- as(x, "matrix")
381
  x[x==0] <- NA
382
  x[zeros] <- 0
383
  x[nas] <- NA
384
  x
385
}
386
387
# dropNA2vector <- function(x) {
388
#   stop("NEEDS TO BE FIXED")
389
#   if(!is(x, "dsparseMatrix")) stop("x needs to be a subclass of dsparseMatrix!")
390
#   x <- as(x, "numeric")
391
392
#   # true NAs
393
#   nas <- which(is.na(x), arr.ind=TRUE)
394
395
#   x[x==0] <- NA
396
#   zeros <- which(is.na(x), arr.ind=TRUE)
397
#   x[nas] <- NA
398
#   x[zeros] <- 0
399
# }
400
401
## matrix -> sparse
402
dropNA <- function(x) {
403
  if(!is(x, "matrix")) stop("x needs to be a matrix!")
404
  
405
  zeros <- which(x==0, arr.ind=TRUE)
406
  ## keep zeros
407
  x[is.na(x)] <- 0
408
  x[zeros] <- NA
409
  x <- Matrix::drop0(x)
410
  x[zeros] <- 0
411
  x
412
}
413
414
# dropNAis.na <- function(x) {
415
#   if(!is(x, "dsparseMatrix")) stop("x needs to be a subclass of dsparseMatrix!")
416
#   x <- as(x, "dgCMatrix")
417
#   
418
#   ### not represented means NA and 0 means 0
419
#   ### this coercion keeps 0
420
#   !as(x, "ngCMatrix")
421
# }
422
423
give.n <- function(x){
424
  return(c(y = mean(x), label = length(x)))
425
}
426
427
sort.abs <- function(dt, sort.field) dt[order(-abs(dt[[sort.field]]))]
428
429
430
# function to pseudobulk a SingleCellExperiment object
431
pseudobulk_sce_fn <- function(x, assay = NULL, by, fun = c("sum", "mean", "median"), scale = FALSE) {
432
  
433
  # check validity of input arguments
434
  fun <- match.arg(fun)
435
  if (is.null(assay))  assay <- assayNames(x)[1] 
436
  
437
  # store aggregation parameters &
438
  # nb. of cells that went into aggregation
439
  md <- metadata(x)
440
  md$agg_pars <- list(assay = assay, by = by, fun = fun, scale = scale)
441
  
442
  # get aggregation function
443
  # fun <- switch(fun, sum = "rowSums", mean = "rowMeans", median = "rowMedians")
444
  
445
  # drop missing factor levels & tabulate number of cells
446
  cd <- dplyr::mutate_if(as.data.frame(colData(x)), is.factor, droplevels)
447
  colData(x) <- DataFrame(cd, row.names = colnames(x),check.names = FALSE)
448
  md$n_cells <- table(as.data.frame(colData(x)[, by]))
449
  
450
  # assure 'by' colData columns are factors so that missing combinations aren't dropped
451
  for (i in by) 
452
    if (!is.factor(x[[i]])) 
453
      x[[i]] <- factor(x[[i]])
454
  
455
  # split cells & compute pseudo-bulks
456
  cs <- .split_cells(x, by)
457
  # pb <- .pb(x, cs, assay, fun)
458
  pb <- .pb(x=x, by=by, fun=fun)
459
  if (scale & length(by) == 2) {
460
    ls <- lapply(.pb(x, cs, "counts", "rowSums"), colSums)
461
    pb <- lapply(seq_along(pb), function(i) pb[[i]] / 1e6 * ls[[i]])
462
    names(pb) <- names(ls)
463
  }
464
  
465
  # construct SCE
466
  pb <- SingleCellExperiment(pb, metadata = md)
467
  
468
  # propagate 'colData' columns that are unique across 2nd 'by'
469
  if (length(by) == 2) {
470
    cd <- colData(x)
471
    ids <- colnames(pb)
472
    counts <- vapply(ids, function(u) {
473
      m <- as.logical(match(cd[, by[2]], u, nomatch = 0))
474
      vapply(cd[m, ], function(u) length(unique(u)), numeric(1))
475
    }, numeric(ncol(colData(x))))
476
    cd_keep <- apply(counts, 1, function(u) all(u == 1))
477
    cd_keep <- setdiff(names(which(cd_keep)), by)
478
    if (length(cd_keep) != 0) {
479
      m <- match(ids, cd[, by[2]], nomatch = 0)
480
      cd <- cd[m, cd_keep, drop = FALSE]
481
      rownames(cd) <- ids
482
      colData(pb) <- cd
483
    }
484
  }
485
  return(pb)
486
}
487
488
489
# split cells by cluster-sample
490
# auxiliary function to pseudobulk a SingleCellExperiment object
491
#   - by: character vector specifying colData column(s) to split by. If length(by) == 1, a list of length nlevels(colData$by), else,
492
#          a nested list with 2nd level of length nlevels(colData$by[2])
493
.split_cells <- function(x, by) {
494
  if (is(x, "SingleCellExperiment")) x <- colData(x)
495
  cd <- data.frame(x[by], check.names = FALSE)
496
  cd <- data.table(cd, cell = rownames(x)) %>% split(by = by, sorted = TRUE, flatten = FALSE)
497
  purrr::map_depth(cd, length(by), "cell")
498
}
499
500
501
# auxiliary function to pseudobulk a SingleCellExperiment object
502
.pb <- function(x, by, fun) {
503
  
504
  # compute pseudobulks
505
  # y <- scuttle::summarizeAssayByGroup(x, assay.type = assay, ids = (ids <- colData(x)[by]), statistics = fun, BPPARAM = BiocParallel::SerialParam())
506
  y <- scuttle::summarizeAssayByGroup(x, ids = colData(x)[by], statistics = fun)
507
  colnames(y) <- y[[by[length(by)]]]
508
  
509
  if (length(by) == 1)  return(assay(y))
510
  
511
  # reformat into one assay per 'by[1]'
512
  if (is.factor(ids <- y[[by[1]]]))
513
    ids <- droplevels(ids)
514
  is <- split(seq_len(ncol(y)), ids)
515
  ys <- map(is, ~assay(y)[, .])
516
  
517
  # fill in missing combinations
518
  for (i in seq_along(ys)) {
519
    fill <- setdiff(unique(y[[by[2]]]), colnames(ys[[i]]))
520
    if (length(fill != 0)) {
521
      foo <- matrix(0, nrow(x), length(fill))
522
      colnames(foo) <- fill
523
      foo <- cbind(ys[[i]], foo)
524
      o <- paste(sort(unique(y[[by[2]]])))
525
      ys[[i]] <- foo[, o]
526
    }
527
  }
528
  return(ys)
529
}
530
531
532
.summarizeJASPARMotifs <- function(motifs = NULL){
533
534
  motifNames <- lapply(seq_along(motifs), function(x){
535
    namex <- make.names(motifs[[x]]@name)
536
    if(substr(namex,nchar(namex),nchar(namex))=="."){
537
      namex <- substr(namex,1,nchar(namex)-1)
538
    }
539
    namex <- paste0(namex, "_", x)
540
    namex
541
  }) %>% unlist(.)
542
543
  motifDF <- lapply(seq_along(motifs), function(x){
544
    data.frame(
545
      row.names = motifNames[x],
546
      name = motifs[[x]]@name[[1]],
547
      ID = motifs[[x]]@ID,
548
      strand = motifs[[x]]@strand,
549
      symbol = ifelse(!is.null(motifs[[x]]@tags$symbol[1]), motifs[[x]]@tags$symbol[1], NA) ,
550
      family = ifelse(!is.null(motifs[[x]]@tags$family[1]), motifs[[x]]@tags$family[1], NA),
551
      alias = ifelse(!is.null(motifs[[x]]@tags$alias[1]), motifs[[x]]@tags$alias[1], NA),
552
      stringsAsFactors = FALSE
553
    )
554
  }) %>% Reduce("rbind", .) %>% DataFrame
555
  
556
  names(motifs) <- motifNames
557
558
  out <- list(motifs = motifs, motifSummary = motifDF)
559
560
  return(out)
561
  
562
}
563
564
.summarizeChromVARMotifs <- function(motifs = NULL){
565
566
  motifNames <- lapply(seq_along(motifs), function(x){
567
    namex <- make.names(motifs[[x]]@name)
568
    if(grepl("LINE", namex)){
569
      splitNamex <- stringr::str_split(motifs[[x]]@ID, pattern="\\_", simplify = TRUE)
570
      namex <- splitNamex[1, grep("LINE",splitNamex[1,]) + 1]
571
    }
572
    if(substr(namex,nchar(namex),nchar(namex))=="."){
573
      namex <- substr(namex,1,nchar(namex)-1)
574
    }
575
    namex <- paste0(namex, "_", x)
576
    namex
577
  }) %>% unlist(.)
578
579
  motifNames2 <- lapply(seq_along(motifs), function(x){
580
    namex <- make.names(motifs[[x]]@name)
581
    if(grepl("LINE", namex)){
582
      splitNamex <- stringr::str_split(motifs[[x]]@ID, pattern="\\_", simplify = TRUE)
583
      namex <- splitNamex[1, grep("LINE",splitNamex[1,]) + 1]
584
    }
585
    if(substr(namex,nchar(namex),nchar(namex))=="."){
586
      namex <- substr(namex,1,nchar(namex)-1)
587
    }
588
    namex
589
  }) %>% unlist(.)
590
591
  motifDF <- lapply(seq_along(motifs), function(x){
592
    df <- data.frame(
593
      row.names = motifNames[x],
594
      name = motifNames2[[x]],
595
      ID = motifs[[x]]@ID,
596
      strand = motifs[[x]]@strand,
597
      stringsAsFactors = FALSE
598
    )
599
  }) %>% Reduce("rbind", .) %>% DataFrame
600
601
  names(motifs) <- motifNames
602
603
  out <- list(motifs = motifs, motifSummary = motifDF)
604
605
  return(out)
606
607
}
608
609
.augment_matrix <-function(mtx, samples) {
610
  samples <- unique(samples)
611
  mtx <- t(mtx)
612
  aug_mtx<-matrix(NA, ncol=ncol(mtx), nrow=length(samples))
613
  aug_mtx<-mtx[match(samples,rownames(mtx)),,drop=FALSE]
614
  rownames(aug_mtx)<-samples
615
  colnames(aug_mtx)<-colnames(mtx)
616
  return(t(aug_mtx))
617
}
618
619
620
stop_quietly <- function() {
621
  opt <- options(show.error.messages = FALSE)
622
  on.exit(options(opt))
623
  stop()
624
}