Diff of /R/customConverters.R [000000] .. [28e211]

Switch to unified view

a b/R/customConverters.R
1
#' Automatic Feature Id Conversion.
2
#'
3
#' Attempt to automatically convert non-ENSEMBL feature identifiers to
4
#' ENSEMBL identifiers. Features are included as rownames of the input
5
#' data.frame (or matrix). It is assumed that feature identifiers
6
#' (i.e., rownames of x) come from human or mouse genomes, and
7
#' are either OFFICIAL SYMBOLS or ENTREZIDS. If less than 20% of match
8
#' is identified, an error will be thrown.
9
#'
10
#' @param x data.frame or matrix including raw counts (typically, UMIs),
11
#'   where rows are features (genes) and rownames are feature identifiers
12
#'   (SYMBOLs or ENTREZIDs).
13
#' @param verbose logical, shall messages be printed to inform about
14
#'   conversion advances.
15
#'
16
#' @return a data.frame where rownames are ENSEMBL IDs. The new feature IDs are
17
#' automatically imputed based on existing feature IDs (SYMBOLs or ENTREZIDs).
18
#'
19
#' @export
20
customConvertFeats <- function(x, verbose = TRUE) {
21
  if (!(is.matrix(x) || is.data.frame(x))) {
22
    stop("wrong input")
23
  }
24
25
  # uncomment
26
  myDict <- DIscBIO::HumanMouseGeneIds
27
  myDict <- myDict[!is.na(myDict[, "ENSEMBL"]), ]
28
29
  #
30
  xx <- rownames(x)
31
  keep.ei <- xx %in% myDict[, "ENTREZID"]
32
  keep.sy <- xx %in% myDict[, "SYMBOL"]
33
  keep.ez <- xx %in% myDict[, "ENSEMBL"]
34
35
  rat.ez <- sum(keep.ez) / length(xx)
36
  rat.ei <- sum(keep.ei) / length(xx)
37
  rat.sy <- sum(keep.sy) / length(xx)
38
39
  # Automatic selection and replacement
40
  if (rat.ez >= rat.sy && rat.ez >= rat.ei && rat.ez >= 0.2) {
41
    # nothing to do, return
42
    return(x)
43
  } else if (rat.ei >= rat.sy && rat.ei >= 0.2) {
44
    x <- x[!duplicated(xx), ]
45
    x <- x[rownames(x) %in% myDict[, "ENTREZID"], ]
46
47
    tmpDict <- myDict$ENSEMBL
48
    names(tmpDict) <- myDict$ENTREZID
49
    rownames(x) <- as.character(tmpDict[rownames(x)])
50
  } else if (rat.ei < rat.sy && rat.sy >= 0.2) {
51
    x <- x[!duplicated(xx), ]
52
    x <- x[rownames(x) %in% myDict[, "SYMBOL"], ]
53
54
    tmpDict <- myDict$ENSEMBL
55
    names(tmpDict) <- myDict$SYMBOL
56
    rownames(x) <- as.character(tmpDict[rownames(x)])
57
  } else {
58
    message("Feat ID Conversion could not be performed")
59
  }
60
  return(x)
61
}
62
63
64
65
#' Convert Single Cell Data Objects to DISCBIO.
66
#'
67
#' Initialize a DISCBIO-class object starting from a
68
#' SingleCellExperiment object or a Seurat object
69
#'
70
#' @param x an object of class Seurat or SingleCellExperiment
71
#' @param ... additional parameters to pass to the function
72
#'
73
#' @details Additional parameters to pass to `list` include, if x is
74
#'   a Seurat object, "assay", which is a string indicating the assay slot
75
#'   used to obtain data from (defaults to 'RNA')
76
#'
77
#' @return a DISCBIO-class object
78
#'
79
#' @export
80
#'
81
as.DISCBIO <- function(x, ...) {
82
  if ("Seurat" %in% class(x)) {
83
    # Get Arguments and parse out what we want
84
    all.args <- list(...)
85
86
    # Fetch arguments we care about
87
    if ("assay" %in% all.args) {
88
      assay <- all.args[["assay"]]
89
    } else {
90
      assay <- "RNA"
91
    }
92
93
    # Get feats and sample names
94
    all.feats <- base::as.character(
95
      rownames(x@assays[[assay]]@meta.features)
96
    )
97
    all.cells <- base::as.character(rownames(x@meta.data))
98
99
    # Get data
100
    all.counts <- base::data.frame(
101
      base::as.matrix(x@assays[[assay]]@counts)
102
    )
103
104
    # re-write row and colnames
105
    rownames(all.counts) <- all.feats
106
    colnames(all.counts) <- all.cells
107
108
    # prep output and return
109
    if (sum(grepl("^ENS", all.feats)) / length(all.feats) < 0.5) {
110
      all.counts <- customConvertFeats(all.counts)
111
    }
112
    y <- DISCBIO(all.counts)
113
  } else if ("SingleCellExperiment" %in% class(x)) {
114
    y <- DISCBIO(x)
115
  } else {
116
    stop("Conversion not supported")
117
  }
118
  return(y)
119
}
120
121
122
#' Convert a DISCBIO object to a SingleCellExperiment.
123
#'
124
#' Extract the SingleCellExperiment input data from the corresponding input slot
125
#' in a DISCBIO-class object
126
#'
127
#' @param x an object of class DISCBIO
128
#'
129
#' @return a SingleCellExperiment-class object
130
#'
131
#' @export
132
#' @examples
133
#' g1_disc <- DISCBIO(valuesG1msTest)
134
#' class(g1_disc)
135
#' g1_sce <- DISCBIO2SingleCellExperiment(g1_disc)
136
#' class(g1_sce)
137
#'
138
DISCBIO2SingleCellExperiment <- function(x) {
139
  return(x@SingleCellExperiment)
140
}