[28e211]: / R / customConverters.R

Download this file

141 lines (124 with data), 4.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#' Automatic Feature Id Conversion.
#'
#' Attempt to automatically convert non-ENSEMBL feature identifiers to
#' ENSEMBL identifiers. Features are included as rownames of the input
#' data.frame (or matrix). It is assumed that feature identifiers
#' (i.e., rownames of x) come from human or mouse genomes, and
#' are either OFFICIAL SYMBOLS or ENTREZIDS. If less than 20% of match
#' is identified, an error will be thrown.
#'
#' @param x data.frame or matrix including raw counts (typically, UMIs),
#' where rows are features (genes) and rownames are feature identifiers
#' (SYMBOLs or ENTREZIDs).
#' @param verbose logical, shall messages be printed to inform about
#' conversion advances.
#'
#' @return a data.frame where rownames are ENSEMBL IDs. The new feature IDs are
#' automatically imputed based on existing feature IDs (SYMBOLs or ENTREZIDs).
#'
#' @export
customConvertFeats <- function(x, verbose = TRUE) {
if (!(is.matrix(x) || is.data.frame(x))) {
stop("wrong input")
}
# uncomment
myDict <- DIscBIO::HumanMouseGeneIds
myDict <- myDict[!is.na(myDict[, "ENSEMBL"]), ]
#
xx <- rownames(x)
keep.ei <- xx %in% myDict[, "ENTREZID"]
keep.sy <- xx %in% myDict[, "SYMBOL"]
keep.ez <- xx %in% myDict[, "ENSEMBL"]
rat.ez <- sum(keep.ez) / length(xx)
rat.ei <- sum(keep.ei) / length(xx)
rat.sy <- sum(keep.sy) / length(xx)
# Automatic selection and replacement
if (rat.ez >= rat.sy && rat.ez >= rat.ei && rat.ez >= 0.2) {
# nothing to do, return
return(x)
} else if (rat.ei >= rat.sy && rat.ei >= 0.2) {
x <- x[!duplicated(xx), ]
x <- x[rownames(x) %in% myDict[, "ENTREZID"], ]
tmpDict <- myDict$ENSEMBL
names(tmpDict) <- myDict$ENTREZID
rownames(x) <- as.character(tmpDict[rownames(x)])
} else if (rat.ei < rat.sy && rat.sy >= 0.2) {
x <- x[!duplicated(xx), ]
x <- x[rownames(x) %in% myDict[, "SYMBOL"], ]
tmpDict <- myDict$ENSEMBL
names(tmpDict) <- myDict$SYMBOL
rownames(x) <- as.character(tmpDict[rownames(x)])
} else {
message("Feat ID Conversion could not be performed")
}
return(x)
}
#' Convert Single Cell Data Objects to DISCBIO.
#'
#' Initialize a DISCBIO-class object starting from a
#' SingleCellExperiment object or a Seurat object
#'
#' @param x an object of class Seurat or SingleCellExperiment
#' @param ... additional parameters to pass to the function
#'
#' @details Additional parameters to pass to `list` include, if x is
#' a Seurat object, "assay", which is a string indicating the assay slot
#' used to obtain data from (defaults to 'RNA')
#'
#' @return a DISCBIO-class object
#'
#' @export
#'
as.DISCBIO <- function(x, ...) {
if ("Seurat" %in% class(x)) {
# Get Arguments and parse out what we want
all.args <- list(...)
# Fetch arguments we care about
if ("assay" %in% all.args) {
assay <- all.args[["assay"]]
} else {
assay <- "RNA"
}
# Get feats and sample names
all.feats <- base::as.character(
rownames(x@assays[[assay]]@meta.features)
)
all.cells <- base::as.character(rownames(x@meta.data))
# Get data
all.counts <- base::data.frame(
base::as.matrix(x@assays[[assay]]@counts)
)
# re-write row and colnames
rownames(all.counts) <- all.feats
colnames(all.counts) <- all.cells
# prep output and return
if (sum(grepl("^ENS", all.feats)) / length(all.feats) < 0.5) {
all.counts <- customConvertFeats(all.counts)
}
y <- DISCBIO(all.counts)
} else if ("SingleCellExperiment" %in% class(x)) {
y <- DISCBIO(x)
} else {
stop("Conversion not supported")
}
return(y)
}
#' Convert a DISCBIO object to a SingleCellExperiment.
#'
#' Extract the SingleCellExperiment input data from the corresponding input slot
#' in a DISCBIO-class object
#'
#' @param x an object of class DISCBIO
#'
#' @return a SingleCellExperiment-class object
#'
#' @export
#' @examples
#' g1_disc <- DISCBIO(valuesG1msTest)
#' class(g1_disc)
#' g1_sce <- DISCBIO2SingleCellExperiment(g1_disc)
#' class(g1_sce)
#'
DISCBIO2SingleCellExperiment <- function(x) {
return(x@SingleCellExperiment)
}