Diff of /R/customConverters.R [000000] .. [28e211]

Switch to side-by-side view

--- a
+++ b/R/customConverters.R
@@ -0,0 +1,140 @@
+#' Automatic Feature Id Conversion.
+#'
+#' Attempt to automatically convert non-ENSEMBL feature identifiers to
+#' ENSEMBL identifiers. Features are included as rownames of the input
+#' data.frame (or matrix). It is assumed that feature identifiers
+#' (i.e., rownames of x) come from human or mouse genomes, and
+#' are either OFFICIAL SYMBOLS or ENTREZIDS. If less than 20% of match
+#' is identified, an error will be thrown.
+#'
+#' @param x data.frame or matrix including raw counts (typically, UMIs),
+#'   where rows are features (genes) and rownames are feature identifiers
+#'   (SYMBOLs or ENTREZIDs).
+#' @param verbose logical, shall messages be printed to inform about
+#'   conversion advances.
+#'
+#' @return a data.frame where rownames are ENSEMBL IDs. The new feature IDs are
+#' automatically imputed based on existing feature IDs (SYMBOLs or ENTREZIDs).
+#'
+#' @export
+customConvertFeats <- function(x, verbose = TRUE) {
+  if (!(is.matrix(x) || is.data.frame(x))) {
+    stop("wrong input")
+  }
+
+  # uncomment
+  myDict <- DIscBIO::HumanMouseGeneIds
+  myDict <- myDict[!is.na(myDict[, "ENSEMBL"]), ]
+
+  #
+  xx <- rownames(x)
+  keep.ei <- xx %in% myDict[, "ENTREZID"]
+  keep.sy <- xx %in% myDict[, "SYMBOL"]
+  keep.ez <- xx %in% myDict[, "ENSEMBL"]
+
+  rat.ez <- sum(keep.ez) / length(xx)
+  rat.ei <- sum(keep.ei) / length(xx)
+  rat.sy <- sum(keep.sy) / length(xx)
+
+  # Automatic selection and replacement
+  if (rat.ez >= rat.sy && rat.ez >= rat.ei && rat.ez >= 0.2) {
+    # nothing to do, return
+    return(x)
+  } else if (rat.ei >= rat.sy && rat.ei >= 0.2) {
+    x <- x[!duplicated(xx), ]
+    x <- x[rownames(x) %in% myDict[, "ENTREZID"], ]
+
+    tmpDict <- myDict$ENSEMBL
+    names(tmpDict) <- myDict$ENTREZID
+    rownames(x) <- as.character(tmpDict[rownames(x)])
+  } else if (rat.ei < rat.sy && rat.sy >= 0.2) {
+    x <- x[!duplicated(xx), ]
+    x <- x[rownames(x) %in% myDict[, "SYMBOL"], ]
+
+    tmpDict <- myDict$ENSEMBL
+    names(tmpDict) <- myDict$SYMBOL
+    rownames(x) <- as.character(tmpDict[rownames(x)])
+  } else {
+    message("Feat ID Conversion could not be performed")
+  }
+  return(x)
+}
+
+
+
+#' Convert Single Cell Data Objects to DISCBIO.
+#'
+#' Initialize a DISCBIO-class object starting from a
+#' SingleCellExperiment object or a Seurat object
+#'
+#' @param x an object of class Seurat or SingleCellExperiment
+#' @param ... additional parameters to pass to the function
+#'
+#' @details Additional parameters to pass to `list` include, if x is
+#'   a Seurat object, "assay", which is a string indicating the assay slot
+#'   used to obtain data from (defaults to 'RNA')
+#'
+#' @return a DISCBIO-class object
+#'
+#' @export
+#'
+as.DISCBIO <- function(x, ...) {
+  if ("Seurat" %in% class(x)) {
+    # Get Arguments and parse out what we want
+    all.args <- list(...)
+
+    # Fetch arguments we care about
+    if ("assay" %in% all.args) {
+      assay <- all.args[["assay"]]
+    } else {
+      assay <- "RNA"
+    }
+
+    # Get feats and sample names
+    all.feats <- base::as.character(
+      rownames(x@assays[[assay]]@meta.features)
+    )
+    all.cells <- base::as.character(rownames(x@meta.data))
+
+    # Get data
+    all.counts <- base::data.frame(
+      base::as.matrix(x@assays[[assay]]@counts)
+    )
+
+    # re-write row and colnames
+    rownames(all.counts) <- all.feats
+    colnames(all.counts) <- all.cells
+
+    # prep output and return
+    if (sum(grepl("^ENS", all.feats)) / length(all.feats) < 0.5) {
+      all.counts <- customConvertFeats(all.counts)
+    }
+    y <- DISCBIO(all.counts)
+  } else if ("SingleCellExperiment" %in% class(x)) {
+    y <- DISCBIO(x)
+  } else {
+    stop("Conversion not supported")
+  }
+  return(y)
+}
+
+
+#' Convert a DISCBIO object to a SingleCellExperiment.
+#'
+#' Extract the SingleCellExperiment input data from the corresponding input slot
+#' in a DISCBIO-class object
+#'
+#' @param x an object of class DISCBIO
+#'
+#' @return a SingleCellExperiment-class object
+#'
+#' @export
+#' @examples
+#' g1_disc <- DISCBIO(valuesG1msTest)
+#' class(g1_disc)
+#' g1_sce <- DISCBIO2SingleCellExperiment(g1_disc)
+#' class(g1_sce)
+#'
+DISCBIO2SingleCellExperiment <- function(x) {
+  return(x@SingleCellExperiment)
+}