|
a |
|
b/R/customConverters.R |
|
|
1 |
#' Automatic Feature Id Conversion. |
|
|
2 |
#' |
|
|
3 |
#' Attempt to automatically convert non-ENSEMBL feature identifiers to |
|
|
4 |
#' ENSEMBL identifiers. Features are included as rownames of the input |
|
|
5 |
#' data.frame (or matrix). It is assumed that feature identifiers |
|
|
6 |
#' (i.e., rownames of x) come from human or mouse genomes, and |
|
|
7 |
#' are either OFFICIAL SYMBOLS or ENTREZIDS. If less than 20% of match |
|
|
8 |
#' is identified, an error will be thrown. |
|
|
9 |
#' |
|
|
10 |
#' @param x data.frame or matrix including raw counts (typically, UMIs), |
|
|
11 |
#' where rows are features (genes) and rownames are feature identifiers |
|
|
12 |
#' (SYMBOLs or ENTREZIDs). |
|
|
13 |
#' @param verbose logical, shall messages be printed to inform about |
|
|
14 |
#' conversion advances. |
|
|
15 |
#' |
|
|
16 |
#' @return a data.frame where rownames are ENSEMBL IDs. The new feature IDs are |
|
|
17 |
#' automatically imputed based on existing feature IDs (SYMBOLs or ENTREZIDs). |
|
|
18 |
#' |
|
|
19 |
#' @export |
|
|
20 |
customConvertFeats <- function(x, verbose = TRUE) { |
|
|
21 |
if (!(is.matrix(x) || is.data.frame(x))) { |
|
|
22 |
stop("wrong input") |
|
|
23 |
} |
|
|
24 |
|
|
|
25 |
# uncomment |
|
|
26 |
myDict <- DIscBIO::HumanMouseGeneIds |
|
|
27 |
myDict <- myDict[!is.na(myDict[, "ENSEMBL"]), ] |
|
|
28 |
|
|
|
29 |
# |
|
|
30 |
xx <- rownames(x) |
|
|
31 |
keep.ei <- xx %in% myDict[, "ENTREZID"] |
|
|
32 |
keep.sy <- xx %in% myDict[, "SYMBOL"] |
|
|
33 |
keep.ez <- xx %in% myDict[, "ENSEMBL"] |
|
|
34 |
|
|
|
35 |
rat.ez <- sum(keep.ez) / length(xx) |
|
|
36 |
rat.ei <- sum(keep.ei) / length(xx) |
|
|
37 |
rat.sy <- sum(keep.sy) / length(xx) |
|
|
38 |
|
|
|
39 |
# Automatic selection and replacement |
|
|
40 |
if (rat.ez >= rat.sy && rat.ez >= rat.ei && rat.ez >= 0.2) { |
|
|
41 |
# nothing to do, return |
|
|
42 |
return(x) |
|
|
43 |
} else if (rat.ei >= rat.sy && rat.ei >= 0.2) { |
|
|
44 |
x <- x[!duplicated(xx), ] |
|
|
45 |
x <- x[rownames(x) %in% myDict[, "ENTREZID"], ] |
|
|
46 |
|
|
|
47 |
tmpDict <- myDict$ENSEMBL |
|
|
48 |
names(tmpDict) <- myDict$ENTREZID |
|
|
49 |
rownames(x) <- as.character(tmpDict[rownames(x)]) |
|
|
50 |
} else if (rat.ei < rat.sy && rat.sy >= 0.2) { |
|
|
51 |
x <- x[!duplicated(xx), ] |
|
|
52 |
x <- x[rownames(x) %in% myDict[, "SYMBOL"], ] |
|
|
53 |
|
|
|
54 |
tmpDict <- myDict$ENSEMBL |
|
|
55 |
names(tmpDict) <- myDict$SYMBOL |
|
|
56 |
rownames(x) <- as.character(tmpDict[rownames(x)]) |
|
|
57 |
} else { |
|
|
58 |
message("Feat ID Conversion could not be performed") |
|
|
59 |
} |
|
|
60 |
return(x) |
|
|
61 |
} |
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
#' Convert Single Cell Data Objects to DISCBIO. |
|
|
66 |
#' |
|
|
67 |
#' Initialize a DISCBIO-class object starting from a |
|
|
68 |
#' SingleCellExperiment object or a Seurat object |
|
|
69 |
#' |
|
|
70 |
#' @param x an object of class Seurat or SingleCellExperiment |
|
|
71 |
#' @param ... additional parameters to pass to the function |
|
|
72 |
#' |
|
|
73 |
#' @details Additional parameters to pass to `list` include, if x is |
|
|
74 |
#' a Seurat object, "assay", which is a string indicating the assay slot |
|
|
75 |
#' used to obtain data from (defaults to 'RNA') |
|
|
76 |
#' |
|
|
77 |
#' @return a DISCBIO-class object |
|
|
78 |
#' |
|
|
79 |
#' @export |
|
|
80 |
#' |
|
|
81 |
as.DISCBIO <- function(x, ...) { |
|
|
82 |
if ("Seurat" %in% class(x)) { |
|
|
83 |
# Get Arguments and parse out what we want |
|
|
84 |
all.args <- list(...) |
|
|
85 |
|
|
|
86 |
# Fetch arguments we care about |
|
|
87 |
if ("assay" %in% all.args) { |
|
|
88 |
assay <- all.args[["assay"]] |
|
|
89 |
} else { |
|
|
90 |
assay <- "RNA" |
|
|
91 |
} |
|
|
92 |
|
|
|
93 |
# Get feats and sample names |
|
|
94 |
all.feats <- base::as.character( |
|
|
95 |
rownames(x@assays[[assay]]@meta.features) |
|
|
96 |
) |
|
|
97 |
all.cells <- base::as.character(rownames(x@meta.data)) |
|
|
98 |
|
|
|
99 |
# Get data |
|
|
100 |
all.counts <- base::data.frame( |
|
|
101 |
base::as.matrix(x@assays[[assay]]@counts) |
|
|
102 |
) |
|
|
103 |
|
|
|
104 |
# re-write row and colnames |
|
|
105 |
rownames(all.counts) <- all.feats |
|
|
106 |
colnames(all.counts) <- all.cells |
|
|
107 |
|
|
|
108 |
# prep output and return |
|
|
109 |
if (sum(grepl("^ENS", all.feats)) / length(all.feats) < 0.5) { |
|
|
110 |
all.counts <- customConvertFeats(all.counts) |
|
|
111 |
} |
|
|
112 |
y <- DISCBIO(all.counts) |
|
|
113 |
} else if ("SingleCellExperiment" %in% class(x)) { |
|
|
114 |
y <- DISCBIO(x) |
|
|
115 |
} else { |
|
|
116 |
stop("Conversion not supported") |
|
|
117 |
} |
|
|
118 |
return(y) |
|
|
119 |
} |
|
|
120 |
|
|
|
121 |
|
|
|
122 |
#' Convert a DISCBIO object to a SingleCellExperiment. |
|
|
123 |
#' |
|
|
124 |
#' Extract the SingleCellExperiment input data from the corresponding input slot |
|
|
125 |
#' in a DISCBIO-class object |
|
|
126 |
#' |
|
|
127 |
#' @param x an object of class DISCBIO |
|
|
128 |
#' |
|
|
129 |
#' @return a SingleCellExperiment-class object |
|
|
130 |
#' |
|
|
131 |
#' @export |
|
|
132 |
#' @examples |
|
|
133 |
#' g1_disc <- DISCBIO(valuesG1msTest) |
|
|
134 |
#' class(g1_disc) |
|
|
135 |
#' g1_sce <- DISCBIO2SingleCellExperiment(g1_disc) |
|
|
136 |
#' class(g1_sce) |
|
|
137 |
#' |
|
|
138 |
DISCBIO2SingleCellExperiment <- function(x) { |
|
|
139 |
return(x@SingleCellExperiment) |
|
|
140 |
} |