|
a |
|
b/R/getCluster.R |
|
|
1 |
#' Get variable cluster from (s)PCA, (s)PLS or block.(s)PLS |
|
|
2 |
#' |
|
|
3 |
#' This function returns the cluster associated to each feature from a mixOmics object. |
|
|
4 |
#' |
|
|
5 |
#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls} |
|
|
6 |
#' @param user.cluster a vector to filter the result and return only the features of the specified clusters |
|
|
7 |
#' @param user.block a vector to filter the result and return the features of the specified blocks. |
|
|
8 |
#' |
|
|
9 |
#' @return |
|
|
10 |
#' A data.frame containing the name of feature, its assigned cluster and other information such as selected component, contribution, sign, ... |
|
|
11 |
#' |
|
|
12 |
#' @details |
|
|
13 |
#' For each feature, the cluster is assigned according to the maximum contribution on a component and the sign of that contribution. |
|
|
14 |
#' |
|
|
15 |
#' @seealso |
|
|
16 |
#' \code{\link[mixOmics]{selectVar}} |
|
|
17 |
#' |
|
|
18 |
#' @examples |
|
|
19 |
#' demo <- suppressWarnings(get_demo_cluster()) |
|
|
20 |
#' pca.cluster <- getCluster(demo$pca) |
|
|
21 |
#' spca.cluster <- getCluster(demo$spca) |
|
|
22 |
#' pls.cluster <- getCluster(demo$pls) |
|
|
23 |
#' spls.cluster <- getCluster(demo$spls) |
|
|
24 |
#' block.pls.cluster <- getCluster(demo$block.pls) |
|
|
25 |
#' block.spls.cluster <- getCluster(demo$block.spls) |
|
|
26 |
#' |
|
|
27 |
#' @export |
|
|
28 |
getCluster <- function(X, user.block = NULL, user.cluster = NULL) UseMethod("getCluster") |
|
|
29 |
|
|
|
30 |
#' get_demo_cluster |
|
|
31 |
#' |
|
|
32 |
#' Generates random data to be used in examples. |
|
|
33 |
#' |
|
|
34 |
#' @return a list containg: |
|
|
35 |
#' \item{X}{data.frame} |
|
|
36 |
#' \item{Y}{data.frame} |
|
|
37 |
#' \item{Z}{data.frame} |
|
|
38 |
#' \item{pca}{a mixOmics pca result} |
|
|
39 |
#' \item{spca}{a mixOmics spca result} |
|
|
40 |
#' \item{pls}{a mixOmics pls result} |
|
|
41 |
#' \item{spls}{a mixOmics spls result} |
|
|
42 |
#' \item{block.pls}{a mixOmics block.pls result} |
|
|
43 |
#' \item{block.spls}{a mixOmics block.spls result} |
|
|
44 |
#' |
|
|
45 |
#' @examples |
|
|
46 |
#' # Random data could lead to "The SGCCA algorithm did not converge" warning which is not important for a demo |
|
|
47 |
#' demo <- suppressWarnings(get_demo_cluster()) |
|
|
48 |
#' @export |
|
|
49 |
get_demo_cluster<- function(){ |
|
|
50 |
X <- matrix(sample(1:1000), nrow = 10, |
|
|
51 |
dimnames = list(1:10, paste0("X_",1:100))) |
|
|
52 |
|
|
|
53 |
Y <- matrix(sample(1:100), nrow = 10, |
|
|
54 |
dimnames = list(1:10, paste0("Y_",1:10))) |
|
|
55 |
|
|
|
56 |
Z <- matrix(sample(1:500), nrow = 10, |
|
|
57 |
dimnames = list(1:10, Y = paste0("Z_",1:50))) |
|
|
58 |
|
|
|
59 |
list.res = list() |
|
|
60 |
list.res$X <- X |
|
|
61 |
list.res$Y <- Y |
|
|
62 |
list.res$Z <- Z |
|
|
63 |
list.res$pca <- mixOmics::pca(X = X, ncomp = 5) |
|
|
64 |
list.res$spca <- mixOmics::spca(X = X, ncomp = 5, keepX = c(5, 15, 4,5,6)) |
|
|
65 |
|
|
|
66 |
list.res$pls <- mixOmics::pls(X = X, Y = Y, ncomp = 5, mode = "canonical") |
|
|
67 |
list.res$spls <- mixOmics::spls(X = X, Y = Y, ncomp = 5, mode = "canonical", |
|
|
68 |
keepX = c(5,6,4,5,6), keepY = c(5,1,4,5,6)) |
|
|
69 |
|
|
|
70 |
list.res$block.pls <- mixOmics::block.pls(X = list("X" = X, "Y" = Y, "Z" = Z), indY = 1, |
|
|
71 |
ncomp = 5, mode = "canonical") |
|
|
72 |
|
|
|
73 |
list.res$block.spls <- mixOmics::block.spls(X = list("X" = X, "Y" = Y, "Z" = Z), indY = 1, ncomp = 3, |
|
|
74 |
mode = "canonical", keepX = list("X" = c(5,6,4), "Y" = c(5,5,5), "Z" = c(4,2,4))) |
|
|
75 |
list.res$UpDown <- getUpDownCluster(X = X) |
|
|
76 |
return(invisible(list.res)) |
|
|
77 |
} |
|
|
78 |
|
|
|
79 |
#' @importFrom dplyr mutate |
|
|
80 |
#' @importFrom tibble rownames_to_column |
|
|
81 |
#' @importFrom stringr str_remove |
|
|
82 |
#' @importFrom magrittr %>% |
|
|
83 |
#' @export |
|
|
84 |
getCluster.pca <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
85 |
|
|
|
86 |
#print("getCluster.pca") |
|
|
87 |
# colnames = PC1, PC2... |
|
|
88 |
loadings.max <- getMaxContrib(X$loadings$X) |
|
|
89 |
|
|
|
90 |
loadings.max <- loadings.max %>% |
|
|
91 |
rownames_to_column("molecule") %>% |
|
|
92 |
mutate(cluster = stringr::str_remove(comp, "^PC") %>% |
|
|
93 |
as.numeric()) %>% |
|
|
94 |
mutate(block = "X") %>% |
|
|
95 |
.mutate_cluster() |
|
|
96 |
Valid.getCluster(loadings.max) |
|
|
97 |
loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster) |
|
|
98 |
class(loadings.max) <- c("cluster.df", "data.frame") |
|
|
99 |
return(loadings.max) |
|
|
100 |
} |
|
|
101 |
|
|
|
102 |
#' @export |
|
|
103 |
#' @importFrom dplyr mutate |
|
|
104 |
#' @importFrom tibble rownames_to_column |
|
|
105 |
#' @importFrom stringr str_remove |
|
|
106 |
#' @importFrom magrittr %>% |
|
|
107 |
getCluster.spca <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
108 |
# print(class(X)) |
|
|
109 |
selected.features.loadings <- X$loadings$X[rowSums(X$loadings$X) != 0,,drop=FALSE] |
|
|
110 |
loadings.max <- getMaxContrib(selected.features.loadings) |
|
|
111 |
|
|
|
112 |
loadings.max <- loadings.max %>% |
|
|
113 |
rownames_to_column("molecule") %>% |
|
|
114 |
mutate(cluster = stringr::str_remove(comp, "^PC") %>% |
|
|
115 |
as.numeric()) %>% |
|
|
116 |
mutate(block = "X") %>% |
|
|
117 |
.mutate_cluster() |
|
|
118 |
Valid.getCluster(loadings.max) |
|
|
119 |
|
|
|
120 |
loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster) |
|
|
121 |
class(loadings.max) <- c("cluster.df", "data.frame") |
|
|
122 |
return(loadings.max) |
|
|
123 |
} |
|
|
124 |
|
|
|
125 |
#' @export |
|
|
126 |
#' @importFrom dplyr mutate |
|
|
127 |
#' @importFrom tibble rownames_to_column |
|
|
128 |
#' @importFrom stringr str_remove |
|
|
129 |
#' @importFrom magrittr %>% |
|
|
130 |
getCluster.mixo_pls <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
131 |
# print(class(X)) |
|
|
132 |
# block X |
|
|
133 |
loadings.max.X <- getMaxContrib(X$loadings$X) |
|
|
134 |
|
|
|
135 |
loadings.max.X <- loadings.max.X %>% |
|
|
136 |
rownames_to_column("molecule") %>% |
|
|
137 |
mutate(cluster = stringr::str_remove(comp, "^comp") %>% |
|
|
138 |
as.numeric()) %>% |
|
|
139 |
mutate(block = "X") |
|
|
140 |
|
|
|
141 |
|
|
|
142 |
# block Y |
|
|
143 |
loadings.max.Y <- getMaxContrib(X$loadings$Y) |
|
|
144 |
|
|
|
145 |
loadings.max.Y <- loadings.max.Y %>% |
|
|
146 |
rownames_to_column("molecule") %>% |
|
|
147 |
mutate(cluster = stringr::str_remove(comp, "^comp") %>% |
|
|
148 |
as.numeric()) %>% |
|
|
149 |
mutate(block = "Y") |
|
|
150 |
|
|
|
151 |
loadings.max <- rbind(loadings.max.X, loadings.max.Y) %>% |
|
|
152 |
.mutate_cluster() |
|
|
153 |
|
|
|
154 |
Valid.getCluster(loadings.max) |
|
|
155 |
|
|
|
156 |
loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster) |
|
|
157 |
class(loadings.max) <- c("cluster.df", "data.frame") |
|
|
158 |
return(loadings.max) |
|
|
159 |
} |
|
|
160 |
|
|
|
161 |
#' @export |
|
|
162 |
#' @importFrom dplyr mutate |
|
|
163 |
#' @importFrom tibble rownames_to_column |
|
|
164 |
#' @importFrom stringr str_remove |
|
|
165 |
#' @importFrom magrittr %>% |
|
|
166 |
getCluster.mixo_spls <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
167 |
# note : can not concatenate X and Y |
|
|
168 |
# because they can have the same features names contrary to block.(s)pls |
|
|
169 |
|
|
|
170 |
# print(class(X)) |
|
|
171 |
# block X |
|
|
172 |
X.selected.features.loadings <- X$loadings$X[rowSums(X$loadings$X) != 0,,drop=FALSE] |
|
|
173 |
loadings.max.X <- getMaxContrib(X.selected.features.loadings) |
|
|
174 |
|
|
|
175 |
loadings.max.X <- loadings.max.X %>% |
|
|
176 |
rownames_to_column("molecule") %>% |
|
|
177 |
mutate(cluster = stringr::str_remove(comp, "^comp") %>% |
|
|
178 |
as.numeric()) %>% |
|
|
179 |
mutate(block = "X") |
|
|
180 |
|
|
|
181 |
# block Y |
|
|
182 |
Y.selected.features.loadings <- X$loadings$Y[rowSums(X$loadings$Y) != 0,,drop = FALSE] |
|
|
183 |
loadings.max.Y <- getMaxContrib(Y.selected.features.loadings) |
|
|
184 |
|
|
|
185 |
loadings.max.Y <- loadings.max.Y %>% |
|
|
186 |
rownames_to_column("molecule") %>% |
|
|
187 |
mutate(cluster = stringr::str_remove(comp, "^comp") %>% |
|
|
188 |
as.numeric()) %>% |
|
|
189 |
mutate(block = "Y") |
|
|
190 |
|
|
|
191 |
loadings.max <- rbind(loadings.max.X, loadings.max.Y) %>% |
|
|
192 |
.mutate_cluster() |
|
|
193 |
|
|
|
194 |
Valid.getCluster(loadings.max) |
|
|
195 |
|
|
|
196 |
loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster) |
|
|
197 |
class(loadings.max) <- c("cluster.df", "data.frame") |
|
|
198 |
return(loadings.max) |
|
|
199 |
} |
|
|
200 |
|
|
|
201 |
#' @export |
|
|
202 |
#' @importFrom purrr imap set_names |
|
|
203 |
#' @importFrom dplyr mutate left_join |
|
|
204 |
#' @importFrom tibble rownames_to_column |
|
|
205 |
#' @importFrom stringr str_remove |
|
|
206 |
#' @importFrom magrittr %>% |
|
|
207 |
getCluster.block.pls <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
208 |
# print(class(X)) |
|
|
209 |
# get block info |
|
|
210 |
block.info <- purrr::imap(X$loadings, function(x,y) rownames(x) %>% |
|
|
211 |
as.data.frame %>% |
|
|
212 |
set_names("molecule") %>% |
|
|
213 |
mutate("block" = y)) |
|
|
214 |
block.info <- do.call("rbind", block.info) %>% as.data.frame() %>% |
|
|
215 |
mutate(block = factor(block, levels = names(X$loadings))) %>% |
|
|
216 |
mutate(molecule = as.character(molecule)) |
|
|
217 |
|
|
|
218 |
loadings <- do.call("rbind", X$loadings) |
|
|
219 |
loadings.max <- getMaxContrib(loadings) |
|
|
220 |
|
|
|
221 |
loadings.max <- loadings.max %>% |
|
|
222 |
rownames_to_column("molecule") %>% |
|
|
223 |
mutate(cluster = stringr::str_remove(comp, "^comp") %>% |
|
|
224 |
as.numeric()) %>% |
|
|
225 |
left_join(block.info, by = c("molecule"= "molecule")) %>% |
|
|
226 |
.mutate_cluster() |
|
|
227 |
|
|
|
228 |
Valid.getCluster(loadings.max) |
|
|
229 |
|
|
|
230 |
loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster) |
|
|
231 |
class(loadings.max) <- c("cluster.df", "data.frame") |
|
|
232 |
return(loadings.max) |
|
|
233 |
} |
|
|
234 |
|
|
|
235 |
#' @export |
|
|
236 |
#' @importFrom purrr imap set_names |
|
|
237 |
#' @importFrom dplyr mutate left_join |
|
|
238 |
#' @importFrom tibble rownames_to_column |
|
|
239 |
#' @importFrom stringr str_remove |
|
|
240 |
#' @importFrom magrittr %>% |
|
|
241 |
getCluster.block.spls <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
242 |
|
|
|
243 |
# print(class(X)) |
|
|
244 |
# get block info |
|
|
245 |
block.info <- purrr::imap(X$loadings, function(x,y) rownames(x) %>% |
|
|
246 |
as.data.frame %>% |
|
|
247 |
set_names("molecule") %>% |
|
|
248 |
mutate("block" = y)) |
|
|
249 |
block.info <- do.call("rbind", block.info) %>% |
|
|
250 |
as.data.frame() %>% |
|
|
251 |
mutate(block = factor(block, levels = names(X$loadings))) %>% |
|
|
252 |
mutate(molecule = as.character(molecule)) |
|
|
253 |
|
|
|
254 |
# sparse |
|
|
255 |
loadings <- do.call("rbind", X$loadings) |
|
|
256 |
X.selected.features.loadings <- loadings[rowSums(loadings) != 0,, drop=FALSE] |
|
|
257 |
loadings.max <- getMaxContrib(X.selected.features.loadings) |
|
|
258 |
|
|
|
259 |
loadings.max <- loadings.max %>% |
|
|
260 |
rownames_to_column("molecule") %>% |
|
|
261 |
mutate(cluster = stringr::str_remove(comp, "^comp") %>% |
|
|
262 |
as.numeric()) %>% |
|
|
263 |
left_join(block.info, by = c("molecule"= "molecule")) %>% |
|
|
264 |
.mutate_cluster() |
|
|
265 |
|
|
|
266 |
Valid.getCluster(loadings.max) |
|
|
267 |
|
|
|
268 |
loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster) |
|
|
269 |
class(loadings.max) <- c("cluster.df", "data.frame") |
|
|
270 |
return(loadings.max) |
|
|
271 |
} |
|
|
272 |
|
|
|
273 |
#' Get Max Contrib from loading matrix |
|
|
274 |
#' |
|
|
275 |
#' @param X loading matrix from mixOmics |
|
|
276 |
#' @return a matrix |
|
|
277 |
#' |
|
|
278 |
#' @keywords internal |
|
|
279 |
#' @noRd |
|
|
280 |
#' @importFrom purrr set_names |
|
|
281 |
#' @importFrom magrittr %>% |
|
|
282 |
getMaxContrib <- function(X){ |
|
|
283 |
# loadings matrix, features in rows, comp in columns |
|
|
284 |
contrib.max <- apply(X = X, FUN = function(x) { x[which.max( abs(x) )][1]}, MARGIN = 1) %>% |
|
|
285 |
as.data.frame() %>% |
|
|
286 |
purrr::set_names("contrib.max") |
|
|
287 |
|
|
|
288 |
cluster.info <- apply(X = X, FUN = function(x) { colnames(X)[which.max( abs(x) )[1]]}, MARGIN = 1) %>% |
|
|
289 |
as.data.frame() %>% |
|
|
290 |
purrr::set_names("comp") |
|
|
291 |
|
|
|
292 |
stopifnot(rownames(contrib.max) == rownames(cluster.info)) |
|
|
293 |
return(cbind(cluster.info, contrib.max)) |
|
|
294 |
} |
|
|
295 |
|
|
|
296 |
# absmax <- function(x) { x[which.max( abs(x) )][1]} |
|
|
297 |
# absmax.index <- function(x) { which.max( abs(x) )[1]} |
|
|
298 |
|
|
|
299 |
|
|
|
300 |
#' @importFrom dplyr mutate case_when pull |
|
|
301 |
#' @importFrom magrittr %>% |
|
|
302 |
.mutate_cluster <- function(loadings.max){ |
|
|
303 |
X <- loadings.max %>% |
|
|
304 |
mutate(cluster = cluster * sign(contrib.max)) %>% |
|
|
305 |
mutate(contribution = case_when(sign(contrib.max) == 1 ~ "positive", |
|
|
306 |
sign(contrib.max) == -1 ~ "negative", |
|
|
307 |
sign(contrib.max) == 0 ~ "NULL")) |
|
|
308 |
|
|
|
309 |
cluster.order <- X %>% |
|
|
310 |
pull(cluster) %>% |
|
|
311 |
abs %>% |
|
|
312 |
unique %>% |
|
|
313 |
sort %>% |
|
|
314 |
rep(each=2) %>% `*`(c(1,-1)) %>% unique |
|
|
315 |
|
|
|
316 |
X <- X %>% |
|
|
317 |
mutate(cluster = factor(cluster, levels = cluster.order)) |
|
|
318 |
return(X) |
|
|
319 |
} |
|
|
320 |
|
|
|
321 |
Valid.getCluster <- function(X){ |
|
|
322 |
col_names <- c("molecule","comp","contrib.max","cluster","block","contribution") |
|
|
323 |
stopifnot(all(col_names %in% colnames(X))) |
|
|
324 |
# other check ? |
|
|
325 |
# all comp present? sometimes not true |
|
|
326 |
# idem for number of cluster |
|
|
327 |
# also a molecule can be found in different cluster |
|
|
328 |
} |
|
|
329 |
|
|
|
330 |
|
|
|
331 |
#' @importFrom dplyr filter |
|
|
332 |
#' @export |
|
|
333 |
filter.cluster.df <- function(.data, user.block = NULL, user.cluster = NULL){ |
|
|
334 |
# X <- getCluster(pca); pca.cluster |
|
|
335 |
|
|
|
336 |
X.filter <- .data |
|
|
337 |
if(!is.null(user.block)){ |
|
|
338 |
X.filter <- dplyr::filter(X.filter, block %in% user.block) |
|
|
339 |
} |
|
|
340 |
if(!is.null(user.cluster)){ |
|
|
341 |
X.filter <- dplyr::filter(X.filter, cluster %in% user.cluster) |
|
|
342 |
} |
|
|
343 |
return(X.filter) |
|
|
344 |
} |
|
|
345 |
|
|
|
346 |
|
|
|
347 |
# add getCluster for getCluster (cluser.df) to easily apply filter |
|
|
348 |
#' @export |
|
|
349 |
getCluster.cluster.df <- function(X, user.block = NULL, user.cluster = NULL){ |
|
|
350 |
results <- X |
|
|
351 |
results <- filter.cluster.df(.data = results, user.block = user.block, user.cluster = user.cluster) |
|
|
352 |
class(results) <- c("cluster.df", "data.frame") |
|
|
353 |
return(results) |
|
|
354 |
} |
|
|
355 |
|
|
|
356 |
|
|
|
357 |
|
|
|
358 |
|