Diff of /R/getCluster.R [000000] .. [d79ff0]

Switch to unified view

a b/R/getCluster.R
1
#' Get variable cluster from (s)PCA, (s)PLS or block.(s)PLS
2
#'
3
#' This function returns the cluster associated to each feature from a mixOmics object.
4
#'
5
#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls}
6
#' @param user.cluster a vector to filter the result and return only the features of the specified clusters
7
#' @param user.block a vector to filter the result and return the features of the specified blocks.
8
#'
9
#' @return
10
#' A data.frame containing the name of feature, its assigned cluster and other information such as selected component, contribution, sign, ...
11
#'
12
#' @details
13
#' For each feature, the cluster is assigned according to the maximum contribution on a component and the sign of that contribution.
14
#' 
15
#' @seealso 
16
#' \code{\link[mixOmics]{selectVar}}
17
#' 
18
#' @examples
19
#' demo <- suppressWarnings(get_demo_cluster())
20
#' pca.cluster <- getCluster(demo$pca)
21
#' spca.cluster <- getCluster(demo$spca)
22
#' pls.cluster <- getCluster(demo$pls)
23
#' spls.cluster <- getCluster(demo$spls)
24
#' block.pls.cluster <- getCluster(demo$block.pls)
25
#' block.spls.cluster <- getCluster(demo$block.spls)
26
#'
27
#' @export
28
getCluster <- function(X, user.block = NULL, user.cluster = NULL) UseMethod("getCluster")
29
30
#' get_demo_cluster
31
#' 
32
#' Generates random data to be used in examples.
33
#' 
34
#' @return  a list containg:
35
#' \item{X}{data.frame}
36
#' \item{Y}{data.frame}
37
#' \item{Z}{data.frame}
38
#' \item{pca}{a mixOmics pca result}
39
#' \item{spca}{a mixOmics spca result}
40
#' \item{pls}{a mixOmics pls result}
41
#' \item{spls}{a mixOmics spls result}
42
#' \item{block.pls}{a mixOmics block.pls result}
43
#' \item{block.spls}{a mixOmics block.spls result}
44
#' 
45
#' @examples
46
#' # Random data could lead to "The SGCCA algorithm did not converge" warning which is not important for a demo
47
#' demo <- suppressWarnings(get_demo_cluster())
48
#' @export
49
get_demo_cluster<- function(){
50
    X <- matrix(sample(1:1000), nrow = 10,
51
                dimnames = list(1:10, paste0("X_",1:100)))
52
53
    Y <- matrix(sample(1:100), nrow = 10, 
54
                dimnames = list(1:10, paste0("Y_",1:10)))
55
56
    Z <- matrix(sample(1:500), nrow = 10, 
57
                dimnames = list(1:10, Y = paste0("Z_",1:50)))
58
    
59
    list.res = list()
60
    list.res$X <- X
61
    list.res$Y <- Y
62
    list.res$Z <- Z
63
    list.res$pca <- mixOmics::pca(X = X, ncomp = 5)
64
    list.res$spca <- mixOmics::spca(X = X, ncomp = 5, keepX = c(5, 15, 4,5,6))
65
66
    list.res$pls <- mixOmics::pls(X = X, Y = Y, ncomp = 5, mode = "canonical")
67
    list.res$spls <- mixOmics::spls(X = X, Y = Y, ncomp = 5, mode = "canonical",
68
                                keepX = c(5,6,4,5,6), keepY = c(5,1,4,5,6))
69
70
    list.res$block.pls <- mixOmics::block.pls(X = list("X" = X, "Y" = Y, "Z" = Z), indY = 1,
71
                                             ncomp = 5, mode = "canonical")
72
73
    list.res$block.spls <- mixOmics::block.spls(X = list("X" = X, "Y" = Y, "Z" = Z), indY = 1, ncomp = 3,
74
                                             mode = "canonical", keepX = list("X" = c(5,6,4), "Y" = c(5,5,5), "Z" = c(4,2,4)))
75
    list.res$UpDown <- getUpDownCluster(X = X)
76
    return(invisible(list.res))
77
}
78
79
#' @importFrom dplyr mutate
80
#' @importFrom tibble rownames_to_column
81
#' @importFrom stringr str_remove
82
#' @importFrom magrittr %>%
83
#' @export
84
getCluster.pca <- function(X, user.block = NULL, user.cluster = NULL){
85
86
    #print("getCluster.pca")
87
    # colnames = PC1, PC2...
88
    loadings.max <- getMaxContrib(X$loadings$X)
89
90
    loadings.max <- loadings.max %>% 
91
        rownames_to_column("molecule") %>%
92
        mutate(cluster = stringr::str_remove(comp, "^PC") %>% 
93
                   as.numeric()) %>%
94
        mutate(block = "X") %>%
95
        .mutate_cluster()
96
    Valid.getCluster(loadings.max)
97
    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
98
    class(loadings.max) <- c("cluster.df", "data.frame")
99
    return(loadings.max)
100
}
101
102
#' @export
103
#' @importFrom dplyr mutate
104
#' @importFrom tibble rownames_to_column
105
#' @importFrom stringr str_remove
106
#' @importFrom magrittr %>%
107
getCluster.spca <- function(X,  user.block = NULL, user.cluster = NULL){
108
    # print(class(X))
109
    selected.features.loadings <- X$loadings$X[rowSums(X$loadings$X) != 0,,drop=FALSE]
110
    loadings.max <- getMaxContrib(selected.features.loadings)
111
112
    loadings.max <- loadings.max %>% 
113
        rownames_to_column("molecule") %>%
114
        mutate(cluster = stringr::str_remove(comp, "^PC") %>% 
115
                   as.numeric()) %>%
116
        mutate(block = "X") %>%
117
        .mutate_cluster()
118
    Valid.getCluster(loadings.max)
119
    
120
    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
121
    class(loadings.max) <- c("cluster.df", "data.frame")
122
    return(loadings.max)
123
}
124
125
#' @export
126
#' @importFrom dplyr mutate
127
#' @importFrom tibble rownames_to_column
128
#' @importFrom stringr str_remove
129
#' @importFrom magrittr %>%
130
getCluster.mixo_pls <- function(X,  user.block = NULL, user.cluster = NULL){
131
    # print(class(X))
132
    # block X
133
    loadings.max.X <- getMaxContrib(X$loadings$X)
134
135
    loadings.max.X <- loadings.max.X %>% 
136
        rownames_to_column("molecule") %>%
137
        mutate(cluster = stringr::str_remove(comp, "^comp") %>% 
138
                   as.numeric()) %>%
139
        mutate(block = "X")
140
141
142
    # block Y
143
    loadings.max.Y <- getMaxContrib(X$loadings$Y)
144
145
    loadings.max.Y <- loadings.max.Y %>% 
146
        rownames_to_column("molecule") %>%
147
        mutate(cluster = stringr::str_remove(comp, "^comp") %>% 
148
                   as.numeric()) %>%
149
        mutate(block = "Y")
150
151
    loadings.max <- rbind(loadings.max.X, loadings.max.Y) %>%
152
        .mutate_cluster()
153
154
    Valid.getCluster(loadings.max)
155
    
156
    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
157
    class(loadings.max) <- c("cluster.df", "data.frame")
158
    return(loadings.max)
159
}
160
161
#' @export
162
#' @importFrom dplyr mutate
163
#' @importFrom tibble rownames_to_column
164
#' @importFrom stringr str_remove
165
#' @importFrom magrittr %>%
166
getCluster.mixo_spls <- function(X,  user.block = NULL, user.cluster = NULL){
167
    # note : can not concatenate X and Y
168
    # because they can have the same features names contrary to block.(s)pls
169
170
    # print(class(X))
171
    # block X
172
    X.selected.features.loadings <- X$loadings$X[rowSums(X$loadings$X) != 0,,drop=FALSE]
173
    loadings.max.X <- getMaxContrib(X.selected.features.loadings)
174
175
    loadings.max.X <- loadings.max.X %>%
176
        rownames_to_column("molecule") %>%
177
        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
178
                   as.numeric()) %>%
179
        mutate(block = "X")
180
181
    # block Y
182
    Y.selected.features.loadings <- X$loadings$Y[rowSums(X$loadings$Y) != 0,,drop = FALSE]
183
    loadings.max.Y <- getMaxContrib(Y.selected.features.loadings)
184
185
    loadings.max.Y <- loadings.max.Y %>%
186
        rownames_to_column("molecule") %>%
187
        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
188
                   as.numeric()) %>%
189
        mutate(block = "Y")
190
191
    loadings.max <- rbind(loadings.max.X, loadings.max.Y)  %>%
192
        .mutate_cluster()
193
194
    Valid.getCluster(loadings.max)
195
    
196
    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
197
    class(loadings.max) <- c("cluster.df", "data.frame")
198
    return(loadings.max)
199
}
200
201
#' @export
202
#' @importFrom purrr imap set_names
203
#' @importFrom dplyr mutate left_join
204
#' @importFrom tibble rownames_to_column
205
#' @importFrom stringr str_remove
206
#' @importFrom magrittr %>%
207
getCluster.block.pls <- function(X,  user.block = NULL, user.cluster = NULL){
208
    # print(class(X))
209
    # get block info
210
    block.info <- purrr::imap(X$loadings, function(x,y) rownames(x) %>%
211
                           as.data.frame %>%
212
                           set_names("molecule") %>%
213
                           mutate("block" = y))
214
    block.info <- do.call("rbind", block.info) %>% as.data.frame() %>%
215
        mutate(block = factor(block, levels = names(X$loadings))) %>%
216
        mutate(molecule = as.character(molecule))
217
218
    loadings <- do.call("rbind", X$loadings)
219
    loadings.max <- getMaxContrib(loadings)
220
221
    loadings.max <- loadings.max %>%
222
        rownames_to_column("molecule") %>%
223
        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
224
                   as.numeric()) %>%
225
        left_join(block.info, by = c("molecule"= "molecule")) %>%
226
        .mutate_cluster()
227
228
    Valid.getCluster(loadings.max)
229
    
230
    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
231
    class(loadings.max) <- c("cluster.df", "data.frame")
232
    return(loadings.max)
233
}
234
235
#' @export
236
#' @importFrom purrr imap set_names
237
#' @importFrom dplyr mutate left_join
238
#' @importFrom tibble rownames_to_column
239
#' @importFrom stringr str_remove
240
#' @importFrom magrittr %>%
241
getCluster.block.spls <- function(X,  user.block = NULL, user.cluster = NULL){
242
243
    # print(class(X))
244
    # get block info
245
    block.info <- purrr::imap(X$loadings, function(x,y) rownames(x) %>%
246
                                  as.data.frame %>%
247
                                  set_names("molecule") %>%
248
                                  mutate("block" = y))
249
    block.info <- do.call("rbind", block.info) %>%
250
        as.data.frame() %>%
251
        mutate(block = factor(block, levels = names(X$loadings))) %>%
252
        mutate(molecule = as.character(molecule))
253
254
    # sparse
255
    loadings <- do.call("rbind", X$loadings)
256
    X.selected.features.loadings <- loadings[rowSums(loadings) != 0,, drop=FALSE]
257
    loadings.max <- getMaxContrib(X.selected.features.loadings)
258
259
    loadings.max <- loadings.max %>%
260
        rownames_to_column("molecule") %>%
261
        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
262
                   as.numeric()) %>%
263
        left_join(block.info, by = c("molecule"= "molecule")) %>%
264
        .mutate_cluster()
265
266
    Valid.getCluster(loadings.max)
267
    
268
    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
269
    class(loadings.max) <- c("cluster.df", "data.frame")
270
    return(loadings.max)
271
}
272
273
#' Get Max Contrib from loading matrix
274
#' 
275
#' @param X loading matrix from mixOmics
276
#' @return a matrix
277
#' 
278
#' @keywords internal
279
#' @noRd 
280
#' @importFrom purrr set_names
281
#' @importFrom magrittr %>%
282
getMaxContrib <- function(X){
283
    # loadings matrix, features in rows, comp in columns
284
    contrib.max <- apply(X = X, FUN = function(x) { x[which.max( abs(x) )][1]}, MARGIN = 1) %>%
285
        as.data.frame() %>%
286
        purrr::set_names("contrib.max")
287
288
    cluster.info <- apply(X = X, FUN = function(x) { colnames(X)[which.max( abs(x) )[1]]}, MARGIN = 1) %>%
289
        as.data.frame() %>%
290
        purrr::set_names("comp")
291
292
    stopifnot(rownames(contrib.max) == rownames(cluster.info))
293
    return(cbind(cluster.info, contrib.max))
294
}
295
296
# absmax <- function(x) { x[which.max( abs(x) )][1]}
297
# absmax.index <- function(x) { which.max( abs(x) )[1]}
298
299
300
#' @importFrom dplyr mutate case_when pull
301
#' @importFrom magrittr %>%
302
.mutate_cluster <- function(loadings.max){
303
    X <- loadings.max %>%
304
        mutate(cluster = cluster * sign(contrib.max)) %>%
305
    mutate(contribution = case_when(sign(contrib.max) == 1 ~ "positive",
306
                                    sign(contrib.max) == -1 ~ "negative",
307
                                    sign(contrib.max) == 0 ~ "NULL"))
308
309
    cluster.order <-  X %>%
310
        pull(cluster) %>%
311
        abs %>%
312
        unique %>% 
313
        sort %>%
314
        rep(each=2) %>% `*`(c(1,-1)) %>% unique
315
316
    X <- X %>%
317
        mutate(cluster = factor(cluster, levels = cluster.order))
318
    return(X)
319
}
320
321
Valid.getCluster <- function(X){
322
    col_names <- c("molecule","comp","contrib.max","cluster","block","contribution")
323
    stopifnot(all(col_names %in% colnames(X)))
324
    # other check ?
325
    # all comp present? sometimes not true
326
    # idem for number of cluster
327
    # also a molecule can be found in different cluster
328
}
329
330
331
#' @importFrom dplyr filter
332
#' @export
333
filter.cluster.df <- function(.data, user.block = NULL, user.cluster = NULL){
334
    # X <- getCluster(pca); pca.cluster
335
    
336
    X.filter <- .data
337
    if(!is.null(user.block)){
338
        X.filter <- dplyr::filter(X.filter, block %in% user.block)
339
    }
340
    if(!is.null(user.cluster)){
341
        X.filter <- dplyr::filter(X.filter, cluster %in% user.cluster)
342
    }
343
    return(X.filter)
344
}
345
346
347
# add getCluster for getCluster (cluser.df) to easily apply filter
348
#' @export
349
getCluster.cluster.df <- function(X, user.block = NULL, user.cluster = NULL){
350
    results <- X
351
    results <- filter.cluster.df(.data = results, user.block = user.block, user.cluster = user.cluster)
352
    class(results) <- c("cluster.df", "data.frame")
353
    return(results)
354
}
355
356
357
358