Diff of /R/RWR.R [000000] .. [73f552]

Switch to unified view

a b/R/RWR.R
1
#' Random Walk with Restart
2
#' 
3
#' This function performs a propagation analysis by random walk with restart
4
#'  in a multi-layered network from specific seeds.
5
#' 
6
#' @param X an igraph or list.igraph object.
7
#' @param seed a character vector. Only seeds present in X are considered.
8
#' @param r a numeric value between 0 and 1. 
9
#'  It sets the probability of restarting to a seed node after each step. 
10
#'  
11
#' @return 
12
#' Each element of X returns a list (class = 'rwr') 
13
#' containing the following elements:
14
#' \item{rwr}{a \code{data.frame}, the RWR results for each valid seed.}
15
#' \item{seed}{a character vector with the valid seeds}
16
#' \item{graph}{\code{igraph} object from X}
17
#' If X is a \code{list.igraph}, the returned object is a \code{list.rwr}.
18
#' 
19
#' @seealso 
20
# \code{\link[RandomWalkRestartMH]{Random.Walk.Restart.Multiplex}}, 
21
#' \code{\link[netOmics]{rwr_find_seeds_between_attributes}}, 
22
#' \code{\link[netOmics]{rwr_find_closest_type}}
23
#' 
24
#' @examples
25
#' graph1 <- igraph::graph_from_data_frame(
26
#'     list(from = c('A', 'B', 'A', 'D', 'C', 'A', 'C'), 
27
#'          to = c('B', 'C', 'D', 'E', 'D', 'F', 'G')), 
28
#'     directed = FALSE)
29
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
30
#'                                   name = 'type', 
31
#'                                   index = c('A','B','C'),
32
#'                                   value = '1')
33
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
34
#'                                   name = 'type', 
35
#'                                   index = c('D','E'),
36
#'                                   value = '2')
37
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
38
#'                                   name = 'type', 
39
#'                                   index = c('F', 'G'),
40
#'                                   value = '3')
41
#' 
42
#' rwr_res <- random_walk_restart(X = graph1, 
43
#'                                seed = c('A', 'B', 'C', 'D', 'E'))
44
#' 
45
# @importFrom RandomWalkRestartMH create.multiplex
46
# @importFrom RandomWalkRestartMH compute.adjacency.matrix
47
# @importFrom RandomWalkRestartMH normalize.multiplex.adjacency 
48
# @importFrom RandomWalkRestartMH Random.Walk.Restart.Multiplex
49
#' @importFrom dplyr mutate left_join
50
#' @importFrom purrr imap_dfr
51
#' @importFrom magrittr %>%
52
#' @export
53
random_walk_restart <- function(X, seed = NULL, r = 0.7) {
54
    
55
    # check X is graph or list of graph
56
    X <- check_graph(X)
57
    
58
    # check seed
59
    seed <- check_vector_char(X = seed, var.name = "'seed' ")
60
    
61
    # check r
62
    r <- check_single_numeric_value(r, min = 0, max = 1, var.name = "'r' ")
63
    
64
    # delta
65
    delta <- 0.5
66
    
67
    res <- list()
68
    if (is(X, "list.igraph")) {
69
        # apply RWR on each graph
70
        for (i in seq_along(X)) {
71
            Xi <- X[[i]]
72
            Xi <- remove_unconnected_nodes(Xi)
73
            index_name_i <- ifelse(
74
                !is.null(names(X)[i]),
75
                names(X)[i],
76
                i
77
            )
78
            
79
            ## possible implementation to benchmark: extract graph component
80
            ## and make couples with seeds and matching subgraph
81
            
82
            seed_xi <- intersect(seed, igraph::V(Xi)$name)  
83
            # prevent the error: 'Some of the seeds are not nodes of the network
84
            
85
            # rwr layer names: to change if we include some day multiplex
86
            # network
87
            layers_name <- ifelse(
88
                !is.null(names(X)[i]),
89
                names(X)[i],
90
                "graph"
91
            )
92
            
93
            # multiplex <- RandomWalkRestartMH::create.multiplex(L1 =
94
            # Xi,Layers_Name=layers_name)
95
            #multiplex <- RandomWalkRestartMH::create.multiplex(
96
            multiplex <- create.multiplex(
97
                LayersList = list(L1 = Xi),
98
                Layers_Name = layers_name
99
            )
100
            # adj_matrix <- RandomWalkRestartMH::compute.adjacency.matrix(
101
            adj_matrix <- compute.adjacency.matrix(
102
                x = multiplex, 
103
                delta = delta)
104
            adj_matrix_norm <- 
105
               normalize.multiplex.adjacency(
106
                # RandomWalkRestartMH::normalize.multiplex.adjacency(
107
                    x = adj_matrix)  # time/RAM consuming
108
            
109
            res_tmp <- list()
110
            for (seed_xi_i in seed_xi) {
111
                # rwr_res <- RandomWalkRestartMH::Random.Walk.Restart.Multiplex(
112
                rwr_res <- Random.Walk.Restart.Multiplex(
113
                    
114
                    x = adj_matrix_norm, 
115
                    MultiplexObject = multiplex, 
116
                    Seeds = seed_xi_i,
117
                    r = r
118
                )
119
                res_tmp[[seed_xi_i]] <- rwr_res
120
            }
121
            if (!is_empty(seed_xi)) {
122
                res[[index_name_i]] <- list()
123
                res[[index_name_i]][["rwr"]] <- purrr::imap_dfr(
124
                    res_tmp, ~{
125
                        .x$RWRM_Results %>%
126
                            dplyr::mutate(SeedName = .y)
127
                    }
128
                ) %>%
129
                    dplyr::left_join(
130
                        as.data.frame(vertex_attr(X[[i]])),
131
                        by = c(NodeNames = "name")
132
                    )
133
                res[[index_name_i]][["graph"]] <- X[[i]]
134
                res[[index_name_i]][["seed"]] <- seed_xi
135
                class(res[[index_name_i]]) <- "rwr"
136
            }
137
            class(res) <- c("list.rwr")
138
        }
139
    } else {
140
        # X is a single graph
141
        Xi <- remove_unconnected_nodes(X)
142
        
143
        ## possible implementation to benchmark: extract graph component and
144
        ## make couples with seeds and matching subgraph
145
        
146
        seed_xi <- intersect(seed, igraph::V(Xi)$name)  
147
        # prevent the error: Some of the seeds are not nodes of the network    
148
        
149
        # rwr layer names: to change if we include some day multiplex network
150
        # layers_name <- ifelse(!is.null(names(X)[i]), names(X)[i], 'graph')
151
        layers_name <- c("graph")
152
        
153
        # multiplex <- RandomWalkRestartMH::create.multiplex(L1 =
154
        # Xi,Layers_Name=layers_name)
155
        multiplex <- create.multiplex(
156
          #RandomWalkRestartMH::create.multiplex(
157
            LayersList = list(L1 = Xi),
158
            Layers_Name = layers_name
159
        )
160
        
161
        adj_matrix <- compute.adjacency.matrix(
162
          #RandomWalkRestartMH::compute.adjacency.matrix(
163
            x = multiplex, 
164
            delta = delta)
165
        adj_matrix_norm <- normalize.multiplex.adjacency(
166
          #RandomWalkRestartMH::normalize.multiplex.adjacency(
167
            x = adj_matrix)  # time/RAM consuming
168
        
169
        res_tmp <- list()
170
        for (seed_xi_i in seed_xi) {
171
            rwr_res <- Random.Walk.Restart.Multiplex(
172
            #rwr_res <- RandomWalkRestartMH::Random.Walk.Restart.Multiplex(
173
                x = adj_matrix_norm, 
174
                MultiplexObject = multiplex, 
175
                Seeds = seed_xi_i,
176
                r = r
177
            )
178
            res_tmp[[seed_xi_i]] <- rwr_res
179
        }
180
        # all seeds for a graph X has been computed -> merge result (more
181
        # efficient than having seperate results + associated graph)
182
        if (!is_empty(seed_xi)) {
183
            res[["rwr"]] <- purrr::imap_dfr(
184
                res_tmp, ~{
185
                    .x$RWRM_Results %>%
186
                        dplyr::mutate(SeedName = .y)
187
                }
188
            ) %>%
189
                dplyr::left_join(
190
                    as.data.frame(vertex_attr(X)),
191
                    by = c(NodeNames = "name")
192
                )
193
            res[["graph"]] <- X
194
            res[["seed"]] <- seed_xi
195
        }
196
        
197
        class(res) <- c("rwr")
198
    }
199
    return(res)
200
}
201
202
#' @importFrom igraph delete_vertices simplify degree
203
remove_unconnected_nodes <- function(X) {
204
    # remove unconnected nodes but does not simplify
205
    X.simplified <- igraph::simplify(X)
206
    isolated_nodes = which(igraph::degree(X.simplified) == 0)
207
    X = igraph::delete_vertices(X, isolated_nodes)
208
    return(X)
209
}
210
211
#' @importFrom dplyr filter pull top_n
212
#' @importFrom igraph induced_subgraph set_vertex_attr V
213
rwr_top_k_graph <- function(X, RWRM_Result_Object, Seed, k = 15) {
214
    Top_Results_Nodes <- RWRM_Result_Object %>%
215
        dplyr::filter(SeedName == Seed) %>%
216
        dplyr::top_n(n = k, wt = Score) %>%
217
        dplyr::pull(NodeNames)
218
    Query_Nodes <- intersect(
219
        c(Seed, Top_Results_Nodes),
220
        igraph::V(X)$name
221
    )
222
    Target_Nodes <- intersect(Top_Results_Nodes, igraph::V(X)$name)
223
    
224
    if (!purrr::is_empty(Query_Nodes)) {
225
        top_k_graph <- igraph::induced_subgraph(graph = X, 
226
                                                vids = Query_Nodes)
227
        top_k_graph <- igraph::set_vertex_attr(graph = top_k_graph, 
228
                                               name = "rwr", 
229
                                               index = Seed, 
230
                                               value = "seed")
231
        top_k_graph <- igraph::set_vertex_attr(graph = top_k_graph, 
232
                                               name = "rwr", 
233
                                               index = Target_Nodes, 
234
                                               value = "target")
235
        return(top_k_graph)
236
    }
237
    return(NULL)
238
}
239
240
241
242
243
#' RWR Find seeds between attributes
244
#' 
245
#' From rwr results, this function returns a subgraph if any vertex shares 
246
#' different attributes value.
247
#' In biological context, this might be useful to identify vertex shared between
248
#'  clusters or omics types.
249
#' 
250
#' @param X a random walk result from \code{random_walk_restart}
251
#' @param seed a character vector or NULL. If NULL, all the seeds from X 
252
#' are considered.
253
#' @param attribute a character value or NULL. 
254
#' If NULL, the closest node is returned.
255
#' @param k a integer, k closest nodes to consider in the search
256
#' 
257
#' @return 
258
#' A list of igraph object for each seed.
259
#' If X is a list, it returns a list of list of graph.
260
#' 
261
#' @examples 
262
#' graph1 <- igraph::graph_from_data_frame(
263
#'     list(from = c("A", "B", "A", "D", "C", "A", "C"), 
264
#'          to = c("B", "C", "D", "E", "D", "F", "G")), 
265
#'     directed = FALSE)
266
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
267
#'                                   name = 'type', 
268
#'                                   index = c("A","B","C"),
269
#'                                   value = "1")
270
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
271
#'                                   name = 'type', 
272
#'                                   index = c("D","E"),
273
#'                                   value = "2")
274
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
275
#'                                   name = 'type', 
276
#'                                   index = c("F", "G"),
277
#'                                   value = "3")
278
#' 
279
#' rwr_res <- random_walk_restart(X = graph1,
280
#'                                seed = c("A", "B", "C", "D", "E"))
281
#' rwr_res_type <- rwr_find_seeds_between_attributes(X = rwr_res, 
282
#'                                                   attribute = "type", 
283
#'                                                   k = 3)
284
#' 
285
#' @export
286
rwr_find_seeds_between_attributes <- function(X, 
287
                                              seed = NULL, 
288
                                              k = 15, 
289
                                              attribute = "type"){
290
    # check X
291
    if(!(is(X, "rwr") | is(X, "list.rwr"))){
292
        stop("X must be a random walk result")
293
    }
294
    
295
    # check k
296
    if(!is.null(k)){
297
        k <-  check_single_numeric_value(k, min = 0, 
298
                                         max = 200, 
299
                                         var.name = "'k' ")
300
301
    } else {
302
        k <- 15
303
    }
304
    
305
    # check seed  # if seed is null, all seeds found in rwr are considered 
306
    if(!is.null(seed)){
307
        # don't check if all seeds are in vids -> NULL results anyway
308
        seed <- check_vector_char(X = seed, 
309
                                  var.name = "'seed' ", 
310
                                  default = NULL)
311
    } 
312
    
313
    # check attribute
314
    attribute <-  check_vector_char(X = attribute, var.name = "'attribute' ", 
315
                                    default = "type",
316
                                    X.length = 1)
317
    
318
    if(is(X, "rwr")){
319
        if(is.null(seed)){ # seed = all seeds 
320
            seed <- X$seed  # can be NULL
321
        }
322
        res <- .rwr_find_seeds_between_attribute(rwr = X, 
323
                                                 k = k, 
324
                                                 attribute = attribute, 
325
                                                 seed = seed)
326
        class(res) <- "rwr.attributes"
327
    } else { # X is list.res
328
        # should not be run on list.res because each item 
329
        # contains a unique cluster
330
        res <- list()
331
332
        for(i in seq_along(X)){
333
            index_name_i <- ifelse(!is.null(names(X)[i]), names(X)[i], i)
334
            
335
            if(is.null(seed)){ # seed = all seeds 
336
                seed_i <- X[[index_name_i]]$seed  # can be NULL
337
            } else {
338
                seed_i <- seed
339
            }
340
            
341
            res[[index_name_i]] <- .rwr_find_seeds_between_attribute(
342
                rwr = X[[index_name_i]],
343
                k = k,
344
                attribute = attribute,
345
                seed = seed_i)
346
            class(res[[index_name_i]]) <- "rwr.attributes"
347
            
348
        }
349
        class(res) <- "list.rwr.attributes"
350
    }
351
    return(res)
352
}
353
354
#' @importFrom igraph vertex_attr
355
.rwr_find_seeds_between_attribute <- function(rwr, 
356
                                              k, 
357
                                              attribute, 
358
                                              seed){
359
    res <- list()
360
    for(seed_xi in seed){
361
        # print(seed_xi)
362
        top_k_graph <- rwr_top_k_graph(X = rwr$graph, 
363
                                       RWRM_Result_Object = rwr$rwr, 
364
                                       Seed = seed_xi, k = k)
365
        
366
        # find different cluster
367
        if(!is.null(top_k_graph)){
368
            if(nrow(table(igraph::vertex_attr(top_k_graph)[[attribute]])) >= 2){
369
                # generic version
370
                res[[seed_xi]] <- top_k_graph
371
            }
372
        }
373
    }
374
    return(res)
375
}
376
377
378
379
#' RWR Find closest nodes
380
#' 
381
#' From a rwr results, this function returns the closest nodes from a seed with 
382
#' a given attribute and value.
383
#' In biological context, it might be useful to get the closest Gene Ontology
384
#'  annotation nodes from unannotated seeds.
385
#' 
386
#' @param X a random walk result from \code{random_walk_restart}
387
#' @param seed a character vector or NULL. If NULL, all the seeds 
388
#' from X are considered.
389
#' @param attribute a character value or NULL. If NULL, 
390
#' the closest node is returned.
391
#' @param value a character value or NULL. If NULL, the closest node for a given
392
#'  attribute is returned.
393
#' @param top a numeric value, the top closest nodes to extract
394
#' 
395
#' 
396
#' 
397
#' @return 
398
#' A list of \code{data.frame} for each seed containing the closest nodes per
399
#'  seed and their vertex attributes.
400
#' If X is \code{list.rwr}, the returned value is a list of list. 
401
#' 
402
#' 
403
#' @examples 
404
#' graph1 <- igraph::graph_from_data_frame(
405
#'     list(from = c("A", "B", "A", "D", "C", "A", "C"), 
406
#'          to = c("B", "C", "D", "E", "D", "F", "G")), 
407
#'     directed = FALSE)
408
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
409
#'                                   name = 'type', 
410
#'                                   index = c("A","B","C"),
411
#'                                   value = "1")
412
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
413
#'                                   name = 'type', 
414
#'                                   index = c("D","E"),
415
#'                                   value = "2")
416
#' graph1 <- igraph::set_vertex_attr(graph = graph1, 
417
#'                                   name = 'type', 
418
#'                                   index = c("F", "G"),
419
#'                                   value = "3")
420
#' 
421
#' rwr_res <- random_walk_restart(X = graph1,
422
#'                                seed = c("A", "B", "C", "D", "E"))
423
#' rwr_find_closest_type(X=rwr_res, attribute = "type", 
424
#'                       seed = "A")
425
426
427
#' @export
428
rwr_find_closest_type <- function(X, 
429
                                  seed = NULL, 
430
                                  attribute = NULL, 
431
                                  value = NULL, 
432
                                  top = 1){
433
    # check X
434
    if(!(is(X, "rwr") | is(X, "list.rwr"))){
435
        stop("X must be a random walk result")
436
    }
437
    
438
    # check attribute or replace with default value
439
    attribute <- check_vector_char(X = attribute,
440
                                   X.length = 1, 
441
                                   default = NULL,
442
                                   var.name = "'attribute' ")
443
    
444
    # check value or replace with default value
445
    value <- check_vector_char(X = value, 
446
                               X.length = 1, 
447
                               default = NULL, 
448
                               var.name = "'value' ")
449
    
450
    # check top
451
    top <- check_single_numeric_value(top, var.name = "'top' ")
452
    
453
    # check seed  # if seed is null, all seeds found in rwr are considered  
454
    if(!is.null(seed)){
455
        # don't check if all seeds are in vids -> NULL results anyway
456
        seed <- check_vector_char(X = seed, 
457
                                  var.name = "'seed' ", 
458
                                  default = NULL)
459
    }
460
    
461
    if(is(X, "rwr")){
462
        if(is.null(seed)){ # seed = all seeds 
463
            seed <- X$seed  # can be NULL
464
        }
465
        res <- .rwr_find_closest(rwr = X, user.attribute = attribute, 
466
                                 seed = seed, 
467
                                 user.value = value, 
468
                                 top = top)
469
        class(res) <- "rwr.closest"
470
    } else { # X is list.res
471
        # should not be run on list.res because each item 
472
        # contains a unique cluster
473
        res <- list()
474
        
475
        for(i in seq_along(X)){
476
            index_name_i <- ifelse(!is.null(names(X)[i]), names(X)[i], i)
477
            
478
            if(is.null(seed)){ # seed = all seeds 
479
                seed_i <- X[[index_name_i]]$seed  # can be NULL
480
            } else {
481
                seed_i <- seed
482
            }
483
            
484
            res[[index_name_i]] <- .rwr_find_closest(rwr = X[[index_name_i]], 
485
                                                     user.attribute = attribute, 
486
                                                     seed = seed_i, 
487
                                                     user.value = value, 
488
                                                     top = top)
489
            class(res[[index_name_i]]) <- "rwr.closest"
490
            
491
        }
492
        class(res) <- "list.rwr.closest"
493
    }
494
    return(res)
495
}
496
497
#' @importFrom dplyr filter top_n left_join select everything across mutate
498
#' @importFrom purrr map_dfr
499
#' @importFrom tidyr pivot_longer
500
.rwr_find_closest <- function(rwr, user.attribute, user.value, seed, top){
501
    res <- list()
502
    for(seed_xi in seed){
503
        rwr.res.filtered <- dplyr::filter(rwr$rwr, SeedName == seed_xi) 
504
        # fix to use pivot_longer with different cast columns (integer/logical,)
505
        rwr.res.filtered <- rwr.res.filtered %>% t %>% t %>% as.data.frame()
506
        rwr.res.filtered <- tidyr::pivot_longer(rwr.res.filtered, 
507
                                                names_to = "attribute", 
508
                                                values_to = "value", 
509
                                                -c(NodeNames, Score, SeedName),
510
                                                #values_ptypes = 
511
                                                #    list(value=character())
512
                                                ) %>% 
513
          dplyr::mutate(dplyr::across(dplyr::everything(), as.character))
514
        
515
        if(!is.null(user.attribute)){
516
            rwr.res.filtered <- dplyr::filter(rwr.res.filtered, 
517
                                              attribute == user.attribute)
518
        }
519
        if(!is.null(user.value)){
520
            rwr.res.filtered <- dplyr::filter(rwr.res.filtered, 
521
                                              value == user.value)
522
        }
523
        rwr.res.filtered <- dplyr::top_n(x = rwr.res.filtered, 
524
                                         n = top, 
525
                                         wt = Score) %>%
526
            dplyr::select(c(NodeNames, SeedName)) %>% 
527
            unique
528
        if(nrow(rwr.res.filtered) > 0){
529
            res[[seed_xi]] <- dplyr::left_join(
530
                rwr.res.filtered, 
531
                rwr$rwr, 
532
                by =  c("NodeNames" = "NodeNames", "SeedName" = "SeedName")) %>% 
533
                dplyr::select(c(NodeNames, Score, SeedName), 
534
                              dplyr::everything())
535
        }
536
    }
537
    #res <- purrr::map_dfr(res, ~.x)
538
    return(res)
539
}
540
541
542