--- a +++ b/R/RWR.R @@ -0,0 +1,542 @@ +#' Random Walk with Restart +#' +#' This function performs a propagation analysis by random walk with restart +#' in a multi-layered network from specific seeds. +#' +#' @param X an igraph or list.igraph object. +#' @param seed a character vector. Only seeds present in X are considered. +#' @param r a numeric value between 0 and 1. +#' It sets the probability of restarting to a seed node after each step. +#' +#' @return +#' Each element of X returns a list (class = 'rwr') +#' containing the following elements: +#' \item{rwr}{a \code{data.frame}, the RWR results for each valid seed.} +#' \item{seed}{a character vector with the valid seeds} +#' \item{graph}{\code{igraph} object from X} +#' If X is a \code{list.igraph}, the returned object is a \code{list.rwr}. +#' +#' @seealso +# \code{\link[RandomWalkRestartMH]{Random.Walk.Restart.Multiplex}}, +#' \code{\link[netOmics]{rwr_find_seeds_between_attributes}}, +#' \code{\link[netOmics]{rwr_find_closest_type}} +#' +#' @examples +#' graph1 <- igraph::graph_from_data_frame( +#' list(from = c('A', 'B', 'A', 'D', 'C', 'A', 'C'), +#' to = c('B', 'C', 'D', 'E', 'D', 'F', 'G')), +#' directed = FALSE) +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c('A','B','C'), +#' value = '1') +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c('D','E'), +#' value = '2') +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c('F', 'G'), +#' value = '3') +#' +#' rwr_res <- random_walk_restart(X = graph1, +#' seed = c('A', 'B', 'C', 'D', 'E')) +#' +# @importFrom RandomWalkRestartMH create.multiplex +# @importFrom RandomWalkRestartMH compute.adjacency.matrix +# @importFrom RandomWalkRestartMH normalize.multiplex.adjacency +# @importFrom RandomWalkRestartMH Random.Walk.Restart.Multiplex +#' @importFrom dplyr mutate left_join +#' @importFrom purrr imap_dfr +#' @importFrom magrittr %>% +#' @export +random_walk_restart <- function(X, seed = NULL, r = 0.7) { + + # check X is graph or list of graph + X <- check_graph(X) + + # check seed + seed <- check_vector_char(X = seed, var.name = "'seed' ") + + # check r + r <- check_single_numeric_value(r, min = 0, max = 1, var.name = "'r' ") + + # delta + delta <- 0.5 + + res <- list() + if (is(X, "list.igraph")) { + # apply RWR on each graph + for (i in seq_along(X)) { + Xi <- X[[i]] + Xi <- remove_unconnected_nodes(Xi) + index_name_i <- ifelse( + !is.null(names(X)[i]), + names(X)[i], + i + ) + + ## possible implementation to benchmark: extract graph component + ## and make couples with seeds and matching subgraph + + seed_xi <- intersect(seed, igraph::V(Xi)$name) + # prevent the error: 'Some of the seeds are not nodes of the network + + # rwr layer names: to change if we include some day multiplex + # network + layers_name <- ifelse( + !is.null(names(X)[i]), + names(X)[i], + "graph" + ) + + # multiplex <- RandomWalkRestartMH::create.multiplex(L1 = + # Xi,Layers_Name=layers_name) + #multiplex <- RandomWalkRestartMH::create.multiplex( + multiplex <- create.multiplex( + LayersList = list(L1 = Xi), + Layers_Name = layers_name + ) + # adj_matrix <- RandomWalkRestartMH::compute.adjacency.matrix( + adj_matrix <- compute.adjacency.matrix( + x = multiplex, + delta = delta) + adj_matrix_norm <- + normalize.multiplex.adjacency( + # RandomWalkRestartMH::normalize.multiplex.adjacency( + x = adj_matrix) # time/RAM consuming + + res_tmp <- list() + for (seed_xi_i in seed_xi) { + # rwr_res <- RandomWalkRestartMH::Random.Walk.Restart.Multiplex( + rwr_res <- Random.Walk.Restart.Multiplex( + + x = adj_matrix_norm, + MultiplexObject = multiplex, + Seeds = seed_xi_i, + r = r + ) + res_tmp[[seed_xi_i]] <- rwr_res + } + if (!is_empty(seed_xi)) { + res[[index_name_i]] <- list() + res[[index_name_i]][["rwr"]] <- purrr::imap_dfr( + res_tmp, ~{ + .x$RWRM_Results %>% + dplyr::mutate(SeedName = .y) + } + ) %>% + dplyr::left_join( + as.data.frame(vertex_attr(X[[i]])), + by = c(NodeNames = "name") + ) + res[[index_name_i]][["graph"]] <- X[[i]] + res[[index_name_i]][["seed"]] <- seed_xi + class(res[[index_name_i]]) <- "rwr" + } + class(res) <- c("list.rwr") + } + } else { + # X is a single graph + Xi <- remove_unconnected_nodes(X) + + ## possible implementation to benchmark: extract graph component and + ## make couples with seeds and matching subgraph + + seed_xi <- intersect(seed, igraph::V(Xi)$name) + # prevent the error: Some of the seeds are not nodes of the network + + # rwr layer names: to change if we include some day multiplex network + # layers_name <- ifelse(!is.null(names(X)[i]), names(X)[i], 'graph') + layers_name <- c("graph") + + # multiplex <- RandomWalkRestartMH::create.multiplex(L1 = + # Xi,Layers_Name=layers_name) + multiplex <- create.multiplex( + #RandomWalkRestartMH::create.multiplex( + LayersList = list(L1 = Xi), + Layers_Name = layers_name + ) + + adj_matrix <- compute.adjacency.matrix( + #RandomWalkRestartMH::compute.adjacency.matrix( + x = multiplex, + delta = delta) + adj_matrix_norm <- normalize.multiplex.adjacency( + #RandomWalkRestartMH::normalize.multiplex.adjacency( + x = adj_matrix) # time/RAM consuming + + res_tmp <- list() + for (seed_xi_i in seed_xi) { + rwr_res <- Random.Walk.Restart.Multiplex( + #rwr_res <- RandomWalkRestartMH::Random.Walk.Restart.Multiplex( + x = adj_matrix_norm, + MultiplexObject = multiplex, + Seeds = seed_xi_i, + r = r + ) + res_tmp[[seed_xi_i]] <- rwr_res + } + # all seeds for a graph X has been computed -> merge result (more + # efficient than having seperate results + associated graph) + if (!is_empty(seed_xi)) { + res[["rwr"]] <- purrr::imap_dfr( + res_tmp, ~{ + .x$RWRM_Results %>% + dplyr::mutate(SeedName = .y) + } + ) %>% + dplyr::left_join( + as.data.frame(vertex_attr(X)), + by = c(NodeNames = "name") + ) + res[["graph"]] <- X + res[["seed"]] <- seed_xi + } + + class(res) <- c("rwr") + } + return(res) +} + +#' @importFrom igraph delete_vertices simplify degree +remove_unconnected_nodes <- function(X) { + # remove unconnected nodes but does not simplify + X.simplified <- igraph::simplify(X) + isolated_nodes = which(igraph::degree(X.simplified) == 0) + X = igraph::delete_vertices(X, isolated_nodes) + return(X) +} + +#' @importFrom dplyr filter pull top_n +#' @importFrom igraph induced_subgraph set_vertex_attr V +rwr_top_k_graph <- function(X, RWRM_Result_Object, Seed, k = 15) { + Top_Results_Nodes <- RWRM_Result_Object %>% + dplyr::filter(SeedName == Seed) %>% + dplyr::top_n(n = k, wt = Score) %>% + dplyr::pull(NodeNames) + Query_Nodes <- intersect( + c(Seed, Top_Results_Nodes), + igraph::V(X)$name + ) + Target_Nodes <- intersect(Top_Results_Nodes, igraph::V(X)$name) + + if (!purrr::is_empty(Query_Nodes)) { + top_k_graph <- igraph::induced_subgraph(graph = X, + vids = Query_Nodes) + top_k_graph <- igraph::set_vertex_attr(graph = top_k_graph, + name = "rwr", + index = Seed, + value = "seed") + top_k_graph <- igraph::set_vertex_attr(graph = top_k_graph, + name = "rwr", + index = Target_Nodes, + value = "target") + return(top_k_graph) + } + return(NULL) +} + + + + +#' RWR Find seeds between attributes +#' +#' From rwr results, this function returns a subgraph if any vertex shares +#' different attributes value. +#' In biological context, this might be useful to identify vertex shared between +#' clusters or omics types. +#' +#' @param X a random walk result from \code{random_walk_restart} +#' @param seed a character vector or NULL. If NULL, all the seeds from X +#' are considered. +#' @param attribute a character value or NULL. +#' If NULL, the closest node is returned. +#' @param k a integer, k closest nodes to consider in the search +#' +#' @return +#' A list of igraph object for each seed. +#' If X is a list, it returns a list of list of graph. +#' +#' @examples +#' graph1 <- igraph::graph_from_data_frame( +#' list(from = c("A", "B", "A", "D", "C", "A", "C"), +#' to = c("B", "C", "D", "E", "D", "F", "G")), +#' directed = FALSE) +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c("A","B","C"), +#' value = "1") +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c("D","E"), +#' value = "2") +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c("F", "G"), +#' value = "3") +#' +#' rwr_res <- random_walk_restart(X = graph1, +#' seed = c("A", "B", "C", "D", "E")) +#' rwr_res_type <- rwr_find_seeds_between_attributes(X = rwr_res, +#' attribute = "type", +#' k = 3) +#' +#' @export +rwr_find_seeds_between_attributes <- function(X, + seed = NULL, + k = 15, + attribute = "type"){ + # check X + if(!(is(X, "rwr") | is(X, "list.rwr"))){ + stop("X must be a random walk result") + } + + # check k + if(!is.null(k)){ + k <- check_single_numeric_value(k, min = 0, + max = 200, + var.name = "'k' ") + + } else { + k <- 15 + } + + # check seed # if seed is null, all seeds found in rwr are considered + if(!is.null(seed)){ + # don't check if all seeds are in vids -> NULL results anyway + seed <- check_vector_char(X = seed, + var.name = "'seed' ", + default = NULL) + } + + # check attribute + attribute <- check_vector_char(X = attribute, var.name = "'attribute' ", + default = "type", + X.length = 1) + + if(is(X, "rwr")){ + if(is.null(seed)){ # seed = all seeds + seed <- X$seed # can be NULL + } + res <- .rwr_find_seeds_between_attribute(rwr = X, + k = k, + attribute = attribute, + seed = seed) + class(res) <- "rwr.attributes" + } else { # X is list.res + # should not be run on list.res because each item + # contains a unique cluster + res <- list() + + for(i in seq_along(X)){ + index_name_i <- ifelse(!is.null(names(X)[i]), names(X)[i], i) + + if(is.null(seed)){ # seed = all seeds + seed_i <- X[[index_name_i]]$seed # can be NULL + } else { + seed_i <- seed + } + + res[[index_name_i]] <- .rwr_find_seeds_between_attribute( + rwr = X[[index_name_i]], + k = k, + attribute = attribute, + seed = seed_i) + class(res[[index_name_i]]) <- "rwr.attributes" + + } + class(res) <- "list.rwr.attributes" + } + return(res) +} + +#' @importFrom igraph vertex_attr +.rwr_find_seeds_between_attribute <- function(rwr, + k, + attribute, + seed){ + res <- list() + for(seed_xi in seed){ + # print(seed_xi) + top_k_graph <- rwr_top_k_graph(X = rwr$graph, + RWRM_Result_Object = rwr$rwr, + Seed = seed_xi, k = k) + + # find different cluster + if(!is.null(top_k_graph)){ + if(nrow(table(igraph::vertex_attr(top_k_graph)[[attribute]])) >= 2){ + # generic version + res[[seed_xi]] <- top_k_graph + } + } + } + return(res) +} + + + +#' RWR Find closest nodes +#' +#' From a rwr results, this function returns the closest nodes from a seed with +#' a given attribute and value. +#' In biological context, it might be useful to get the closest Gene Ontology +#' annotation nodes from unannotated seeds. +#' +#' @param X a random walk result from \code{random_walk_restart} +#' @param seed a character vector or NULL. If NULL, all the seeds +#' from X are considered. +#' @param attribute a character value or NULL. If NULL, +#' the closest node is returned. +#' @param value a character value or NULL. If NULL, the closest node for a given +#' attribute is returned. +#' @param top a numeric value, the top closest nodes to extract +#' +#' +#' +#' @return +#' A list of \code{data.frame} for each seed containing the closest nodes per +#' seed and their vertex attributes. +#' If X is \code{list.rwr}, the returned value is a list of list. +#' +#' +#' @examples +#' graph1 <- igraph::graph_from_data_frame( +#' list(from = c("A", "B", "A", "D", "C", "A", "C"), +#' to = c("B", "C", "D", "E", "D", "F", "G")), +#' directed = FALSE) +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c("A","B","C"), +#' value = "1") +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c("D","E"), +#' value = "2") +#' graph1 <- igraph::set_vertex_attr(graph = graph1, +#' name = 'type', +#' index = c("F", "G"), +#' value = "3") +#' +#' rwr_res <- random_walk_restart(X = graph1, +#' seed = c("A", "B", "C", "D", "E")) +#' rwr_find_closest_type(X=rwr_res, attribute = "type", +#' seed = "A") + + +#' @export +rwr_find_closest_type <- function(X, + seed = NULL, + attribute = NULL, + value = NULL, + top = 1){ + # check X + if(!(is(X, "rwr") | is(X, "list.rwr"))){ + stop("X must be a random walk result") + } + + # check attribute or replace with default value + attribute <- check_vector_char(X = attribute, + X.length = 1, + default = NULL, + var.name = "'attribute' ") + + # check value or replace with default value + value <- check_vector_char(X = value, + X.length = 1, + default = NULL, + var.name = "'value' ") + + # check top + top <- check_single_numeric_value(top, var.name = "'top' ") + + # check seed # if seed is null, all seeds found in rwr are considered + if(!is.null(seed)){ + # don't check if all seeds are in vids -> NULL results anyway + seed <- check_vector_char(X = seed, + var.name = "'seed' ", + default = NULL) + } + + if(is(X, "rwr")){ + if(is.null(seed)){ # seed = all seeds + seed <- X$seed # can be NULL + } + res <- .rwr_find_closest(rwr = X, user.attribute = attribute, + seed = seed, + user.value = value, + top = top) + class(res) <- "rwr.closest" + } else { # X is list.res + # should not be run on list.res because each item + # contains a unique cluster + res <- list() + + for(i in seq_along(X)){ + index_name_i <- ifelse(!is.null(names(X)[i]), names(X)[i], i) + + if(is.null(seed)){ # seed = all seeds + seed_i <- X[[index_name_i]]$seed # can be NULL + } else { + seed_i <- seed + } + + res[[index_name_i]] <- .rwr_find_closest(rwr = X[[index_name_i]], + user.attribute = attribute, + seed = seed_i, + user.value = value, + top = top) + class(res[[index_name_i]]) <- "rwr.closest" + + } + class(res) <- "list.rwr.closest" + } + return(res) +} + +#' @importFrom dplyr filter top_n left_join select everything across mutate +#' @importFrom purrr map_dfr +#' @importFrom tidyr pivot_longer +.rwr_find_closest <- function(rwr, user.attribute, user.value, seed, top){ + res <- list() + for(seed_xi in seed){ + rwr.res.filtered <- dplyr::filter(rwr$rwr, SeedName == seed_xi) + # fix to use pivot_longer with different cast columns (integer/logical,) + rwr.res.filtered <- rwr.res.filtered %>% t %>% t %>% as.data.frame() + rwr.res.filtered <- tidyr::pivot_longer(rwr.res.filtered, + names_to = "attribute", + values_to = "value", + -c(NodeNames, Score, SeedName), + #values_ptypes = + # list(value=character()) + ) %>% + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) + + if(!is.null(user.attribute)){ + rwr.res.filtered <- dplyr::filter(rwr.res.filtered, + attribute == user.attribute) + } + if(!is.null(user.value)){ + rwr.res.filtered <- dplyr::filter(rwr.res.filtered, + value == user.value) + } + rwr.res.filtered <- dplyr::top_n(x = rwr.res.filtered, + n = top, + wt = Score) %>% + dplyr::select(c(NodeNames, SeedName)) %>% + unique + if(nrow(rwr.res.filtered) > 0){ + res[[seed_xi]] <- dplyr::left_join( + rwr.res.filtered, + rwr$rwr, + by = c("NodeNames" = "NodeNames", "SeedName" = "SeedName")) %>% + dplyr::select(c(NodeNames, Score, SeedName), + dplyr::everything()) + } + } + #res <- purrr::map_dfr(res, ~.x) + return(res) +} + + +