--- a +++ b/vignettes/netOmics.R @@ -0,0 +1,407 @@ +## ----echo=FALSE--------------------------------------------------------------- +knitr::opts_chunk$set(fig.align = "center") + + +## ----eval=FALSE--------------------------------------------------------------- +## # install the package via BioConductor +## if (!requireNamespace("BiocManager", quietly = TRUE)) +## install.packages("BiocManager") +## +## BiocManager::install("netOmics") + + +## ----eval=FALSE--------------------------------------------------------------- +## # install the package via github +## library(devtools) +## install_github("abodein/netOmics") + + +## ----eval=TRUE, message=FALSE------------------------------------------------- +# load the package +library(netOmics) + + +## ----eval=TRUE, message=FALSE------------------------------------------------- +# usefull packages to build this vignette +library(timeOmics) +library(tidyverse) +library(igraph) + + +## ----load_data---------------------------------------------------------------- +# load data +data("hmp_T2D") + + +## ----timeOmics_1, eval=FALSE-------------------------------------------------- +## # not evaluated in this vignette +## +## #1 filter fold-change +## remove.low.cv <- function(X, cutoff = 0.5){ +## # var.coef +## cv <- unlist(lapply(as.data.frame(X), +## function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE)))) +## return(X[,cv > cutoff]) +## } +## fc.threshold <- list("RNA"= 1.5, "CLINICAL"=0.2, "GUT"=1.5, "METAB"=1.5, +## "PROT" = 1.5, "CYTO" = 1) +## +## # --> hmp_T2D$raw +## data.filter <- imap(raw, ~{remove.low.cv(.x, cutoff = fc.threshold[[.y]])}) +## +## #2 scale +## data <- lapply(data.filter, function(x) log(x+1)) +## # --> hmp_T2D$data +## +## +## #3 modelling +## lmms.func <- function(X){ +## time <- rownames(X) %>% str_split("_") %>% +## map_chr(~.x[[2]]) %>% as.numeric() +## lmms.output <- lmms::lmmSpline(data = X, time = time, +## sampleID = rownames(X), deri = FALSE, +## basis = "p-spline", numCores = 4, +## keepModels = TRUE) +## return(lmms.output) +## } +## data.modelled <- lapply(data, function(x) lmms.func(x)) +## +## # 4 clustering +## block.res <- block.pls(data.modelled, indY = 1, ncomp = 1) +## getCluster.res <- getCluster(block.res) +## # --> hmp_T2D$getCluster.res +## +## +## # 5 signature +## list.keepX <- list("CLINICAL" = 4, "CYTO" = 3, "GUT" = 10, "METAB" = 3, +## "PROT" = 2,"RNA" = 34) +## sparse.block.res <- block.spls(data.modelled, indY = 1, ncomp = 1, scale =TRUE, +## keepX =list.keepX) +## getCluster.sparse.res <- getCluster(sparse.block.res) +## # --> hmp_T2D$getCluster.sparse.res + + +## ----timeOmics_2-------------------------------------------------------------- +# clustering results +cluster.info <- hmp_T2D$getCluster.res + + +## ----graph.rna, warning=FALSE------------------------------------------------- +cluster.info.RNA <- timeOmics::getCluster(cluster.info, user.block = "RNA") +graph.rna <- get_grn(X = hmp_T2D$data$RNA, cluster = cluster.info.RNA) + +# to get info about the network +get_graph_stats(graph.rna) + + +## ----PROT_graph, warning=FALSE------------------------------------------------ +# Utility function to get the molecules by cluster +get_list_mol_cluster <- function(cluster.info, user.block){ + require(timeOmics) + tmp <- timeOmics::getCluster(cluster.info, user.block) + res <- tmp %>% split(.$cluster) %>% + lapply(function(x) x$molecule) + res[["All"]] <- tmp$molecule + return(res) +} + +cluster.info.prot <- get_list_mol_cluster(cluster.info, user.block = 'PROT') +graph.prot <- get_interaction_from_database(X = cluster.info.prot, + db = hmp_T2D$interaction.biogrid, + type = "PROT", user.ego = TRUE) +# get_graph_stats(graph.prot) + + +## ----GUT_graph, eval = FALSE-------------------------------------------------- +## # not evaluated in this vignette +## library(SpiecEasi) +## +## get_sparcc_graph <- function(X, threshold = 0.3){ +## res.sparcc <- sparcc(data = X) +## sparcc.graph <- abs(res.sparcc$Cor) >= threshold +## colnames(sparcc.graph) <- colnames(X) +## rownames(sparcc.graph) <- colnames(X) +## res.graph <- graph_from_adjacency_matrix(sparcc.graph, +## mode = "undirected") %>% simplify +## return(res.graph) +## } +## +## gut_list <- get_list_mol_cluster(cluster.info, user.block = 'GUT') +## +## graph.gut <- list() +## graph.gut[["All"]] <- get_sparcc_graph(hmp_T2D$raw$GUT, threshold = 0.3) +## graph.gut[["1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>% +## dplyr::select(gut_list[["1"]]), +## threshold = 0.3) +## graph.gut[["-1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>% +## dplyr::select(gut_list[["-1"]]), +## threshold = 0.3) +## class(graph.gut) <- "list.igraph" + + +## ----GUT---------------------------------------------------------------------- +graph.gut <- hmp_T2D$graph.gut +# get_graph_stats(graph.gut) + + +## ----CYTO_graph, warning=FALSE------------------------------------------------ +# CYTO -> from database (biogrid) +cyto_list = get_list_mol_cluster(cluster.info = cluster.info, + user.block = "CYTO") +graph.cyto <- get_interaction_from_database(X = cyto_list, + db = hmp_T2D$interaction.biogrid, + type = "CYTO", user.ego = TRUE) +# get_graph_stats(graph.cyto) + +# METAB -> inference +cluster.info.metab <- timeOmics::getCluster(X = cluster.info, + user.block = "METAB") +graph.metab <- get_grn(X = hmp_T2D$data$METAB, + cluster = cluster.info.metab) +# get_graph_stats(graph.metab) + +# CLINICAL -> inference +cluster.info.clinical <- timeOmics::getCluster(X = cluster.info, + user.block = 'CLINICAL') +graph.clinical <- get_grn(X = hmp_T2D$data$CLINICAL, + cluster = cluster.info.clinical) +# get_graph_stats(graph.clinical) + + +## ----merged_0----------------------------------------------------------------- +full.graph <- combine_layers(graph1 = graph.rna, graph2 = graph.prot) +full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.cyto) + +full.graph <- combine_layers(graph1 = full.graph, + graph2 = hmp_T2D$interaction.TF) +# get_graph_stats(full.graph) + + +## ----merged_1_gut, warning=FALSE---------------------------------------------- +all_data <- reduce(hmp_T2D$data, cbind) + +# omic = gut +gut_list <- get_list_mol_cluster(cluster.info, user.block = "GUT") +omic_data <- lapply(gut_list, function(x)dplyr::select(hmp_T2D$data$GUT, x)) + +# other data = "RNA", "PROT", "CYTO" +other_data_list <- get_list_mol_cluster(cluster.info, + user.block = c("RNA", "PROT", "CYTO")) +other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x)) + +# get interaction between gut data and other data +interaction_df_gut <- get_interaction_from_correlation(X = omic_data, + Y = other_data, + threshold = 0.99) + +# and merge with full graph +full.graph <- combine_layers(graph1 = full.graph, + graph2 = graph.gut, + interaction.df = interaction_df_gut$All) + + +## ----merged_2_clinical, warning=FALSE----------------------------------------- +# omic = Clinical +clinical_list <- get_list_mol_cluster(cluster.info, user.block = "CLINICAL") +omic_data <- lapply(clinical_list, + function(x)dplyr::select(hmp_T2D$data$CLINICAL, x)) + +# other data = "RNA", "PROT", "CYTO", "GUT" +other_data_list <- get_list_mol_cluster(cluster.info, + user.block = c("RNA", "PROT", + "CYTO", "GUT")) +other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x)) + + +# get interaction between gut data and other data +interaction_df_clinical <- get_interaction_from_correlation(X = omic_data + , Y = other_data, + threshold = 0.99) + +# and merge with full graph +full.graph <- combine_layers(graph1 = full.graph, + graph2 = graph.clinical, + interaction.df = interaction_df_clinical$All) + + +## ----merged_3_metab, warning=FALSE-------------------------------------------- +# omic = Metab +metab_list <- get_list_mol_cluster(cluster.info, user.block = "METAB") +omic_data <- lapply(metab_list, function(x)dplyr::select(hmp_T2D$data$METAB, x)) + +# other data = "RNA", "PROT", "CYTO", "GUT", "CLINICAL" +other_data_list <- get_list_mol_cluster(cluster.info, + user.block = c("RNA", "PROT", "CYTO", + "GUT", "CLINICAL")) +other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x)) + +# get interaction between gut data and other data +interaction_df_metab <- get_interaction_from_correlation(X = omic_data, + Y = other_data, + threshold = 0.99) + +# and merge with full graph +full.graph <- combine_layers(graph1 = full.graph, + graph2 = graph.metab, + interaction.df = interaction_df_metab$All) + + +## ----------------------------------------------------------------------------- +# ORA by cluster/All +mol_ora <- get_list_mol_cluster(cluster.info, + user.block = c("RNA", "PROT", "CYTO")) + +# get ORA interaction graph by cluster +graph.go <- get_interaction_from_ORA(query = mol_ora, + sources = "GO", + organism = "hsapiens", + signif.value = TRUE) + +# merge +full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.go) + + +## ----------------------------------------------------------------------------- +# medlineRanker -> database +medlineranker.res.df <- hmp_T2D$medlineranker.res.df %>% + dplyr::select(Disease, symbol) %>% + set_names(c("from", "to")) + +mol_list <- get_list_mol_cluster(cluster.info = cluster.info, + user.block = c("RNA", "PROT", "CYTO")) +graph.medlineranker <- get_interaction_from_database(X = mol_list, + db = medlineranker.res.df, + type = "Disease", + user.ego = TRUE) +# get_graph_stats(graph.medlineranker) + +# merging +full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.medlineranker) + + +## ----------------------------------------------------------------------------- +# graph cleaning +graph_cleaning <- function(X, cluster.info){ + # no reusability + X <- igraph::simplify(X) + va <- vertex_attr(X) + viewed_mol <- c() + for(omic in unique(cluster.info$block)){ + mol <- intersect(cluster.info %>% dplyr::filter(.$block == omic) %>% + pull(molecule), V(X)$name) + viewed_mol <- c(viewed_mol, mol) + X <- set_vertex_attr(graph = X, + name = "type", + index = mol, + value = omic) + X <- set_vertex_attr(graph = X, + name = "mode", + index = mol, + value = "core") + } + # add medline ranker and go + mol <- intersect(map(graph.go, ~ as_data_frame(.x)$to) %>% + unlist %>% unique(), V(X)$name) # only GO terms + viewed_mol <- c(viewed_mol, mol) + X <- set_vertex_attr(graph = X, name = "type", index = mol, value = "GO") + X <- set_vertex_attr(graph = X, name = "mode", + index = mol, value = "extended") + + mol <- intersect(as.character(medlineranker.res.df$from), V(X)$name) + viewed_mol <- c(viewed_mol, mol) + X <- set_vertex_attr(graph = X, name = "type", + index = mol, value = "Disease") + X <- set_vertex_attr(graph = X, name = "mode", + index = mol, value = "extended") + + other_mol <- setdiff(V(X), viewed_mol) + if(!is_empty(other_mol)){ + X <- set_vertex_attr(graph = X, name = "mode", + index = other_mol, value = "extended") + } + X <- set_vertex_attr(graph = X, name = "mode", + index = intersect(cluster.info$molecule, V(X)$name), + value = "core") + + # signature + mol <- intersect(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule) + X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = TRUE) + mol <- setdiff(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule) + X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = FALSE) + + return(X) +} + + +## ----------------------------------------------------------------------------- +FULL <- lapply(full.graph, function(x) graph_cleaning(x, cluster.info)) +get_graph_stats(FULL) + + +## ----eval = FALSE------------------------------------------------------------- +## # degree analysis +## d <- degree(FULL$All) +## hist(d) +## d[max(d)] +## +## # modularity # Warnings: can take several minutes +## res.mod <- walktrap.community(FULL$All) +## # ... +## +## # modularity +## sp <- shortest.paths(FULL$All) + + +## ----------------------------------------------------------------------------- +# seeds = all vertices -> takes 5 minutes to run on regular computer +# seeds <- V(FULL$All)$name +# rwr_res <- random_walk_restart(FULL, seeds) + +# seed = some GO terms +seeds <- head(V(FULL$All)$name[V(FULL$All)$type == "GO"]) +rwr_res <- random_walk_restart(FULL, seeds) + + +## ----------------------------------------------------------------------------- +rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res, + attribute = "type", k = 15) + +# a summary plot function +summary_plot_rwr_attributes(rwr_type_k15) +summary_plot_rwr_attributes(rwr_type_k15$All) + + +## ----------------------------------------------------------------------------- +rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res$All, + attribute = "cluster", k = 15) +summary_plot_rwr_attributes(rwr_type_k15) + + +## ----------------------------------------------------------------------------- +sub_res <- rwr_type_k15$`GO:0005737` +sub <- plot_rwr_subnetwork(sub_res, legend = TRUE, plot = TRUE) + + +## ----------------------------------------------------------------------------- +rwr_res <- random_walk_restart(FULL$All, seed = "ZNF263") + +# closest GO term +rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", + value = "GO", top = 5) + +# closest Disease +rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", + value = "Disease", top = 5) + +# closest nodes with an attribute "cluster" and the value "-1" +rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "cluster", + value = "-1", top = 5) + + +## ----eval = FALSE------------------------------------------------------------- +## seeds <- V(FULL$All)$name[V(FULL$All)$type %in% c("GO", "Disease")] + + +## ----------------------------------------------------------------------------- +sessionInfo() +