Diff of /vignettes/netOmics.R [000000] .. [73f552]

Switch to side-by-side view

--- a
+++ b/vignettes/netOmics.R
@@ -0,0 +1,407 @@
+## ----echo=FALSE---------------------------------------------------------------
+knitr::opts_chunk$set(fig.align = "center")
+
+
+## ----eval=FALSE---------------------------------------------------------------
+## # install the package via BioConductor
+## if (!requireNamespace("BiocManager", quietly = TRUE))
+##     install.packages("BiocManager")
+## 
+## BiocManager::install("netOmics")
+
+
+## ----eval=FALSE---------------------------------------------------------------
+## # install the package via github
+## library(devtools)
+## install_github("abodein/netOmics")
+
+
+## ----eval=TRUE, message=FALSE-------------------------------------------------
+# load the package
+library(netOmics)
+
+
+## ----eval=TRUE, message=FALSE-------------------------------------------------
+# usefull packages to build this vignette
+library(timeOmics)
+library(tidyverse)
+library(igraph)
+
+
+## ----load_data----------------------------------------------------------------
+# load data
+data("hmp_T2D")
+
+
+## ----timeOmics_1, eval=FALSE--------------------------------------------------
+## # not evaluated in this vignette
+## 
+## #1 filter fold-change
+## remove.low.cv <- function(X, cutoff = 0.5){
+##     # var.coef
+##     cv <- unlist(lapply(as.data.frame(X),
+##                     function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE))))
+##     return(X[,cv > cutoff])
+## }
+## fc.threshold <- list("RNA"= 1.5, "CLINICAL"=0.2, "GUT"=1.5, "METAB"=1.5,
+##                      "PROT" = 1.5, "CYTO" = 1)
+## 
+## # --> hmp_T2D$raw
+## data.filter <- imap(raw, ~{remove.low.cv(.x, cutoff = fc.threshold[[.y]])})
+## 
+## #2 scale
+## data <- lapply(data.filter, function(x) log(x+1))
+## # --> hmp_T2D$data
+## 
+## 
+## #3 modelling
+## lmms.func <- function(X){
+##     time <- rownames(X) %>% str_split("_") %>%
+##       map_chr(~.x[[2]]) %>% as.numeric()
+##     lmms.output <- lmms::lmmSpline(data = X, time = time,
+##                                    sampleID = rownames(X), deri = FALSE,
+##                                    basis = "p-spline", numCores = 4,
+##                                    keepModels = TRUE)
+##     return(lmms.output)
+## }
+## data.modelled <- lapply(data, function(x) lmms.func(x))
+## 
+## # 4 clustering
+## block.res <- block.pls(data.modelled, indY = 1, ncomp = 1)
+## getCluster.res <- getCluster(block.res)
+## # --> hmp_T2D$getCluster.res
+## 
+## 
+## # 5 signature
+## list.keepX <- list("CLINICAL" = 4, "CYTO" = 3, "GUT" = 10, "METAB" = 3,
+##                    "PROT" = 2,"RNA" = 34)
+## sparse.block.res  <- block.spls(data.modelled, indY = 1, ncomp = 1, scale =TRUE,
+##                                 keepX =list.keepX)
+## getCluster.sparse.res <- getCluster(sparse.block.res)
+## # --> hmp_T2D$getCluster.sparse.res
+
+
+## ----timeOmics_2--------------------------------------------------------------
+# clustering results
+cluster.info <- hmp_T2D$getCluster.res
+
+
+## ----graph.rna, warning=FALSE-------------------------------------------------
+cluster.info.RNA <- timeOmics::getCluster(cluster.info, user.block = "RNA")
+graph.rna <- get_grn(X = hmp_T2D$data$RNA, cluster = cluster.info.RNA)
+
+# to get info about the network
+get_graph_stats(graph.rna)
+
+
+## ----PROT_graph, warning=FALSE------------------------------------------------
+# Utility function to get the molecules by cluster
+get_list_mol_cluster <- function(cluster.info, user.block){
+  require(timeOmics)
+    tmp <- timeOmics::getCluster(cluster.info, user.block) 
+    res <- tmp %>% split(.$cluster) %>% 
+        lapply(function(x) x$molecule)
+    res[["All"]] <- tmp$molecule
+    return(res)
+}
+
+cluster.info.prot <- get_list_mol_cluster(cluster.info, user.block = 'PROT')
+graph.prot <-  get_interaction_from_database(X = cluster.info.prot, 
+                                             db = hmp_T2D$interaction.biogrid, 
+                                             type = "PROT", user.ego = TRUE)
+# get_graph_stats(graph.prot)
+
+
+## ----GUT_graph, eval = FALSE--------------------------------------------------
+## # not evaluated in this vignette
+## library(SpiecEasi)
+## 
+## get_sparcc_graph <- function(X, threshold = 0.3){
+##     res.sparcc <- sparcc(data = X)
+##     sparcc.graph <- abs(res.sparcc$Cor) >= threshold
+##     colnames(sparcc.graph) <-  colnames(X)
+##     rownames(sparcc.graph) <-  colnames(X)
+##     res.graph <- graph_from_adjacency_matrix(sparcc.graph,
+##                                              mode = "undirected") %>% simplify
+##     return(res.graph)
+## }
+## 
+## gut_list <- get_list_mol_cluster(cluster.info, user.block = 'GUT')
+## 
+## graph.gut <- list()
+## graph.gut[["All"]] <- get_sparcc_graph(hmp_T2D$raw$GUT, threshold = 0.3)
+## graph.gut[["1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>%
+##                                        dplyr::select(gut_list[["1"]]),
+##                                      threshold = 0.3)
+## graph.gut[["-1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>%
+##                                         dplyr::select(gut_list[["-1"]]),
+##                                       threshold = 0.3)
+## class(graph.gut) <- "list.igraph"
+
+
+## ----GUT----------------------------------------------------------------------
+graph.gut <- hmp_T2D$graph.gut
+# get_graph_stats(graph.gut)
+
+
+## ----CYTO_graph, warning=FALSE------------------------------------------------
+# CYTO -> from database (biogrid)
+cyto_list = get_list_mol_cluster(cluster.info = cluster.info, 
+                                 user.block = "CYTO")
+graph.cyto <-  get_interaction_from_database(X = cyto_list,
+                                             db = hmp_T2D$interaction.biogrid, 
+                                             type = "CYTO", user.ego = TRUE)
+# get_graph_stats(graph.cyto)
+
+# METAB -> inference
+cluster.info.metab <-  timeOmics::getCluster(X = cluster.info, 
+                                             user.block = "METAB")
+graph.metab <-  get_grn(X = hmp_T2D$data$METAB, 
+                        cluster = cluster.info.metab)
+# get_graph_stats(graph.metab)
+
+# CLINICAL -> inference
+cluster.info.clinical <- timeOmics::getCluster(X = cluster.info, 
+                                               user.block = 'CLINICAL')
+graph.clinical <- get_grn(X = hmp_T2D$data$CLINICAL,
+                          cluster = cluster.info.clinical)
+# get_graph_stats(graph.clinical)
+
+
+## ----merged_0-----------------------------------------------------------------
+full.graph <- combine_layers(graph1 = graph.rna, graph2 = graph.prot)
+full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.cyto)
+
+full.graph <- combine_layers(graph1 = full.graph,
+                             graph2 = hmp_T2D$interaction.TF)
+# get_graph_stats(full.graph)
+
+
+## ----merged_1_gut, warning=FALSE----------------------------------------------
+all_data <- reduce(hmp_T2D$data, cbind)
+
+# omic = gut
+gut_list <- get_list_mol_cluster(cluster.info, user.block = "GUT")
+omic_data <- lapply(gut_list, function(x)dplyr::select(hmp_T2D$data$GUT, x))
+
+# other data = "RNA", "PROT", "CYTO"
+other_data_list <- get_list_mol_cluster(cluster.info,
+                                        user.block = c("RNA", "PROT", "CYTO"))
+other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))
+
+# get interaction between gut data and other data
+interaction_df_gut <- get_interaction_from_correlation(X = omic_data,
+                                                       Y = other_data,
+                                                       threshold = 0.99)
+
+# and merge with full graph
+full.graph <- combine_layers(graph1 = full.graph,
+                             graph2 = graph.gut,
+                             interaction.df = interaction_df_gut$All)
+
+
+## ----merged_2_clinical, warning=FALSE-----------------------------------------
+# omic =  Clinical
+clinical_list <- get_list_mol_cluster(cluster.info, user.block = "CLINICAL")
+omic_data <- lapply(clinical_list, 
+                    function(x)dplyr::select(hmp_T2D$data$CLINICAL, x))
+
+# other data = "RNA", "PROT", "CYTO", "GUT"
+other_data_list <- get_list_mol_cluster(cluster.info,
+                                        user.block = c("RNA", "PROT", 
+                                                       "CYTO", "GUT"))
+other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))
+
+
+# get interaction between gut data and other data
+interaction_df_clinical <- get_interaction_from_correlation(X = omic_data
+                                                            , Y = other_data,
+                                                            threshold = 0.99)
+
+# and merge with full graph
+full.graph <- combine_layers(graph1 = full.graph,
+                             graph2 = graph.clinical, 
+                             interaction.df = interaction_df_clinical$All)
+
+
+## ----merged_3_metab, warning=FALSE--------------------------------------------
+# omic =  Metab
+metab_list <- get_list_mol_cluster(cluster.info, user.block = "METAB")
+omic_data <- lapply(metab_list, function(x)dplyr::select(hmp_T2D$data$METAB, x))
+
+# other data = "RNA", "PROT", "CYTO", "GUT", "CLINICAL"
+other_data_list <- get_list_mol_cluster(cluster.info,
+                                        user.block = c("RNA", "PROT", "CYTO", 
+                                                       "GUT", "CLINICAL"))
+other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))
+
+# get interaction between gut data and other data
+interaction_df_metab <- get_interaction_from_correlation(X = omic_data,
+                                                         Y = other_data, 
+                                                         threshold = 0.99)
+
+# and merge with full graph
+full.graph <- combine_layers(graph1 = full.graph, 
+                             graph2 = graph.metab, 
+                             interaction.df = interaction_df_metab$All)
+
+
+## -----------------------------------------------------------------------------
+# ORA by cluster/All
+mol_ora <- get_list_mol_cluster(cluster.info, 
+                                user.block = c("RNA", "PROT", "CYTO"))
+
+# get ORA interaction graph by cluster
+graph.go <- get_interaction_from_ORA(query = mol_ora,
+                                     sources = "GO",
+                                     organism = "hsapiens",
+                                     signif.value = TRUE)
+
+# merge
+full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.go)
+
+
+## -----------------------------------------------------------------------------
+# medlineRanker -> database
+medlineranker.res.df <- hmp_T2D$medlineranker.res.df %>% 
+  dplyr::select(Disease, symbol) %>% 
+  set_names(c("from", "to"))
+  
+mol_list <-  get_list_mol_cluster(cluster.info = cluster.info,
+                                  user.block = c("RNA", "PROT", "CYTO"))
+graph.medlineranker <-  get_interaction_from_database(X = mol_list,
+                                                      db = medlineranker.res.df, 
+                                                      type = "Disease",
+                                                      user.ego = TRUE)
+# get_graph_stats(graph.medlineranker)
+
+# merging
+full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.medlineranker)
+
+
+## -----------------------------------------------------------------------------
+# graph cleaning
+graph_cleaning <- function(X, cluster.info){
+    # no reusability
+    X <- igraph::simplify(X)
+    va <- vertex_attr(X)
+    viewed_mol <- c()
+    for(omic in unique(cluster.info$block)){
+        mol <- intersect(cluster.info %>% dplyr::filter(.$block == omic) %>%
+                           pull(molecule), V(X)$name)
+        viewed_mol <- c(viewed_mol, mol)
+        X <- set_vertex_attr(graph = X, 
+                             name = "type", 
+                             index = mol, 
+                             value = omic)
+        X <- set_vertex_attr(graph = X, 
+                             name = "mode",
+                             index = mol,
+                             value = "core")
+    }
+    # add medline ranker and go
+    mol <- intersect(map(graph.go, ~ as_data_frame(.x)$to) %>%
+                       unlist %>% unique(), V(X)$name) # only GO terms
+    viewed_mol <- c(viewed_mol, mol)
+    X <- set_vertex_attr(graph = X, name = "type", index = mol, value = "GO")
+    X <- set_vertex_attr(graph = X, name = "mode", 
+                         index = mol, value = "extended")
+    
+    mol <- intersect(as.character(medlineranker.res.df$from), V(X)$name)
+    viewed_mol <- c(viewed_mol, mol)
+    X <- set_vertex_attr(graph = X, name = "type",
+                         index = mol, value = "Disease")
+    X <- set_vertex_attr(graph = X, name = "mode",
+                         index = mol, value = "extended")
+    
+    other_mol <- setdiff(V(X), viewed_mol)
+    if(!is_empty(other_mol)){
+        X <- set_vertex_attr(graph = X, name = "mode",
+                             index = other_mol, value = "extended")
+    }
+    X <- set_vertex_attr(graph = X, name = "mode", 
+                         index = intersect(cluster.info$molecule, V(X)$name), 
+                         value = "core")
+    
+    # signature
+    mol <-  intersect(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule)
+    X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = TRUE)
+    mol <-  setdiff(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule)
+    X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = FALSE)
+    
+    return(X)
+}
+
+
+## -----------------------------------------------------------------------------
+FULL <- lapply(full.graph, function(x) graph_cleaning(x, cluster.info))
+get_graph_stats(FULL)
+
+
+## ----eval = FALSE-------------------------------------------------------------
+## # degree analysis
+## d <- degree(FULL$All)
+## hist(d)
+## d[max(d)]
+## 
+## # modularity # Warnings: can take several minutes
+## res.mod <- walktrap.community(FULL$All)
+## # ...
+## 
+## # modularity
+## sp <- shortest.paths(FULL$All)
+
+
+## -----------------------------------------------------------------------------
+# seeds = all vertices -> takes 5 minutes to run on regular computer
+# seeds <- V(FULL$All)$name
+# rwr_res <- random_walk_restart(FULL, seeds)
+
+# seed = some GO terms
+seeds <- head(V(FULL$All)$name[V(FULL$All)$type == "GO"])
+rwr_res <- random_walk_restart(FULL, seeds)
+
+
+## -----------------------------------------------------------------------------
+rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res, 
+                                                  attribute = "type", k = 15)
+
+# a summary plot function
+summary_plot_rwr_attributes(rwr_type_k15)
+summary_plot_rwr_attributes(rwr_type_k15$All)
+
+
+## -----------------------------------------------------------------------------
+rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res$All, 
+                                                  attribute = "cluster", k = 15)
+summary_plot_rwr_attributes(rwr_type_k15)
+
+
+## -----------------------------------------------------------------------------
+sub_res <- rwr_type_k15$`GO:0005737`
+sub <- plot_rwr_subnetwork(sub_res, legend = TRUE, plot = TRUE)
+
+
+## -----------------------------------------------------------------------------
+rwr_res <- random_walk_restart(FULL$All, seed = "ZNF263")
+
+# closest GO term
+rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", 
+                      value = "GO", top = 5)
+
+# closest Disease
+rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", 
+                      value = "Disease", top = 5)
+
+# closest nodes with an attribute "cluster" and the value "-1"
+rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "cluster",
+                      value = "-1", top = 5)
+
+
+## ----eval = FALSE-------------------------------------------------------------
+## seeds <- V(FULL$All)$name[V(FULL$All)$type %in% c("GO", "Disease")]
+
+
+## -----------------------------------------------------------------------------
+sessionInfo()
+