a b/vignettes/netOmics.R
1
## ----echo=FALSE---------------------------------------------------------------
2
knitr::opts_chunk$set(fig.align = "center")
3
4
5
## ----eval=FALSE---------------------------------------------------------------
6
## # install the package via BioConductor
7
## if (!requireNamespace("BiocManager", quietly = TRUE))
8
##     install.packages("BiocManager")
9
## 
10
## BiocManager::install("netOmics")
11
12
13
## ----eval=FALSE---------------------------------------------------------------
14
## # install the package via github
15
## library(devtools)
16
## install_github("abodein/netOmics")
17
18
19
## ----eval=TRUE, message=FALSE-------------------------------------------------
20
# load the package
21
library(netOmics)
22
23
24
## ----eval=TRUE, message=FALSE-------------------------------------------------
25
# usefull packages to build this vignette
26
library(timeOmics)
27
library(tidyverse)
28
library(igraph)
29
30
31
## ----load_data----------------------------------------------------------------
32
# load data
33
data("hmp_T2D")
34
35
36
## ----timeOmics_1, eval=FALSE--------------------------------------------------
37
## # not evaluated in this vignette
38
## 
39
## #1 filter fold-change
40
## remove.low.cv <- function(X, cutoff = 0.5){
41
##     # var.coef
42
##     cv <- unlist(lapply(as.data.frame(X),
43
##                     function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE))))
44
##     return(X[,cv > cutoff])
45
## }
46
## fc.threshold <- list("RNA"= 1.5, "CLINICAL"=0.2, "GUT"=1.5, "METAB"=1.5,
47
##                      "PROT" = 1.5, "CYTO" = 1)
48
## 
49
## # --> hmp_T2D$raw
50
## data.filter <- imap(raw, ~{remove.low.cv(.x, cutoff = fc.threshold[[.y]])})
51
## 
52
## #2 scale
53
## data <- lapply(data.filter, function(x) log(x+1))
54
## # --> hmp_T2D$data
55
## 
56
## 
57
## #3 modelling
58
## lmms.func <- function(X){
59
##     time <- rownames(X) %>% str_split("_") %>%
60
##       map_chr(~.x[[2]]) %>% as.numeric()
61
##     lmms.output <- lmms::lmmSpline(data = X, time = time,
62
##                                    sampleID = rownames(X), deri = FALSE,
63
##                                    basis = "p-spline", numCores = 4,
64
##                                    keepModels = TRUE)
65
##     return(lmms.output)
66
## }
67
## data.modelled <- lapply(data, function(x) lmms.func(x))
68
## 
69
## # 4 clustering
70
## block.res <- block.pls(data.modelled, indY = 1, ncomp = 1)
71
## getCluster.res <- getCluster(block.res)
72
## # --> hmp_T2D$getCluster.res
73
## 
74
## 
75
## # 5 signature
76
## list.keepX <- list("CLINICAL" = 4, "CYTO" = 3, "GUT" = 10, "METAB" = 3,
77
##                    "PROT" = 2,"RNA" = 34)
78
## sparse.block.res  <- block.spls(data.modelled, indY = 1, ncomp = 1, scale =TRUE,
79
##                                 keepX =list.keepX)
80
## getCluster.sparse.res <- getCluster(sparse.block.res)
81
## # --> hmp_T2D$getCluster.sparse.res
82
83
84
## ----timeOmics_2--------------------------------------------------------------
85
# clustering results
86
cluster.info <- hmp_T2D$getCluster.res
87
88
89
## ----graph.rna, warning=FALSE-------------------------------------------------
90
cluster.info.RNA <- timeOmics::getCluster(cluster.info, user.block = "RNA")
91
graph.rna <- get_grn(X = hmp_T2D$data$RNA, cluster = cluster.info.RNA)
92
93
# to get info about the network
94
get_graph_stats(graph.rna)
95
96
97
## ----PROT_graph, warning=FALSE------------------------------------------------
98
# Utility function to get the molecules by cluster
99
get_list_mol_cluster <- function(cluster.info, user.block){
100
  require(timeOmics)
101
    tmp <- timeOmics::getCluster(cluster.info, user.block) 
102
    res <- tmp %>% split(.$cluster) %>% 
103
        lapply(function(x) x$molecule)
104
    res[["All"]] <- tmp$molecule
105
    return(res)
106
}
107
108
cluster.info.prot <- get_list_mol_cluster(cluster.info, user.block = 'PROT')
109
graph.prot <-  get_interaction_from_database(X = cluster.info.prot, 
110
                                             db = hmp_T2D$interaction.biogrid, 
111
                                             type = "PROT", user.ego = TRUE)
112
# get_graph_stats(graph.prot)
113
114
115
## ----GUT_graph, eval = FALSE--------------------------------------------------
116
## # not evaluated in this vignette
117
## library(SpiecEasi)
118
## 
119
## get_sparcc_graph <- function(X, threshold = 0.3){
120
##     res.sparcc <- sparcc(data = X)
121
##     sparcc.graph <- abs(res.sparcc$Cor) >= threshold
122
##     colnames(sparcc.graph) <-  colnames(X)
123
##     rownames(sparcc.graph) <-  colnames(X)
124
##     res.graph <- graph_from_adjacency_matrix(sparcc.graph,
125
##                                              mode = "undirected") %>% simplify
126
##     return(res.graph)
127
## }
128
## 
129
## gut_list <- get_list_mol_cluster(cluster.info, user.block = 'GUT')
130
## 
131
## graph.gut <- list()
132
## graph.gut[["All"]] <- get_sparcc_graph(hmp_T2D$raw$GUT, threshold = 0.3)
133
## graph.gut[["1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>%
134
##                                        dplyr::select(gut_list[["1"]]),
135
##                                      threshold = 0.3)
136
## graph.gut[["-1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>%
137
##                                         dplyr::select(gut_list[["-1"]]),
138
##                                       threshold = 0.3)
139
## class(graph.gut) <- "list.igraph"
140
141
142
## ----GUT----------------------------------------------------------------------
143
graph.gut <- hmp_T2D$graph.gut
144
# get_graph_stats(graph.gut)
145
146
147
## ----CYTO_graph, warning=FALSE------------------------------------------------
148
# CYTO -> from database (biogrid)
149
cyto_list = get_list_mol_cluster(cluster.info = cluster.info, 
150
                                 user.block = "CYTO")
151
graph.cyto <-  get_interaction_from_database(X = cyto_list,
152
                                             db = hmp_T2D$interaction.biogrid, 
153
                                             type = "CYTO", user.ego = TRUE)
154
# get_graph_stats(graph.cyto)
155
156
# METAB -> inference
157
cluster.info.metab <-  timeOmics::getCluster(X = cluster.info, 
158
                                             user.block = "METAB")
159
graph.metab <-  get_grn(X = hmp_T2D$data$METAB, 
160
                        cluster = cluster.info.metab)
161
# get_graph_stats(graph.metab)
162
163
# CLINICAL -> inference
164
cluster.info.clinical <- timeOmics::getCluster(X = cluster.info, 
165
                                               user.block = 'CLINICAL')
166
graph.clinical <- get_grn(X = hmp_T2D$data$CLINICAL,
167
                          cluster = cluster.info.clinical)
168
# get_graph_stats(graph.clinical)
169
170
171
## ----merged_0-----------------------------------------------------------------
172
full.graph <- combine_layers(graph1 = graph.rna, graph2 = graph.prot)
173
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.cyto)
174
175
full.graph <- combine_layers(graph1 = full.graph,
176
                             graph2 = hmp_T2D$interaction.TF)
177
# get_graph_stats(full.graph)
178
179
180
## ----merged_1_gut, warning=FALSE----------------------------------------------
181
all_data <- reduce(hmp_T2D$data, cbind)
182
183
# omic = gut
184
gut_list <- get_list_mol_cluster(cluster.info, user.block = "GUT")
185
omic_data <- lapply(gut_list, function(x)dplyr::select(hmp_T2D$data$GUT, x))
186
187
# other data = "RNA", "PROT", "CYTO"
188
other_data_list <- get_list_mol_cluster(cluster.info,
189
                                        user.block = c("RNA", "PROT", "CYTO"))
190
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))
191
192
# get interaction between gut data and other data
193
interaction_df_gut <- get_interaction_from_correlation(X = omic_data,
194
                                                       Y = other_data,
195
                                                       threshold = 0.99)
196
197
# and merge with full graph
198
full.graph <- combine_layers(graph1 = full.graph,
199
                             graph2 = graph.gut,
200
                             interaction.df = interaction_df_gut$All)
201
202
203
## ----merged_2_clinical, warning=FALSE-----------------------------------------
204
# omic =  Clinical
205
clinical_list <- get_list_mol_cluster(cluster.info, user.block = "CLINICAL")
206
omic_data <- lapply(clinical_list, 
207
                    function(x)dplyr::select(hmp_T2D$data$CLINICAL, x))
208
209
# other data = "RNA", "PROT", "CYTO", "GUT"
210
other_data_list <- get_list_mol_cluster(cluster.info,
211
                                        user.block = c("RNA", "PROT", 
212
                                                       "CYTO", "GUT"))
213
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))
214
215
216
# get interaction between gut data and other data
217
interaction_df_clinical <- get_interaction_from_correlation(X = omic_data
218
                                                            , Y = other_data,
219
                                                            threshold = 0.99)
220
221
# and merge with full graph
222
full.graph <- combine_layers(graph1 = full.graph,
223
                             graph2 = graph.clinical, 
224
                             interaction.df = interaction_df_clinical$All)
225
226
227
## ----merged_3_metab, warning=FALSE--------------------------------------------
228
# omic =  Metab
229
metab_list <- get_list_mol_cluster(cluster.info, user.block = "METAB")
230
omic_data <- lapply(metab_list, function(x)dplyr::select(hmp_T2D$data$METAB, x))
231
232
# other data = "RNA", "PROT", "CYTO", "GUT", "CLINICAL"
233
other_data_list <- get_list_mol_cluster(cluster.info,
234
                                        user.block = c("RNA", "PROT", "CYTO", 
235
                                                       "GUT", "CLINICAL"))
236
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))
237
238
# get interaction between gut data and other data
239
interaction_df_metab <- get_interaction_from_correlation(X = omic_data,
240
                                                         Y = other_data, 
241
                                                         threshold = 0.99)
242
243
# and merge with full graph
244
full.graph <- combine_layers(graph1 = full.graph, 
245
                             graph2 = graph.metab, 
246
                             interaction.df = interaction_df_metab$All)
247
248
249
## -----------------------------------------------------------------------------
250
# ORA by cluster/All
251
mol_ora <- get_list_mol_cluster(cluster.info, 
252
                                user.block = c("RNA", "PROT", "CYTO"))
253
254
# get ORA interaction graph by cluster
255
graph.go <- get_interaction_from_ORA(query = mol_ora,
256
                                     sources = "GO",
257
                                     organism = "hsapiens",
258
                                     signif.value = TRUE)
259
260
# merge
261
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.go)
262
263
264
## -----------------------------------------------------------------------------
265
# medlineRanker -> database
266
medlineranker.res.df <- hmp_T2D$medlineranker.res.df %>% 
267
  dplyr::select(Disease, symbol) %>% 
268
  set_names(c("from", "to"))
269
  
270
mol_list <-  get_list_mol_cluster(cluster.info = cluster.info,
271
                                  user.block = c("RNA", "PROT", "CYTO"))
272
graph.medlineranker <-  get_interaction_from_database(X = mol_list,
273
                                                      db = medlineranker.res.df, 
274
                                                      type = "Disease",
275
                                                      user.ego = TRUE)
276
# get_graph_stats(graph.medlineranker)
277
278
# merging
279
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.medlineranker)
280
281
282
## -----------------------------------------------------------------------------
283
# graph cleaning
284
graph_cleaning <- function(X, cluster.info){
285
    # no reusability
286
    X <- igraph::simplify(X)
287
    va <- vertex_attr(X)
288
    viewed_mol <- c()
289
    for(omic in unique(cluster.info$block)){
290
        mol <- intersect(cluster.info %>% dplyr::filter(.$block == omic) %>%
291
                           pull(molecule), V(X)$name)
292
        viewed_mol <- c(viewed_mol, mol)
293
        X <- set_vertex_attr(graph = X, 
294
                             name = "type", 
295
                             index = mol, 
296
                             value = omic)
297
        X <- set_vertex_attr(graph = X, 
298
                             name = "mode",
299
                             index = mol,
300
                             value = "core")
301
    }
302
    # add medline ranker and go
303
    mol <- intersect(map(graph.go, ~ as_data_frame(.x)$to) %>%
304
                       unlist %>% unique(), V(X)$name) # only GO terms
305
    viewed_mol <- c(viewed_mol, mol)
306
    X <- set_vertex_attr(graph = X, name = "type", index = mol, value = "GO")
307
    X <- set_vertex_attr(graph = X, name = "mode", 
308
                         index = mol, value = "extended")
309
    
310
    mol <- intersect(as.character(medlineranker.res.df$from), V(X)$name)
311
    viewed_mol <- c(viewed_mol, mol)
312
    X <- set_vertex_attr(graph = X, name = "type",
313
                         index = mol, value = "Disease")
314
    X <- set_vertex_attr(graph = X, name = "mode",
315
                         index = mol, value = "extended")
316
    
317
    other_mol <- setdiff(V(X), viewed_mol)
318
    if(!is_empty(other_mol)){
319
        X <- set_vertex_attr(graph = X, name = "mode",
320
                             index = other_mol, value = "extended")
321
    }
322
    X <- set_vertex_attr(graph = X, name = "mode", 
323
                         index = intersect(cluster.info$molecule, V(X)$name), 
324
                         value = "core")
325
    
326
    # signature
327
    mol <-  intersect(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule)
328
    X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = TRUE)
329
    mol <-  setdiff(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule)
330
    X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = FALSE)
331
    
332
    return(X)
333
}
334
335
336
## -----------------------------------------------------------------------------
337
FULL <- lapply(full.graph, function(x) graph_cleaning(x, cluster.info))
338
get_graph_stats(FULL)
339
340
341
## ----eval = FALSE-------------------------------------------------------------
342
## # degree analysis
343
## d <- degree(FULL$All)
344
## hist(d)
345
## d[max(d)]
346
## 
347
## # modularity # Warnings: can take several minutes
348
## res.mod <- walktrap.community(FULL$All)
349
## # ...
350
## 
351
## # modularity
352
## sp <- shortest.paths(FULL$All)
353
354
355
## -----------------------------------------------------------------------------
356
# seeds = all vertices -> takes 5 minutes to run on regular computer
357
# seeds <- V(FULL$All)$name
358
# rwr_res <- random_walk_restart(FULL, seeds)
359
360
# seed = some GO terms
361
seeds <- head(V(FULL$All)$name[V(FULL$All)$type == "GO"])
362
rwr_res <- random_walk_restart(FULL, seeds)
363
364
365
## -----------------------------------------------------------------------------
366
rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res, 
367
                                                  attribute = "type", k = 15)
368
369
# a summary plot function
370
summary_plot_rwr_attributes(rwr_type_k15)
371
summary_plot_rwr_attributes(rwr_type_k15$All)
372
373
374
## -----------------------------------------------------------------------------
375
rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res$All, 
376
                                                  attribute = "cluster", k = 15)
377
summary_plot_rwr_attributes(rwr_type_k15)
378
379
380
## -----------------------------------------------------------------------------
381
sub_res <- rwr_type_k15$`GO:0005737`
382
sub <- plot_rwr_subnetwork(sub_res, legend = TRUE, plot = TRUE)
383
384
385
## -----------------------------------------------------------------------------
386
rwr_res <- random_walk_restart(FULL$All, seed = "ZNF263")
387
388
# closest GO term
389
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", 
390
                      value = "GO", top = 5)
391
392
# closest Disease
393
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", 
394
                      value = "Disease", top = 5)
395
396
# closest nodes with an attribute "cluster" and the value "-1"
397
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "cluster",
398
                      value = "-1", top = 5)
399
400
401
## ----eval = FALSE-------------------------------------------------------------
402
## seeds <- V(FULL$All)$name[V(FULL$All)$type %in% c("GO", "Disease")]
403
404
405
## -----------------------------------------------------------------------------
406
sessionInfo()
407