|
a |
|
b/vignettes/netOmics.R |
|
|
1 |
## ----echo=FALSE--------------------------------------------------------------- |
|
|
2 |
knitr::opts_chunk$set(fig.align = "center") |
|
|
3 |
|
|
|
4 |
|
|
|
5 |
## ----eval=FALSE--------------------------------------------------------------- |
|
|
6 |
## # install the package via BioConductor |
|
|
7 |
## if (!requireNamespace("BiocManager", quietly = TRUE)) |
|
|
8 |
## install.packages("BiocManager") |
|
|
9 |
## |
|
|
10 |
## BiocManager::install("netOmics") |
|
|
11 |
|
|
|
12 |
|
|
|
13 |
## ----eval=FALSE--------------------------------------------------------------- |
|
|
14 |
## # install the package via github |
|
|
15 |
## library(devtools) |
|
|
16 |
## install_github("abodein/netOmics") |
|
|
17 |
|
|
|
18 |
|
|
|
19 |
## ----eval=TRUE, message=FALSE------------------------------------------------- |
|
|
20 |
# load the package |
|
|
21 |
library(netOmics) |
|
|
22 |
|
|
|
23 |
|
|
|
24 |
## ----eval=TRUE, message=FALSE------------------------------------------------- |
|
|
25 |
# usefull packages to build this vignette |
|
|
26 |
library(timeOmics) |
|
|
27 |
library(tidyverse) |
|
|
28 |
library(igraph) |
|
|
29 |
|
|
|
30 |
|
|
|
31 |
## ----load_data---------------------------------------------------------------- |
|
|
32 |
# load data |
|
|
33 |
data("hmp_T2D") |
|
|
34 |
|
|
|
35 |
|
|
|
36 |
## ----timeOmics_1, eval=FALSE-------------------------------------------------- |
|
|
37 |
## # not evaluated in this vignette |
|
|
38 |
## |
|
|
39 |
## #1 filter fold-change |
|
|
40 |
## remove.low.cv <- function(X, cutoff = 0.5){ |
|
|
41 |
## # var.coef |
|
|
42 |
## cv <- unlist(lapply(as.data.frame(X), |
|
|
43 |
## function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE)))) |
|
|
44 |
## return(X[,cv > cutoff]) |
|
|
45 |
## } |
|
|
46 |
## fc.threshold <- list("RNA"= 1.5, "CLINICAL"=0.2, "GUT"=1.5, "METAB"=1.5, |
|
|
47 |
## "PROT" = 1.5, "CYTO" = 1) |
|
|
48 |
## |
|
|
49 |
## # --> hmp_T2D$raw |
|
|
50 |
## data.filter <- imap(raw, ~{remove.low.cv(.x, cutoff = fc.threshold[[.y]])}) |
|
|
51 |
## |
|
|
52 |
## #2 scale |
|
|
53 |
## data <- lapply(data.filter, function(x) log(x+1)) |
|
|
54 |
## # --> hmp_T2D$data |
|
|
55 |
## |
|
|
56 |
## |
|
|
57 |
## #3 modelling |
|
|
58 |
## lmms.func <- function(X){ |
|
|
59 |
## time <- rownames(X) %>% str_split("_") %>% |
|
|
60 |
## map_chr(~.x[[2]]) %>% as.numeric() |
|
|
61 |
## lmms.output <- lmms::lmmSpline(data = X, time = time, |
|
|
62 |
## sampleID = rownames(X), deri = FALSE, |
|
|
63 |
## basis = "p-spline", numCores = 4, |
|
|
64 |
## keepModels = TRUE) |
|
|
65 |
## return(lmms.output) |
|
|
66 |
## } |
|
|
67 |
## data.modelled <- lapply(data, function(x) lmms.func(x)) |
|
|
68 |
## |
|
|
69 |
## # 4 clustering |
|
|
70 |
## block.res <- block.pls(data.modelled, indY = 1, ncomp = 1) |
|
|
71 |
## getCluster.res <- getCluster(block.res) |
|
|
72 |
## # --> hmp_T2D$getCluster.res |
|
|
73 |
## |
|
|
74 |
## |
|
|
75 |
## # 5 signature |
|
|
76 |
## list.keepX <- list("CLINICAL" = 4, "CYTO" = 3, "GUT" = 10, "METAB" = 3, |
|
|
77 |
## "PROT" = 2,"RNA" = 34) |
|
|
78 |
## sparse.block.res <- block.spls(data.modelled, indY = 1, ncomp = 1, scale =TRUE, |
|
|
79 |
## keepX =list.keepX) |
|
|
80 |
## getCluster.sparse.res <- getCluster(sparse.block.res) |
|
|
81 |
## # --> hmp_T2D$getCluster.sparse.res |
|
|
82 |
|
|
|
83 |
|
|
|
84 |
## ----timeOmics_2-------------------------------------------------------------- |
|
|
85 |
# clustering results |
|
|
86 |
cluster.info <- hmp_T2D$getCluster.res |
|
|
87 |
|
|
|
88 |
|
|
|
89 |
## ----graph.rna, warning=FALSE------------------------------------------------- |
|
|
90 |
cluster.info.RNA <- timeOmics::getCluster(cluster.info, user.block = "RNA") |
|
|
91 |
graph.rna <- get_grn(X = hmp_T2D$data$RNA, cluster = cluster.info.RNA) |
|
|
92 |
|
|
|
93 |
# to get info about the network |
|
|
94 |
get_graph_stats(graph.rna) |
|
|
95 |
|
|
|
96 |
|
|
|
97 |
## ----PROT_graph, warning=FALSE------------------------------------------------ |
|
|
98 |
# Utility function to get the molecules by cluster |
|
|
99 |
get_list_mol_cluster <- function(cluster.info, user.block){ |
|
|
100 |
require(timeOmics) |
|
|
101 |
tmp <- timeOmics::getCluster(cluster.info, user.block) |
|
|
102 |
res <- tmp %>% split(.$cluster) %>% |
|
|
103 |
lapply(function(x) x$molecule) |
|
|
104 |
res[["All"]] <- tmp$molecule |
|
|
105 |
return(res) |
|
|
106 |
} |
|
|
107 |
|
|
|
108 |
cluster.info.prot <- get_list_mol_cluster(cluster.info, user.block = 'PROT') |
|
|
109 |
graph.prot <- get_interaction_from_database(X = cluster.info.prot, |
|
|
110 |
db = hmp_T2D$interaction.biogrid, |
|
|
111 |
type = "PROT", user.ego = TRUE) |
|
|
112 |
# get_graph_stats(graph.prot) |
|
|
113 |
|
|
|
114 |
|
|
|
115 |
## ----GUT_graph, eval = FALSE-------------------------------------------------- |
|
|
116 |
## # not evaluated in this vignette |
|
|
117 |
## library(SpiecEasi) |
|
|
118 |
## |
|
|
119 |
## get_sparcc_graph <- function(X, threshold = 0.3){ |
|
|
120 |
## res.sparcc <- sparcc(data = X) |
|
|
121 |
## sparcc.graph <- abs(res.sparcc$Cor) >= threshold |
|
|
122 |
## colnames(sparcc.graph) <- colnames(X) |
|
|
123 |
## rownames(sparcc.graph) <- colnames(X) |
|
|
124 |
## res.graph <- graph_from_adjacency_matrix(sparcc.graph, |
|
|
125 |
## mode = "undirected") %>% simplify |
|
|
126 |
## return(res.graph) |
|
|
127 |
## } |
|
|
128 |
## |
|
|
129 |
## gut_list <- get_list_mol_cluster(cluster.info, user.block = 'GUT') |
|
|
130 |
## |
|
|
131 |
## graph.gut <- list() |
|
|
132 |
## graph.gut[["All"]] <- get_sparcc_graph(hmp_T2D$raw$GUT, threshold = 0.3) |
|
|
133 |
## graph.gut[["1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>% |
|
|
134 |
## dplyr::select(gut_list[["1"]]), |
|
|
135 |
## threshold = 0.3) |
|
|
136 |
## graph.gut[["-1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>% |
|
|
137 |
## dplyr::select(gut_list[["-1"]]), |
|
|
138 |
## threshold = 0.3) |
|
|
139 |
## class(graph.gut) <- "list.igraph" |
|
|
140 |
|
|
|
141 |
|
|
|
142 |
## ----GUT---------------------------------------------------------------------- |
|
|
143 |
graph.gut <- hmp_T2D$graph.gut |
|
|
144 |
# get_graph_stats(graph.gut) |
|
|
145 |
|
|
|
146 |
|
|
|
147 |
## ----CYTO_graph, warning=FALSE------------------------------------------------ |
|
|
148 |
# CYTO -> from database (biogrid) |
|
|
149 |
cyto_list = get_list_mol_cluster(cluster.info = cluster.info, |
|
|
150 |
user.block = "CYTO") |
|
|
151 |
graph.cyto <- get_interaction_from_database(X = cyto_list, |
|
|
152 |
db = hmp_T2D$interaction.biogrid, |
|
|
153 |
type = "CYTO", user.ego = TRUE) |
|
|
154 |
# get_graph_stats(graph.cyto) |
|
|
155 |
|
|
|
156 |
# METAB -> inference |
|
|
157 |
cluster.info.metab <- timeOmics::getCluster(X = cluster.info, |
|
|
158 |
user.block = "METAB") |
|
|
159 |
graph.metab <- get_grn(X = hmp_T2D$data$METAB, |
|
|
160 |
cluster = cluster.info.metab) |
|
|
161 |
# get_graph_stats(graph.metab) |
|
|
162 |
|
|
|
163 |
# CLINICAL -> inference |
|
|
164 |
cluster.info.clinical <- timeOmics::getCluster(X = cluster.info, |
|
|
165 |
user.block = 'CLINICAL') |
|
|
166 |
graph.clinical <- get_grn(X = hmp_T2D$data$CLINICAL, |
|
|
167 |
cluster = cluster.info.clinical) |
|
|
168 |
# get_graph_stats(graph.clinical) |
|
|
169 |
|
|
|
170 |
|
|
|
171 |
## ----merged_0----------------------------------------------------------------- |
|
|
172 |
full.graph <- combine_layers(graph1 = graph.rna, graph2 = graph.prot) |
|
|
173 |
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.cyto) |
|
|
174 |
|
|
|
175 |
full.graph <- combine_layers(graph1 = full.graph, |
|
|
176 |
graph2 = hmp_T2D$interaction.TF) |
|
|
177 |
# get_graph_stats(full.graph) |
|
|
178 |
|
|
|
179 |
|
|
|
180 |
## ----merged_1_gut, warning=FALSE---------------------------------------------- |
|
|
181 |
all_data <- reduce(hmp_T2D$data, cbind) |
|
|
182 |
|
|
|
183 |
# omic = gut |
|
|
184 |
gut_list <- get_list_mol_cluster(cluster.info, user.block = "GUT") |
|
|
185 |
omic_data <- lapply(gut_list, function(x)dplyr::select(hmp_T2D$data$GUT, x)) |
|
|
186 |
|
|
|
187 |
# other data = "RNA", "PROT", "CYTO" |
|
|
188 |
other_data_list <- get_list_mol_cluster(cluster.info, |
|
|
189 |
user.block = c("RNA", "PROT", "CYTO")) |
|
|
190 |
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x)) |
|
|
191 |
|
|
|
192 |
# get interaction between gut data and other data |
|
|
193 |
interaction_df_gut <- get_interaction_from_correlation(X = omic_data, |
|
|
194 |
Y = other_data, |
|
|
195 |
threshold = 0.99) |
|
|
196 |
|
|
|
197 |
# and merge with full graph |
|
|
198 |
full.graph <- combine_layers(graph1 = full.graph, |
|
|
199 |
graph2 = graph.gut, |
|
|
200 |
interaction.df = interaction_df_gut$All) |
|
|
201 |
|
|
|
202 |
|
|
|
203 |
## ----merged_2_clinical, warning=FALSE----------------------------------------- |
|
|
204 |
# omic = Clinical |
|
|
205 |
clinical_list <- get_list_mol_cluster(cluster.info, user.block = "CLINICAL") |
|
|
206 |
omic_data <- lapply(clinical_list, |
|
|
207 |
function(x)dplyr::select(hmp_T2D$data$CLINICAL, x)) |
|
|
208 |
|
|
|
209 |
# other data = "RNA", "PROT", "CYTO", "GUT" |
|
|
210 |
other_data_list <- get_list_mol_cluster(cluster.info, |
|
|
211 |
user.block = c("RNA", "PROT", |
|
|
212 |
"CYTO", "GUT")) |
|
|
213 |
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x)) |
|
|
214 |
|
|
|
215 |
|
|
|
216 |
# get interaction between gut data and other data |
|
|
217 |
interaction_df_clinical <- get_interaction_from_correlation(X = omic_data |
|
|
218 |
, Y = other_data, |
|
|
219 |
threshold = 0.99) |
|
|
220 |
|
|
|
221 |
# and merge with full graph |
|
|
222 |
full.graph <- combine_layers(graph1 = full.graph, |
|
|
223 |
graph2 = graph.clinical, |
|
|
224 |
interaction.df = interaction_df_clinical$All) |
|
|
225 |
|
|
|
226 |
|
|
|
227 |
## ----merged_3_metab, warning=FALSE-------------------------------------------- |
|
|
228 |
# omic = Metab |
|
|
229 |
metab_list <- get_list_mol_cluster(cluster.info, user.block = "METAB") |
|
|
230 |
omic_data <- lapply(metab_list, function(x)dplyr::select(hmp_T2D$data$METAB, x)) |
|
|
231 |
|
|
|
232 |
# other data = "RNA", "PROT", "CYTO", "GUT", "CLINICAL" |
|
|
233 |
other_data_list <- get_list_mol_cluster(cluster.info, |
|
|
234 |
user.block = c("RNA", "PROT", "CYTO", |
|
|
235 |
"GUT", "CLINICAL")) |
|
|
236 |
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x)) |
|
|
237 |
|
|
|
238 |
# get interaction between gut data and other data |
|
|
239 |
interaction_df_metab <- get_interaction_from_correlation(X = omic_data, |
|
|
240 |
Y = other_data, |
|
|
241 |
threshold = 0.99) |
|
|
242 |
|
|
|
243 |
# and merge with full graph |
|
|
244 |
full.graph <- combine_layers(graph1 = full.graph, |
|
|
245 |
graph2 = graph.metab, |
|
|
246 |
interaction.df = interaction_df_metab$All) |
|
|
247 |
|
|
|
248 |
|
|
|
249 |
## ----------------------------------------------------------------------------- |
|
|
250 |
# ORA by cluster/All |
|
|
251 |
mol_ora <- get_list_mol_cluster(cluster.info, |
|
|
252 |
user.block = c("RNA", "PROT", "CYTO")) |
|
|
253 |
|
|
|
254 |
# get ORA interaction graph by cluster |
|
|
255 |
graph.go <- get_interaction_from_ORA(query = mol_ora, |
|
|
256 |
sources = "GO", |
|
|
257 |
organism = "hsapiens", |
|
|
258 |
signif.value = TRUE) |
|
|
259 |
|
|
|
260 |
# merge |
|
|
261 |
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.go) |
|
|
262 |
|
|
|
263 |
|
|
|
264 |
## ----------------------------------------------------------------------------- |
|
|
265 |
# medlineRanker -> database |
|
|
266 |
medlineranker.res.df <- hmp_T2D$medlineranker.res.df %>% |
|
|
267 |
dplyr::select(Disease, symbol) %>% |
|
|
268 |
set_names(c("from", "to")) |
|
|
269 |
|
|
|
270 |
mol_list <- get_list_mol_cluster(cluster.info = cluster.info, |
|
|
271 |
user.block = c("RNA", "PROT", "CYTO")) |
|
|
272 |
graph.medlineranker <- get_interaction_from_database(X = mol_list, |
|
|
273 |
db = medlineranker.res.df, |
|
|
274 |
type = "Disease", |
|
|
275 |
user.ego = TRUE) |
|
|
276 |
# get_graph_stats(graph.medlineranker) |
|
|
277 |
|
|
|
278 |
# merging |
|
|
279 |
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.medlineranker) |
|
|
280 |
|
|
|
281 |
|
|
|
282 |
## ----------------------------------------------------------------------------- |
|
|
283 |
# graph cleaning |
|
|
284 |
graph_cleaning <- function(X, cluster.info){ |
|
|
285 |
# no reusability |
|
|
286 |
X <- igraph::simplify(X) |
|
|
287 |
va <- vertex_attr(X) |
|
|
288 |
viewed_mol <- c() |
|
|
289 |
for(omic in unique(cluster.info$block)){ |
|
|
290 |
mol <- intersect(cluster.info %>% dplyr::filter(.$block == omic) %>% |
|
|
291 |
pull(molecule), V(X)$name) |
|
|
292 |
viewed_mol <- c(viewed_mol, mol) |
|
|
293 |
X <- set_vertex_attr(graph = X, |
|
|
294 |
name = "type", |
|
|
295 |
index = mol, |
|
|
296 |
value = omic) |
|
|
297 |
X <- set_vertex_attr(graph = X, |
|
|
298 |
name = "mode", |
|
|
299 |
index = mol, |
|
|
300 |
value = "core") |
|
|
301 |
} |
|
|
302 |
# add medline ranker and go |
|
|
303 |
mol <- intersect(map(graph.go, ~ as_data_frame(.x)$to) %>% |
|
|
304 |
unlist %>% unique(), V(X)$name) # only GO terms |
|
|
305 |
viewed_mol <- c(viewed_mol, mol) |
|
|
306 |
X <- set_vertex_attr(graph = X, name = "type", index = mol, value = "GO") |
|
|
307 |
X <- set_vertex_attr(graph = X, name = "mode", |
|
|
308 |
index = mol, value = "extended") |
|
|
309 |
|
|
|
310 |
mol <- intersect(as.character(medlineranker.res.df$from), V(X)$name) |
|
|
311 |
viewed_mol <- c(viewed_mol, mol) |
|
|
312 |
X <- set_vertex_attr(graph = X, name = "type", |
|
|
313 |
index = mol, value = "Disease") |
|
|
314 |
X <- set_vertex_attr(graph = X, name = "mode", |
|
|
315 |
index = mol, value = "extended") |
|
|
316 |
|
|
|
317 |
other_mol <- setdiff(V(X), viewed_mol) |
|
|
318 |
if(!is_empty(other_mol)){ |
|
|
319 |
X <- set_vertex_attr(graph = X, name = "mode", |
|
|
320 |
index = other_mol, value = "extended") |
|
|
321 |
} |
|
|
322 |
X <- set_vertex_attr(graph = X, name = "mode", |
|
|
323 |
index = intersect(cluster.info$molecule, V(X)$name), |
|
|
324 |
value = "core") |
|
|
325 |
|
|
|
326 |
# signature |
|
|
327 |
mol <- intersect(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule) |
|
|
328 |
X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = TRUE) |
|
|
329 |
mol <- setdiff(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule) |
|
|
330 |
X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = FALSE) |
|
|
331 |
|
|
|
332 |
return(X) |
|
|
333 |
} |
|
|
334 |
|
|
|
335 |
|
|
|
336 |
## ----------------------------------------------------------------------------- |
|
|
337 |
FULL <- lapply(full.graph, function(x) graph_cleaning(x, cluster.info)) |
|
|
338 |
get_graph_stats(FULL) |
|
|
339 |
|
|
|
340 |
|
|
|
341 |
## ----eval = FALSE------------------------------------------------------------- |
|
|
342 |
## # degree analysis |
|
|
343 |
## d <- degree(FULL$All) |
|
|
344 |
## hist(d) |
|
|
345 |
## d[max(d)] |
|
|
346 |
## |
|
|
347 |
## # modularity # Warnings: can take several minutes |
|
|
348 |
## res.mod <- walktrap.community(FULL$All) |
|
|
349 |
## # ... |
|
|
350 |
## |
|
|
351 |
## # modularity |
|
|
352 |
## sp <- shortest.paths(FULL$All) |
|
|
353 |
|
|
|
354 |
|
|
|
355 |
## ----------------------------------------------------------------------------- |
|
|
356 |
# seeds = all vertices -> takes 5 minutes to run on regular computer |
|
|
357 |
# seeds <- V(FULL$All)$name |
|
|
358 |
# rwr_res <- random_walk_restart(FULL, seeds) |
|
|
359 |
|
|
|
360 |
# seed = some GO terms |
|
|
361 |
seeds <- head(V(FULL$All)$name[V(FULL$All)$type == "GO"]) |
|
|
362 |
rwr_res <- random_walk_restart(FULL, seeds) |
|
|
363 |
|
|
|
364 |
|
|
|
365 |
## ----------------------------------------------------------------------------- |
|
|
366 |
rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res, |
|
|
367 |
attribute = "type", k = 15) |
|
|
368 |
|
|
|
369 |
# a summary plot function |
|
|
370 |
summary_plot_rwr_attributes(rwr_type_k15) |
|
|
371 |
summary_plot_rwr_attributes(rwr_type_k15$All) |
|
|
372 |
|
|
|
373 |
|
|
|
374 |
## ----------------------------------------------------------------------------- |
|
|
375 |
rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res$All, |
|
|
376 |
attribute = "cluster", k = 15) |
|
|
377 |
summary_plot_rwr_attributes(rwr_type_k15) |
|
|
378 |
|
|
|
379 |
|
|
|
380 |
## ----------------------------------------------------------------------------- |
|
|
381 |
sub_res <- rwr_type_k15$`GO:0005737` |
|
|
382 |
sub <- plot_rwr_subnetwork(sub_res, legend = TRUE, plot = TRUE) |
|
|
383 |
|
|
|
384 |
|
|
|
385 |
## ----------------------------------------------------------------------------- |
|
|
386 |
rwr_res <- random_walk_restart(FULL$All, seed = "ZNF263") |
|
|
387 |
|
|
|
388 |
# closest GO term |
|
|
389 |
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", |
|
|
390 |
value = "GO", top = 5) |
|
|
391 |
|
|
|
392 |
# closest Disease |
|
|
393 |
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", |
|
|
394 |
value = "Disease", top = 5) |
|
|
395 |
|
|
|
396 |
# closest nodes with an attribute "cluster" and the value "-1" |
|
|
397 |
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "cluster", |
|
|
398 |
value = "-1", top = 5) |
|
|
399 |
|
|
|
400 |
|
|
|
401 |
## ----eval = FALSE------------------------------------------------------------- |
|
|
402 |
## seeds <- V(FULL$All)$name[V(FULL$All)$type %in% c("GO", "Disease")] |
|
|
403 |
|
|
|
404 |
|
|
|
405 |
## ----------------------------------------------------------------------------- |
|
|
406 |
sessionInfo() |
|
|
407 |
|