Diff of /data-raw/hmp_T2D_raw.R [000000] .. [73f552]

Switch to unified view

a b/data-raw/hmp_T2D_raw.R
1
2
library(tidyverse)
3
library(timeOmics)
4
library(lubridate)
5
library(lmms)
6
7
rm(list=ls())
8
9
# RData from:
10
# Sailani, M. R., Metwally, A. A., Zhou, W., Rose, S. M. S. F., Ahadi, S., Contrepois, K., ... & Snyder, M. P. (2020). 
11
# Deep longitudinal multiomics profiling reveals two biological seasonal patterns in California. Nature communications, 11(1), 1-12.
12
load("/home/antoine/Documents/timeomics_analysis/HMP_seasoning/Multi_Omics_Seasonal.RData")
13
14
# 0. DATA CLEANING
15
16
Gut_annotation_colData <- Gut_annotation_colData %>%
17
    mutate(YMD = lubridate::dmy(IRIS)) %>%
18
    mutate(Date = IRIS) %>%
19
    mutate(Time = yday(YMD)) %>%
20
    mutate(omics = "Gut") %>% dplyr::select(-Date, -BMI, -IRIS)
21
22
list_lab <- list("RNA" = RNA_annotation_colData,
23
                 "Metabo" = Metabolomics_annotation_colData,
24
                 #"Gut" =Gut_annotation_colData,
25
                 "Clinical" = Clinical_labs_annotation_colData)
26
27
list_lab_df <- imap_dfr(list_lab, ~{.x %>% 
28
        mutate("omics" = .y) %>% 
29
        mutate(YMD = lubridate::ymd(as.Date(Date))) %>% 
30
        dplyr::select(-Date)
31
})
32
33
IRIS_BMI <- list_lab_df[c(1:3)] %>% unique() 
34
IRIS_only <- IRIS_BMI %>% dplyr::select(-BMI) %>% unique %>% filter(!is.na(IRIS), !is.na(SubjectID)) %>%
35
    unique
36
IRIS_1 <- IRIS_only %>% group_by(SubjectID) %>%
37
    dplyr::summarise(N = n()) %>%
38
    filter(N == 1) %>% pull(SubjectID) %>% as.character()
39
IRIS_only <- IRIS_only %>% filter(SubjectID %in% IRIS_1)
40
41
# 1. DATA PREPARATION
42
# GUT
43
###########################
44
GUT_sample <- Gut_annotation_colData %>% 
45
    mutate(Year = ifelse(year(YMD) < 2000, year(YMD) +2000, year(YMD))) %>%
46
    left_join(IRIS_only) %>%
47
    mutate(SampleID = paste0(SubjectID, "_", Time, "_",  Year, "_", IRIS, "_", rownames(.)))
48
49
GUT <- gut_df_Data
50
rownames(GUT) <- GUT_sample$SampleID
51
52
# CLINICAL
53
###########################
54
CLINICAL_sample <- Clinical_labs_annotation_colData %>% 
55
    mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>%
56
    left_join(IRIS_only) %>%
57
    mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) 
58
59
CLINICAL <- clinical_labs_Data
60
rownames(CLINICAL) <- CLINICAL_sample$SampleID
61
index.na <- CLINICAL %>% lapply(function(x) is.na(x) %>% sum) %>% unlist
62
CLINICAL <- CLINICAL[index.na<=11] %>% na.omit
63
64
# RNA
65
###########################
66
RNA_sample <- RNA_annotation_colData %>% 
67
    mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>%
68
    left_join(IRIS_only) %>%
69
    mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) 
70
71
index.NA <- (!is.na(RNA_annotation_colData$Time) & !is.na(RNA_annotation_colData$Time))
72
RNA_sample <- RNA_sample[index.NA,]
73
RNA <- RNA_df_Data[index.NA,]
74
rownames(RNA) <- RNA_sample$SampleID
75
76
# NOSE
77
###########################
78
Nasal_sample <- Nasal_annotation_colData %>% 
79
    mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>%
80
    left_join(IRIS_only) %>%
81
    mutate(SampleID = paste0(SubjectID, "_", Time, "_",  Year, "_", IRIS, "_", rownames(.)))
82
83
NASAL <- Nasal_df_Data
84
rownames(NASAL) <- Nasal_sample$SampleID
85
86
# PROTEIN
87
###########################
88
PROT_sample <- Proteomics_annotation_colData %>% 
89
    mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>%
90
    left_join(IRIS_only) %>%
91
    mutate(SampleID = paste0(SubjectID, "_", Time, "_",  Year, "_", IRIS, "_", rownames(.)))
92
93
PROT <- Proteomics_df_Data
94
rownames(PROT) <- PROT_sample$SampleID
95
96
# METABOLITE
97
###########################
98
METAB_sample <- Metabolomics_annotation_colData %>% 
99
    mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>%
100
    left_join(IRIS_only) %>%
101
    mutate(SampleID = paste0(SubjectID, "_", Time, "_",  Year, "_", IRIS, "_", rownames(.)))
102
103
METAB <- Metabolomics_df_Data
104
rownames(METAB) <- METAB_sample$SampleID
105
106
# CYTOKINE
107
###########################
108
CYTO_sample <- Cytokines_annotation_colData %>% 
109
    mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>%
110
    left_join(IRIS_only) %>%
111
    mutate(SampleID = paste0(SubjectID, "_", Time, "_",  Year, "_", IRIS, "_", rownames(.)))
112
113
CYTO <- Cytokines_df_Data
114
rownames(CYTO) <- CYTO_sample$SampleID
115
116
############################
117
118
# DATA: only RNA/CLINICAL/GUT/METAB
119
# split by IR/IS
120
DATA <- list("RNA.IR" = RNA[str_split(rownames(RNA),"_") %>% map_chr(~.x[[4]]) == "IR",],
121
             "GUT.IR" = GUT[str_split(rownames(GUT),"_") %>% map_chr(~.x[[4]]) == "IR",],
122
             
123
             "CLINICAL.IR" = CLINICAL[str_split(rownames(CLINICAL),"_") %>% map_chr(~.x[[4]]) == "IR",],
124
             "RNA.IS" = RNA[str_split(rownames(RNA),"_") %>% map_chr(~.x[[4]]) == "IS",],
125
             
126
             "GUT.IS" = GUT[str_split(rownames(GUT),"_") %>% map_chr(~.x[[4]]) == "IS",],
127
             "CLINICAL.IS" = CLINICAL[str_split(rownames(CLINICAL),"_") %>% map_chr(~.x[[4]]) == "IS",],
128
             
129
             "METAB.IR" = METAB[str_split(rownames(METAB),"_") %>% map_chr(~.x[[4]]) == "IR",],
130
             "METAB.IS" = METAB[str_split(rownames(METAB),"_") %>% map_chr(~.x[[4]]) == "IS",],
131
             
132
             "PROT.IS" = PROT[str_split(rownames(PROT),"_") %>% map_chr(~.x[[4]]) == "IS",],
133
             "PROT.IR" = PROT[str_split(rownames(PROT),"_") %>% map_chr(~.x[[4]]) == "IR",],
134
             
135
             "CYTO.IS" = CYTO[str_split(rownames(CYTO),"_") %>% map_chr(~.x[[4]]) == "IS",],
136
             "CYTO.IR" = CYTO[str_split(rownames(CYTO),"_") %>% map_chr(~.x[[4]]) == "IR",]
137
) 
138
139
COMBINED <- list("RNA" = RNA, CLINICAL = CLINICAL, GUT = GUT, METAB = METAB, PROT = PROT, CYTO = CYTO)
140
save(DATA, COMBINED, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/RAW_DATA.RDA")
141
############################################################
142
143
stat_raw_data <- lapply(list(RNA=RNA, GUT=GUT, METAB=METAB, CLINICAL=CLINICAL, PROT = PROT, CYTO = CYTO), dim) %>%
144
    as.data.frame() %>% t %>% as.data.frame() %>%
145
    setNames(c("sample", "feature"))
146
lapply(list(RNA=RNA, GUT=GUT, METAB=METAB, CLINICAL=CLINICAL, PROT = PROT, CYTO = CYTO), function(x){
147
    rownames(x) %>% str_remove("_.*") %>% unique %>% length()}) %>% 
148
    as.data.frame() %>%  t %>% as.data.frame() %>% setNames("uniqueID") %>%
149
    rownames_to_column("omic") %>%
150
    left_join(stat_raw_data %>% rownames_to_column("omic")) %>% column_to_rownames("omic") %>% t %>%
151
    as.data.frame() %>% knitr::kable()
152
153
# 2. DATA FILTERING
154
155
# 1. coef. of var
156
cv.data <- lapply(DATA, function(X){
157
    unlist(lapply(as.data.frame(X), 
158
                  function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE))))
159
})
160
161
fc.data <- list("RNA.IR"= 1.5, "RNA.IS"=1.5,
162
                "CLINICAL.IR"=0, "CLINICAL.IS"=0,
163
                "GUT.IR"=1.5, "GUT.IS"=1.5,
164
                "METAB.IR"=1.5 , "METAB.IS"=1.5, 
165
                "PROT.IR"=1.5 , "PROT.IS"=1.5, 
166
                "CYTO.IR" = 1.5, "CYTO.IS"=1.5)
167
168
par(mfrow = c(6,2))
169
#for(i in c("RNA.IR","CLINICAL.IR", "GUT.IR", "METAB.IR", "RNA.IS","CLINICAL.IS", "GUT.IS", "METAB.IS", "PROT.IR", "PROT.IS", "CYTO.IR", "CYTO.IS")){
170
for(i in c("RNA.IR","RNA.IS", "CLINICAL.IR", "CLINICAL.IS", "GUT.IR","GUT.IS", "METAB.IS", "METAB.IR", "PROT.IR", "PROT.IS", "CYTO.IR", "CYTO.IS")){
171
    
172
    hist(cv.data[[i]], breaks = 20, main =i)
173
    abline(v = fc.data[[i]], col = "red")
174
    legend("topright", legend = paste0("FC = ",fc.data[[i]]), col = "red", lty = 1) 
175
}
176
par(mfrow = c(1,1))
177
178
179
# 2. Remove low cv features
180
remove.low.cv <- function(X, cutoff = 0.5){
181
    # var.coef
182
    cv <- unlist(lapply(as.data.frame(X), 
183
                        function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE))))
184
    return(X[,cv > cutoff])
185
}
186
187
DATA.filtered <- list("RNA.IR" = remove.low.cv(DATA$RNA.IR, 1.5),
188
                      "RNA.IS" = remove.low.cv(DATA$RNA.IS, 1.5),
189
                      "GUT.IR" = remove.low.cv(DATA$GUT.IR, 1.5),
190
                      "GUT.IS" = remove.low.cv(DATA$GUT.IS, 1.5),
191
                      # "CLINICAL.IR" = remove.low.cv(DATA$CLINICAL.IR, 0),
192
                      # "CLINICAL.IS" = remove.low.cv(DATA$CLINICAL.IS, 0),
193
                      "CLINICAL.IR" = DATA$CLINICAL.IR,
194
                      "CLINICAL.IS" = DATA$CLINICAL.IS,
195
                      
196
                      "METAB.IR" = remove.low.cv(DATA$METAB.IR, 1.5),
197
                      "METAB.IS" = remove.low.cv(DATA$METAB.IS, 1.5), 
198
                      "PROT.IS" = remove.low.cv(DATA$PROT.IS, 1.5),
199
                      "PROT.IR" = remove.low.cv(DATA$PROT.IR, 1.5),
200
                      "CYTO.IS" = remove.low.cv(DATA$CYTO.IS, 1),
201
                      "CYTO.IR" = remove.low.cv(DATA$CYTO.IR, 1))
202
lapply(DATA.filtered, dim)
203
204
# 3. scale filtered value (log, scale, CLR)
205
206
# scale for OTU
207
norm_OTU <- function(DF, AR = F){
208
    DF <- DF + 0.0001
209
    
210
    data.TSS.clr = mixOmics::logratio.transfo(DF, logratio = 'CLR')
211
    
212
    # reconstrcuct dataframe
213
    data.good <- as.data.frame(matrix(ncol = ncol(data.TSS.clr), 
214
                                      nrow = nrow( data.TSS.clr)))
215
    rownames(data.good) <- rownames(data.TSS.clr)
216
    colnames(data.good) <- colnames(data.TSS.clr)
217
    for( i in c(1:nrow(data.TSS.clr))){
218
        for( j in c(1:ncol(data.TSS.clr))){
219
            data.good[i,j] <- data.TSS.clr[i,j]
220
        }
221
    }
222
    return(data.good)
223
}
224
225
226
DATA.filtered.scale <- list(
227
    "RNA.IR" = log(DATA.filtered$RNA.IR + 1) %>% scale,
228
    "RNA.IS" = log(DATA.filtered$RNA.IS + 1) %>% scale,
229
    
230
    "CLINICAL.IR" = log(DATA.filtered$CLINICAL.IR +1)%>% scale,
231
    "CLINICAL.IS" = log(DATA.filtered$CLINICAL.IS +1)%>% scale,
232
    
233
    "GUT.IR" = norm_OTU(DATA.filtered$GUT.IR),
234
    "GUT.IS" = norm_OTU(DATA.filtered$GUT.IS),
235
    
236
    "METAB.IR" = log(DATA.filtered$METAB.IR +1)%>% scale,
237
    "METAB.IS" = log(DATA.filtered$METAB.IS +1)%>% scale,
238
    
239
    # "PROT.IR" = log(DATA.filtered$PROT.IR +1)%>% scale,
240
    # "PROT.IS" = log(DATA.filtered$PROT.IS +1)%>% scale
241
    
242
    "PROT.IR" = DATA.filtered$PROT.IR,  # already scale
243
    "PROT.IS" = DATA.filtered$PROT.IS,
244
    
245
    "CYTO.IR" = log(DATA.filtered$CYTO.IR +1),  
246
    "CYTO.IS" = log(DATA.filtered$CYTO.IS +1)
247
    
248
)
249
250
lapply(DATA.filtered, dim) %>%
251
    as.data.frame() %>% t %>% as.data.frame() %>%
252
    setNames(c("sample", "feature")) %>%
253
    rownames_to_column("OMIC") %>%
254
    mutate(IRIS = str_extract(OMIC,"..$"), OMIC = str_remove(OMIC, "...$"))  %>%
255
    gather(meta, value, -c(OMIC, IRIS)) %>%
256
    spread(OMIC, value) %>% arrange(IRIS) %>%
257
    dplyr::select(IRIS, meta, RNA, GUT, METAB, CLINICAL, PROT, CYTO)
258
259
save(DATA.filtered.scale, DATA.filtered, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/DATA_FILTERED.RDA")
260
############################################################
261
262
fc.data.combined <- list("RNA"= 1.5, 
263
                         "CLINICAL"=0.2,
264
                         "GUT"=1.5,
265
                         "METAB"=1.5,
266
                         "PROT" = 1.5,
267
                         "CYTO" = 1)
268
cv.data.combined <- lapply(COMBINED, function(X){
269
    unlist(lapply(as.data.frame(X), 
270
                  function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE))))
271
})
272
fc.color <- list("RNA"= color.mixo(4), 
273
                 "CLINICAL"=color.mixo(1),
274
                 "GUT"=color.mixo(2),
275
                 "METAB"=color.mixo(3),
276
                 "PROT"=color.mixo(5),
277
                 "CYTO" = color.mixo(6))
278
279
par(mfrow = c(3,2))
280
for(i in c("RNA","CLINICAL", "GUT", "METAB", "PROT", "CYTO")){
281
    hist(cv.data.combined[[i]], breaks = 20, main =i, xlab = paste0("Var. Coef. (", i, ")"), 
282
         col = fc.color[[i]])
283
    abline(v = fc.data.combined[[i]], col = "red")
284
    legend("topright", legend = paste0("CV = ",fc.data.combined[[i]]), col = "red", lty = 1) 
285
}
286
par(mfrow = c(1,1))
287
288
# 3. MODELLING
289
290
lmms.func <- function(X, mode = "p-spline"){
291
    time <- rownames(X) %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric()
292
    lmms.output <- lmms::lmmSpline(data = X, time = time,
293
                                   sampleID = rownames(X), deri = FALSE,
294
                                   basis = mode, numCores = 4, 
295
                                   keepModels = TRUE)
296
    return(lmms.output)
297
}
298
299
# only one ID/Year
300
ID_u <- "ZLZNCLZ"
301
Year_u <- 2015
302
303
# ID_u <- "ZOZOW1T"
304
# Year_u <- 2015
305
306
307
tmp <-   imap_dfr(DATA.filtered.scale, ~{
308
    .x %>% as.data.frame() %>% rownames_to_column("sample") %>%
309
        gather(feature, value, -sample) %>%
310
        mutate(ID = str_split(sample, "_") %>% map_chr(~.x[[1]])) %>%
311
        mutate(year = str_split(sample, "_") %>% map_chr(~.x[[3]])) %>%
312
        mutate(OMIC = str_remove(.y, "...$")) %>% 
313
        mutate(IRIS = str_extract(.y, "..$"))
314
}) 
315
tmp %>%  dplyr::select(-feature, -value) %>% unique() %>% na.omit %>% 
316
    group_by(ID, year, OMIC, IRIS) %>% summarise(N = n()) %>% spread(OMIC, N) %>% 
317
    na.omit() %>% split(.$year)
318
tmp %>%  dplyr::select(-feature, -value) %>% unique() %>% na.omit %>% 
319
    group_by(ID, year, OMIC, IRIS) %>% summarise(N = n()) %>% spread(OMIC, N) %>% 
320
    filter(ID == ID_u) %>% dplyr::select(-IRIS) %>% na.omit
321
322
# just a filter to get only the selected ID/Year
323
DATA.GOOD <- imap_dfr(DATA.filtered.scale, ~{
324
    .x %>% as.data.frame() %>% rownames_to_column("sample") %>%
325
        gather(feature, value, -sample) %>%
326
        mutate(ID = str_split(sample, "_") %>% map_chr(~.x[[1]])) %>%
327
        mutate(year = str_split(sample, "_") %>% map_chr(~.x[[3]])) %>%
328
        mutate(OMIC = str_remove(.y, "...$")) %>%
329
        dplyr::filter(ID == ID_u) %>% 
330
        dplyr::filter(year == Year_u)
331
}) %>% split(.$OMIC) %>%
332
    purrr::map(~{
333
        .x %>% 
334
            dplyr::select(sample, feature, value) %>%
335
            spread(feature, value) %>% 
336
            column_to_rownames("sample")
337
    })
338
339
340
# only 1 year
341
MODELLED <- lapply(DATA.GOOD, function(x) lmms.func(x))
342
343
MODELLED %>% lapply(function(x)x@predSpline %>% dim) %>%
344
    as.data.frame() %>% t %>% as.data.frame() %>%
345
    setNames(c("sample", "feature")) %>% t %>%as.data.frame() %>%
346
    dplyr::select(RNA, GUT, METAB, CLINICAL, PROT)
347
348
MODELLED %>% imap_dfr(~.x@modelsUsed %>% table %>% as.data.frame  %>%
349
                          column_to_rownames(".") %>% t %>% as.data.frame %>% 
350
                          mutate(omic = .y)) %>% remove_rownames() %>%  
351
    column_to_rownames('omic') %>% t %>%
352
    as.data.frame() %>% dplyr::select(RNA, GUT, METAB, CLINICAL, PROT)
353
354
# 4. STRAIGHT LINE FILTERING
355
356
filterlmms.func <- function(modelled.data, lmms.output){
357
    time = modelled.data %>% rownames() %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric()
358
    #time = rownames(modelled.data) %>% as.numeric()
359
    filter.res <- lmms.filter.lines(data = modelled.data,
360
                                    lmms.obj = lmms.output, time = time,
361
                                    homoskedasticity.cutoff=0.05)$filtered
362
}
363
364
FILTER <- lapply(names(DATA.GOOD), function(x) filterlmms.func(modelled.data = DATA.GOOD[[x]], lmms.output = MODELLED[[x]]))
365
names(FILTER) <- names(MODELLED)
366
367
FILTER %>% lapply(dim) %>%
368
    as.data.frame() %>% t %>% as.data.frame() %>%
369
    setNames(c("sample", "feature")) %>%
370
    t %>%as.data.frame() %>%
371
    dplyr::select(RNA, GUT, METAB, CLINICAL)
372
373
FINAL.FILTER <- FILTER[c("CLINICAL", "GUT", "METAB", "RNA", "PROT")]
374
rownames(FINAL.FILTER[["GUT"]]) <- rownames(FINAL.FILTER[["RNA"]]) # change 86 par 85
375
376
DATA.LMMS <- lapply(MODELLED, function(x)x@predSpline %>% t %>% as.data.frame)
377
rownames(DATA.LMMS[["GUT"]]) <- rownames(DATA.LMMS[["RNA"]]) # change 86 par 85
378
379
save(FINAL.FILTER, MODELLED, DATA.GOOD, DATA.LMMS, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/LMMS.RDA")
380
381
lapply(DATA.LMMS, dim)
382
# 5. MULTI-OMICS CLUSTERING
383
384
block.res <- block.pls(DATA.LMMS, indY = 1, ncomp = 5)
385
getNcomp.res <- getNcomp(block.res, X = DATA.LMMS, indY = 1)
386
387
# block.res <- block.pls(FINAL.FILTER, indY = 1, ncomp = 3)
388
# getNcomp.res <- getNcomp(block.res, X = FINAL.FILTER, indY = 1)
389
390
plot(getNcomp.res)
391
392
# ncomp = 2
393
block.res <- block.pls(DATA.LMMS, indY = 1, ncomp = 1, scale =FALSE) 
394
395
# block.res <- block.pls(FINAL.FILTER, indY = 1, ncomp = 1, scale =FALSE) 
396
397
plotLong(object = block.res, title = "Block-PLS Clusters, scale = TRUE", legend = TRUE)
398
399
getCluster(block.res) %>% group_by(block, cluster) %>% summarise(N = n()) %>%
400
    spread(block, N) %>%
401
    dplyr::select(cluster, RNA, GUT, METAB, CLINICAL)
402
403
save(block.res, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/timeomics_res_block.rda")
404
405
406
# elagage
407
test.list.keepX <- list(
408
    "CLINICAL" = seq(2,8,by=1),
409
    "GUT" = seq(2,10,by=1),
410
    "METAB" = seq(2,9,by=1),
411
    "RNA" = seq(10,50,by=2)
412
)
413
414
tune.block.res <- tuneCluster.block.spls(X= FINAL.FILTER, indY = 1,
415
                                         test.list.keepX=test.list.keepX, 
416
                                         scale=FALSE, 
417
                                         mode = "canonical", ncomp = 1)
418
tune.block.res$choice.keepX 
419
final.block <- block.spls(FINAL.FILTER, indY = 1, ncomp = 1, scale =FALSE, 
420
                          keepX = tune.block.res$choice.keepX) 
421
plotLong(final.block, legend = TRUE)
422
423
getCluster(final.block) %>% group_by(block, cluster) %>% summarise(N = n()) %>%
424
    spread(block, N) %>%
425
    dplyr::select(cluster, RNA, GUT, METAB, CLINICAL)
426
427
library("openxlsx")
428
cluster_comp <- getCluster(final.block) %>% dplyr::select(molecule, block, cluster, comp, contribution) %>% 
429
    mutate(cluster = ifelse(cluster == -1, "Cluster 1", "Cluster 2")) %>%
430
    split(.$cluster)
431
write.xlsx(cluster_comp, file = "cluster_composition.xlsx")
432
433
434
# final data for netOmics package // shrink
435
#   DATA.GOOD -> ! individual, 7 timepoints but no modelisation
436
#   DATA RAW -> filter based on DATA.GOOD molecules (for OTU -> sparcc needs RAW)
437
# DATA$RNA.IS %>% rownames() %>% str_remove("_.*") %>% str_detect("ZLZNCLZ") %>% any()
438
# [1] TRUE
439
440
hmp_T2D <- list()
441
hmp_T2D$raw <- list() 
442
hmp_T2D$data <- list()
443
for(i in names(DATA.GOOD)){
444
    # hmp_diabetes$raw[[i]] <- DATA[[paste0(i, ".IS")]][str_detect(rownames(DATA[[paste0(i, ".IS")]]), "ZLZNCLZ"), colnames(DATA.GOOD[[i]])]
445
    hmp_T2D$raw[[i]] <- DATA[[paste0(i, ".IS")]][rownames(DATA.GOOD[[i]]), colnames(DATA.GOOD[[i]])]
446
    rownames(hmp_T2D$raw[[i]]) <- rownames(DATA.GOOD$RNA)
447
    hmp_T2D$raw[[i]] <- hmp_T2D$raw[[i]] %>% rownames_to_column("Rownames") %>% 
448
        mutate(Rownames = Rownames %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric()) %>% 
449
        arrange(Rownames) %>% column_to_rownames(var = "Rownames")
450
    
451
    #data
452
    hmp_T2D$data[[i]] <- DATA.GOOD[[i]]
453
    rownames(hmp_T2D$data[[i]]) <- rownames(hmp_T2D$raw[[i]])
454
}
455
456
hmp_T2D$data <- DATA.GOOD
457
lmms.func <- function(X){
458
    # time <- rownames(X) %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric()
459
    time <- rownames(X) %>% as.numeric()
460
    lmms.output <- lmms::lmmSpline(data = X, time = time,
461
                                   sampleID = rownames(X), deri = FALSE,
462
                                   basis = "p-spline", numCores = 4, 
463
                                   keepModels = TRUE)
464
    return(lmms.output)
465
}
466
467
MODELLED <- lapply(hmp_T2D$data, function(x) lmms.func(x))
468
469
MODELLED %>% imap_dfr(~.x@modelsUsed %>% table %>% as.data.frame  %>%
470
                          column_to_rownames(".") %>% t %>% as.data.frame %>% 
471
                          mutate(omic = .y)) %>% remove_rownames() %>%  
472
    column_to_rownames('omic') %>% t %>%
473
    as.data.frame()
474
475
data.MODELLED <-  lapply(MODELLED, function(x) x@predSpline %>% t)
476
rownames(data.MODELLED$GUT) <- rownames(data.MODELLED$RNA)
477
478
479
# .$timeOmics
480
# block.res.no.model  <- block.pls(hmp_T2D$data, indY = 1, ncomp = 5, scale =FALSE) 
481
# getNcomp.res <- getNcomp(block.res.no.model, X = hmp_T2D$data, indY = 1)
482
# plot(getNcomp.res)
483
block.res.no.model  <- block.pls(hmp_T2D$data, indY = 1, ncomp = 1, scale =TRUE) 
484
hmp_T2D$getCluster.res <- getCluster(block.res.no.model)
485
486
block.res.w.model  <- block.pls(data.MODELLED, indY = 1, ncomp = 5, scale =TRUE) 
487
getNcomp.res <- getNcomp(block.res.w.model, X = data.MODELLED, indY = 1)
488
block.res.w.model  <- block.pls(data.MODELLED, indY = 1, ncomp = 1, scale =TRUE) 
489
490
hmp_T2D$getCluster.res <- getCluster(block.res.w.model)
491
492
# .$sparse
493
test.list.keepX <- list(
494
    "CLINICAL" = seq(2,39,by=2),
495
    "CYTO" = seq(2,10,b=2),
496
    "GUT" = seq(2,50,by=2),
497
    "METAB" = seq(2,50,by=2),
498
    "PROT" = seq(2,30,b=2),
499
    "RNA" = seq(10,100,by=3))
500
501
# tune.block.res <- tuneCluster.block.spls(X= hmp_T2D$data, indY = 1,
502
#                                          test.list.keepX=test.list.keepX, 
503
#                                          scale=FALSE, 
504
#                                          mode = "canonical", ncomp = 1)
505
# too long
506
507
list.keepX <- list(
508
    "CLINICAL" = 4,
509
    "CYTO" = 3,
510
    "GUT" = 10,
511
    "METAB" = 3,
512
    "PROT" = 2,
513
    "RNA" = 34)
514
515
sparse.block.res.w.model  <- block.spls(data.MODELLED, indY = 1, ncomp = 1, scale =TRUE, keepX = list.keepX) 
516
plotLong(sparse.block.res.w.model, scale = TRUE, legend = TRUE)
517
518
519
hmp_T2D$getCluster.sparse.res <-  getCluster(sparse.block.res.w.model)
520
timeOmics::getSilhouette(sparse.block.res.w.model)
521
timeOmics::getSilhouette(block.res.w.model)
522
523
## Biogrid ## DATABASES
524
biogrid <- read_tsv("/home/antoine/Documents/TO2/netOmics-case-studies/HeLa_Cell_Cycling/data/BIOGRID-ALL-3.5.187.tab3.txt")
525
biogrid.filtered <- biogrid %>% dplyr::select("Official Symbol Interactor A", "Official Symbol Interactor B") %>% unique %>%
526
    set_names(c("from", "to"))
527
528
biogrid.filtered.tmp <- biogrid.filtered %>% filter(from %in% cluster.info$molecule | to %in% cluster.info$molecule)
529
530
hmp_T2D$interaction.biogrid <- biogrid.filtered.tmp
531
532
TFome <- readRDS( "~/Documents/TO2/TFome.Rds")
533
tf.1 <- TFome %>% filter(TF %in% hmp_T2D$getCluster.res$molecule | Target %in% hmp_T2D$getCluster.res$molecule) %>% 
534
    unlist() %>% unique
535
hmp_T2D$interaction.TF <- TFome %>% filter(TF %in% tf.1 | Target %in% tf.1)
536
537
hmp_T2D$interaction.TF <- TFome.igraph
538
539
hmp_T2D$interaction.TF <- TF.interact
540
usethis::use_data(hmp_T2D, overwrite = TRUE)
541
542
543
medlineranker.res <- read_tsv("/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/res_medlineranker_all.csv")
544
tmp <- dplyr::select(medlineranker.res, c("Disease", 'Gene symbols'))  %>% set_names(c("Disease", "symbol")) 
545
# separate_rows(tmp, Disease, symbol, sep = "|", convert = TRUE)
546
547
library(splitstackshape)
548
tmp[c(1,2),] %>% separate_rows(Disease, symbol)
549
tmp[c(2,3),] %>% 
550
    rownames_to_column("id") %>% 
551
    cSplit(., sep = "|", splitCols = 2:ncol(.), direction = "long", makeEqual = T) %>% 
552
    as_tibble() %>% 
553
    group_by(id) %>% 
554
    fill(2:ncol(.)) %>% 
555
    unique() %>% 
556
    ungroup(id) %>% 
557
    select(-id)
558
559
medlineranker.res.df <- tmp %>% rownames_to_column("id") %>% 
560
    cSplit(., sep = "|", splitCols = 3:ncol(.), direction = "long", makeEqual = T) %>% 
561
    as_tibble() %>% group_by(id) %>% fill(2:ncol(.)) %>% unique() %>% ungroup(id) %>% select(-id)
562
563
medlineranker.res.df <- left_join(medlineranker.res.df, medlineranker.res) %>% mutate(symbol = as.character(symbol))
564
hmp_T2D$medlineranker.res.df <- medlineranker.res.df 
565
usethis::use_data(hmp_T2D, overwrite = TRUE)