|
a |
|
b/data-raw/hmp_T2D_raw.R |
|
|
1 |
|
|
|
2 |
library(tidyverse) |
|
|
3 |
library(timeOmics) |
|
|
4 |
library(lubridate) |
|
|
5 |
library(lmms) |
|
|
6 |
|
|
|
7 |
rm(list=ls()) |
|
|
8 |
|
|
|
9 |
# RData from: |
|
|
10 |
# Sailani, M. R., Metwally, A. A., Zhou, W., Rose, S. M. S. F., Ahadi, S., Contrepois, K., ... & Snyder, M. P. (2020). |
|
|
11 |
# Deep longitudinal multiomics profiling reveals two biological seasonal patterns in California. Nature communications, 11(1), 1-12. |
|
|
12 |
load("/home/antoine/Documents/timeomics_analysis/HMP_seasoning/Multi_Omics_Seasonal.RData") |
|
|
13 |
|
|
|
14 |
# 0. DATA CLEANING |
|
|
15 |
|
|
|
16 |
Gut_annotation_colData <- Gut_annotation_colData %>% |
|
|
17 |
mutate(YMD = lubridate::dmy(IRIS)) %>% |
|
|
18 |
mutate(Date = IRIS) %>% |
|
|
19 |
mutate(Time = yday(YMD)) %>% |
|
|
20 |
mutate(omics = "Gut") %>% dplyr::select(-Date, -BMI, -IRIS) |
|
|
21 |
|
|
|
22 |
list_lab <- list("RNA" = RNA_annotation_colData, |
|
|
23 |
"Metabo" = Metabolomics_annotation_colData, |
|
|
24 |
#"Gut" =Gut_annotation_colData, |
|
|
25 |
"Clinical" = Clinical_labs_annotation_colData) |
|
|
26 |
|
|
|
27 |
list_lab_df <- imap_dfr(list_lab, ~{.x %>% |
|
|
28 |
mutate("omics" = .y) %>% |
|
|
29 |
mutate(YMD = lubridate::ymd(as.Date(Date))) %>% |
|
|
30 |
dplyr::select(-Date) |
|
|
31 |
}) |
|
|
32 |
|
|
|
33 |
IRIS_BMI <- list_lab_df[c(1:3)] %>% unique() |
|
|
34 |
IRIS_only <- IRIS_BMI %>% dplyr::select(-BMI) %>% unique %>% filter(!is.na(IRIS), !is.na(SubjectID)) %>% |
|
|
35 |
unique |
|
|
36 |
IRIS_1 <- IRIS_only %>% group_by(SubjectID) %>% |
|
|
37 |
dplyr::summarise(N = n()) %>% |
|
|
38 |
filter(N == 1) %>% pull(SubjectID) %>% as.character() |
|
|
39 |
IRIS_only <- IRIS_only %>% filter(SubjectID %in% IRIS_1) |
|
|
40 |
|
|
|
41 |
# 1. DATA PREPARATION |
|
|
42 |
# GUT |
|
|
43 |
########################### |
|
|
44 |
GUT_sample <- Gut_annotation_colData %>% |
|
|
45 |
mutate(Year = ifelse(year(YMD) < 2000, year(YMD) +2000, year(YMD))) %>% |
|
|
46 |
left_join(IRIS_only) %>% |
|
|
47 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
48 |
|
|
|
49 |
GUT <- gut_df_Data |
|
|
50 |
rownames(GUT) <- GUT_sample$SampleID |
|
|
51 |
|
|
|
52 |
# CLINICAL |
|
|
53 |
########################### |
|
|
54 |
CLINICAL_sample <- Clinical_labs_annotation_colData %>% |
|
|
55 |
mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>% |
|
|
56 |
left_join(IRIS_only) %>% |
|
|
57 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
58 |
|
|
|
59 |
CLINICAL <- clinical_labs_Data |
|
|
60 |
rownames(CLINICAL) <- CLINICAL_sample$SampleID |
|
|
61 |
index.na <- CLINICAL %>% lapply(function(x) is.na(x) %>% sum) %>% unlist |
|
|
62 |
CLINICAL <- CLINICAL[index.na<=11] %>% na.omit |
|
|
63 |
|
|
|
64 |
# RNA |
|
|
65 |
########################### |
|
|
66 |
RNA_sample <- RNA_annotation_colData %>% |
|
|
67 |
mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>% |
|
|
68 |
left_join(IRIS_only) %>% |
|
|
69 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
70 |
|
|
|
71 |
index.NA <- (!is.na(RNA_annotation_colData$Time) & !is.na(RNA_annotation_colData$Time)) |
|
|
72 |
RNA_sample <- RNA_sample[index.NA,] |
|
|
73 |
RNA <- RNA_df_Data[index.NA,] |
|
|
74 |
rownames(RNA) <- RNA_sample$SampleID |
|
|
75 |
|
|
|
76 |
# NOSE |
|
|
77 |
########################### |
|
|
78 |
Nasal_sample <- Nasal_annotation_colData %>% |
|
|
79 |
mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>% |
|
|
80 |
left_join(IRIS_only) %>% |
|
|
81 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
82 |
|
|
|
83 |
NASAL <- Nasal_df_Data |
|
|
84 |
rownames(NASAL) <- Nasal_sample$SampleID |
|
|
85 |
|
|
|
86 |
# PROTEIN |
|
|
87 |
########################### |
|
|
88 |
PROT_sample <- Proteomics_annotation_colData %>% |
|
|
89 |
mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>% |
|
|
90 |
left_join(IRIS_only) %>% |
|
|
91 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
92 |
|
|
|
93 |
PROT <- Proteomics_df_Data |
|
|
94 |
rownames(PROT) <- PROT_sample$SampleID |
|
|
95 |
|
|
|
96 |
# METABOLITE |
|
|
97 |
########################### |
|
|
98 |
METAB_sample <- Metabolomics_annotation_colData %>% |
|
|
99 |
mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>% |
|
|
100 |
left_join(IRIS_only) %>% |
|
|
101 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
102 |
|
|
|
103 |
METAB <- Metabolomics_df_Data |
|
|
104 |
rownames(METAB) <- METAB_sample$SampleID |
|
|
105 |
|
|
|
106 |
# CYTOKINE |
|
|
107 |
########################### |
|
|
108 |
CYTO_sample <- Cytokines_annotation_colData %>% |
|
|
109 |
mutate(Year = ifelse(year(Date) < 2000, year(Date) +2000, year(Date))) %>% |
|
|
110 |
left_join(IRIS_only) %>% |
|
|
111 |
mutate(SampleID = paste0(SubjectID, "_", Time, "_", Year, "_", IRIS, "_", rownames(.))) |
|
|
112 |
|
|
|
113 |
CYTO <- Cytokines_df_Data |
|
|
114 |
rownames(CYTO) <- CYTO_sample$SampleID |
|
|
115 |
|
|
|
116 |
############################ |
|
|
117 |
|
|
|
118 |
# DATA: only RNA/CLINICAL/GUT/METAB |
|
|
119 |
# split by IR/IS |
|
|
120 |
DATA <- list("RNA.IR" = RNA[str_split(rownames(RNA),"_") %>% map_chr(~.x[[4]]) == "IR",], |
|
|
121 |
"GUT.IR" = GUT[str_split(rownames(GUT),"_") %>% map_chr(~.x[[4]]) == "IR",], |
|
|
122 |
|
|
|
123 |
"CLINICAL.IR" = CLINICAL[str_split(rownames(CLINICAL),"_") %>% map_chr(~.x[[4]]) == "IR",], |
|
|
124 |
"RNA.IS" = RNA[str_split(rownames(RNA),"_") %>% map_chr(~.x[[4]]) == "IS",], |
|
|
125 |
|
|
|
126 |
"GUT.IS" = GUT[str_split(rownames(GUT),"_") %>% map_chr(~.x[[4]]) == "IS",], |
|
|
127 |
"CLINICAL.IS" = CLINICAL[str_split(rownames(CLINICAL),"_") %>% map_chr(~.x[[4]]) == "IS",], |
|
|
128 |
|
|
|
129 |
"METAB.IR" = METAB[str_split(rownames(METAB),"_") %>% map_chr(~.x[[4]]) == "IR",], |
|
|
130 |
"METAB.IS" = METAB[str_split(rownames(METAB),"_") %>% map_chr(~.x[[4]]) == "IS",], |
|
|
131 |
|
|
|
132 |
"PROT.IS" = PROT[str_split(rownames(PROT),"_") %>% map_chr(~.x[[4]]) == "IS",], |
|
|
133 |
"PROT.IR" = PROT[str_split(rownames(PROT),"_") %>% map_chr(~.x[[4]]) == "IR",], |
|
|
134 |
|
|
|
135 |
"CYTO.IS" = CYTO[str_split(rownames(CYTO),"_") %>% map_chr(~.x[[4]]) == "IS",], |
|
|
136 |
"CYTO.IR" = CYTO[str_split(rownames(CYTO),"_") %>% map_chr(~.x[[4]]) == "IR",] |
|
|
137 |
) |
|
|
138 |
|
|
|
139 |
COMBINED <- list("RNA" = RNA, CLINICAL = CLINICAL, GUT = GUT, METAB = METAB, PROT = PROT, CYTO = CYTO) |
|
|
140 |
save(DATA, COMBINED, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/RAW_DATA.RDA") |
|
|
141 |
############################################################ |
|
|
142 |
|
|
|
143 |
stat_raw_data <- lapply(list(RNA=RNA, GUT=GUT, METAB=METAB, CLINICAL=CLINICAL, PROT = PROT, CYTO = CYTO), dim) %>% |
|
|
144 |
as.data.frame() %>% t %>% as.data.frame() %>% |
|
|
145 |
setNames(c("sample", "feature")) |
|
|
146 |
lapply(list(RNA=RNA, GUT=GUT, METAB=METAB, CLINICAL=CLINICAL, PROT = PROT, CYTO = CYTO), function(x){ |
|
|
147 |
rownames(x) %>% str_remove("_.*") %>% unique %>% length()}) %>% |
|
|
148 |
as.data.frame() %>% t %>% as.data.frame() %>% setNames("uniqueID") %>% |
|
|
149 |
rownames_to_column("omic") %>% |
|
|
150 |
left_join(stat_raw_data %>% rownames_to_column("omic")) %>% column_to_rownames("omic") %>% t %>% |
|
|
151 |
as.data.frame() %>% knitr::kable() |
|
|
152 |
|
|
|
153 |
# 2. DATA FILTERING |
|
|
154 |
|
|
|
155 |
# 1. coef. of var |
|
|
156 |
cv.data <- lapply(DATA, function(X){ |
|
|
157 |
unlist(lapply(as.data.frame(X), |
|
|
158 |
function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE)))) |
|
|
159 |
}) |
|
|
160 |
|
|
|
161 |
fc.data <- list("RNA.IR"= 1.5, "RNA.IS"=1.5, |
|
|
162 |
"CLINICAL.IR"=0, "CLINICAL.IS"=0, |
|
|
163 |
"GUT.IR"=1.5, "GUT.IS"=1.5, |
|
|
164 |
"METAB.IR"=1.5 , "METAB.IS"=1.5, |
|
|
165 |
"PROT.IR"=1.5 , "PROT.IS"=1.5, |
|
|
166 |
"CYTO.IR" = 1.5, "CYTO.IS"=1.5) |
|
|
167 |
|
|
|
168 |
par(mfrow = c(6,2)) |
|
|
169 |
#for(i in c("RNA.IR","CLINICAL.IR", "GUT.IR", "METAB.IR", "RNA.IS","CLINICAL.IS", "GUT.IS", "METAB.IS", "PROT.IR", "PROT.IS", "CYTO.IR", "CYTO.IS")){ |
|
|
170 |
for(i in c("RNA.IR","RNA.IS", "CLINICAL.IR", "CLINICAL.IS", "GUT.IR","GUT.IS", "METAB.IS", "METAB.IR", "PROT.IR", "PROT.IS", "CYTO.IR", "CYTO.IS")){ |
|
|
171 |
|
|
|
172 |
hist(cv.data[[i]], breaks = 20, main =i) |
|
|
173 |
abline(v = fc.data[[i]], col = "red") |
|
|
174 |
legend("topright", legend = paste0("FC = ",fc.data[[i]]), col = "red", lty = 1) |
|
|
175 |
} |
|
|
176 |
par(mfrow = c(1,1)) |
|
|
177 |
|
|
|
178 |
|
|
|
179 |
# 2. Remove low cv features |
|
|
180 |
remove.low.cv <- function(X, cutoff = 0.5){ |
|
|
181 |
# var.coef |
|
|
182 |
cv <- unlist(lapply(as.data.frame(X), |
|
|
183 |
function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE)))) |
|
|
184 |
return(X[,cv > cutoff]) |
|
|
185 |
} |
|
|
186 |
|
|
|
187 |
DATA.filtered <- list("RNA.IR" = remove.low.cv(DATA$RNA.IR, 1.5), |
|
|
188 |
"RNA.IS" = remove.low.cv(DATA$RNA.IS, 1.5), |
|
|
189 |
"GUT.IR" = remove.low.cv(DATA$GUT.IR, 1.5), |
|
|
190 |
"GUT.IS" = remove.low.cv(DATA$GUT.IS, 1.5), |
|
|
191 |
# "CLINICAL.IR" = remove.low.cv(DATA$CLINICAL.IR, 0), |
|
|
192 |
# "CLINICAL.IS" = remove.low.cv(DATA$CLINICAL.IS, 0), |
|
|
193 |
"CLINICAL.IR" = DATA$CLINICAL.IR, |
|
|
194 |
"CLINICAL.IS" = DATA$CLINICAL.IS, |
|
|
195 |
|
|
|
196 |
"METAB.IR" = remove.low.cv(DATA$METAB.IR, 1.5), |
|
|
197 |
"METAB.IS" = remove.low.cv(DATA$METAB.IS, 1.5), |
|
|
198 |
"PROT.IS" = remove.low.cv(DATA$PROT.IS, 1.5), |
|
|
199 |
"PROT.IR" = remove.low.cv(DATA$PROT.IR, 1.5), |
|
|
200 |
"CYTO.IS" = remove.low.cv(DATA$CYTO.IS, 1), |
|
|
201 |
"CYTO.IR" = remove.low.cv(DATA$CYTO.IR, 1)) |
|
|
202 |
lapply(DATA.filtered, dim) |
|
|
203 |
|
|
|
204 |
# 3. scale filtered value (log, scale, CLR) |
|
|
205 |
|
|
|
206 |
# scale for OTU |
|
|
207 |
norm_OTU <- function(DF, AR = F){ |
|
|
208 |
DF <- DF + 0.0001 |
|
|
209 |
|
|
|
210 |
data.TSS.clr = mixOmics::logratio.transfo(DF, logratio = 'CLR') |
|
|
211 |
|
|
|
212 |
# reconstrcuct dataframe |
|
|
213 |
data.good <- as.data.frame(matrix(ncol = ncol(data.TSS.clr), |
|
|
214 |
nrow = nrow( data.TSS.clr))) |
|
|
215 |
rownames(data.good) <- rownames(data.TSS.clr) |
|
|
216 |
colnames(data.good) <- colnames(data.TSS.clr) |
|
|
217 |
for( i in c(1:nrow(data.TSS.clr))){ |
|
|
218 |
for( j in c(1:ncol(data.TSS.clr))){ |
|
|
219 |
data.good[i,j] <- data.TSS.clr[i,j] |
|
|
220 |
} |
|
|
221 |
} |
|
|
222 |
return(data.good) |
|
|
223 |
} |
|
|
224 |
|
|
|
225 |
|
|
|
226 |
DATA.filtered.scale <- list( |
|
|
227 |
"RNA.IR" = log(DATA.filtered$RNA.IR + 1) %>% scale, |
|
|
228 |
"RNA.IS" = log(DATA.filtered$RNA.IS + 1) %>% scale, |
|
|
229 |
|
|
|
230 |
"CLINICAL.IR" = log(DATA.filtered$CLINICAL.IR +1)%>% scale, |
|
|
231 |
"CLINICAL.IS" = log(DATA.filtered$CLINICAL.IS +1)%>% scale, |
|
|
232 |
|
|
|
233 |
"GUT.IR" = norm_OTU(DATA.filtered$GUT.IR), |
|
|
234 |
"GUT.IS" = norm_OTU(DATA.filtered$GUT.IS), |
|
|
235 |
|
|
|
236 |
"METAB.IR" = log(DATA.filtered$METAB.IR +1)%>% scale, |
|
|
237 |
"METAB.IS" = log(DATA.filtered$METAB.IS +1)%>% scale, |
|
|
238 |
|
|
|
239 |
# "PROT.IR" = log(DATA.filtered$PROT.IR +1)%>% scale, |
|
|
240 |
# "PROT.IS" = log(DATA.filtered$PROT.IS +1)%>% scale |
|
|
241 |
|
|
|
242 |
"PROT.IR" = DATA.filtered$PROT.IR, # already scale |
|
|
243 |
"PROT.IS" = DATA.filtered$PROT.IS, |
|
|
244 |
|
|
|
245 |
"CYTO.IR" = log(DATA.filtered$CYTO.IR +1), |
|
|
246 |
"CYTO.IS" = log(DATA.filtered$CYTO.IS +1) |
|
|
247 |
|
|
|
248 |
) |
|
|
249 |
|
|
|
250 |
lapply(DATA.filtered, dim) %>% |
|
|
251 |
as.data.frame() %>% t %>% as.data.frame() %>% |
|
|
252 |
setNames(c("sample", "feature")) %>% |
|
|
253 |
rownames_to_column("OMIC") %>% |
|
|
254 |
mutate(IRIS = str_extract(OMIC,"..$"), OMIC = str_remove(OMIC, "...$")) %>% |
|
|
255 |
gather(meta, value, -c(OMIC, IRIS)) %>% |
|
|
256 |
spread(OMIC, value) %>% arrange(IRIS) %>% |
|
|
257 |
dplyr::select(IRIS, meta, RNA, GUT, METAB, CLINICAL, PROT, CYTO) |
|
|
258 |
|
|
|
259 |
save(DATA.filtered.scale, DATA.filtered, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/DATA_FILTERED.RDA") |
|
|
260 |
############################################################ |
|
|
261 |
|
|
|
262 |
fc.data.combined <- list("RNA"= 1.5, |
|
|
263 |
"CLINICAL"=0.2, |
|
|
264 |
"GUT"=1.5, |
|
|
265 |
"METAB"=1.5, |
|
|
266 |
"PROT" = 1.5, |
|
|
267 |
"CYTO" = 1) |
|
|
268 |
cv.data.combined <- lapply(COMBINED, function(X){ |
|
|
269 |
unlist(lapply(as.data.frame(X), |
|
|
270 |
function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE)))) |
|
|
271 |
}) |
|
|
272 |
fc.color <- list("RNA"= color.mixo(4), |
|
|
273 |
"CLINICAL"=color.mixo(1), |
|
|
274 |
"GUT"=color.mixo(2), |
|
|
275 |
"METAB"=color.mixo(3), |
|
|
276 |
"PROT"=color.mixo(5), |
|
|
277 |
"CYTO" = color.mixo(6)) |
|
|
278 |
|
|
|
279 |
par(mfrow = c(3,2)) |
|
|
280 |
for(i in c("RNA","CLINICAL", "GUT", "METAB", "PROT", "CYTO")){ |
|
|
281 |
hist(cv.data.combined[[i]], breaks = 20, main =i, xlab = paste0("Var. Coef. (", i, ")"), |
|
|
282 |
col = fc.color[[i]]) |
|
|
283 |
abline(v = fc.data.combined[[i]], col = "red") |
|
|
284 |
legend("topright", legend = paste0("CV = ",fc.data.combined[[i]]), col = "red", lty = 1) |
|
|
285 |
} |
|
|
286 |
par(mfrow = c(1,1)) |
|
|
287 |
|
|
|
288 |
# 3. MODELLING |
|
|
289 |
|
|
|
290 |
lmms.func <- function(X, mode = "p-spline"){ |
|
|
291 |
time <- rownames(X) %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric() |
|
|
292 |
lmms.output <- lmms::lmmSpline(data = X, time = time, |
|
|
293 |
sampleID = rownames(X), deri = FALSE, |
|
|
294 |
basis = mode, numCores = 4, |
|
|
295 |
keepModels = TRUE) |
|
|
296 |
return(lmms.output) |
|
|
297 |
} |
|
|
298 |
|
|
|
299 |
# only one ID/Year |
|
|
300 |
ID_u <- "ZLZNCLZ" |
|
|
301 |
Year_u <- 2015 |
|
|
302 |
|
|
|
303 |
# ID_u <- "ZOZOW1T" |
|
|
304 |
# Year_u <- 2015 |
|
|
305 |
|
|
|
306 |
|
|
|
307 |
tmp <- imap_dfr(DATA.filtered.scale, ~{ |
|
|
308 |
.x %>% as.data.frame() %>% rownames_to_column("sample") %>% |
|
|
309 |
gather(feature, value, -sample) %>% |
|
|
310 |
mutate(ID = str_split(sample, "_") %>% map_chr(~.x[[1]])) %>% |
|
|
311 |
mutate(year = str_split(sample, "_") %>% map_chr(~.x[[3]])) %>% |
|
|
312 |
mutate(OMIC = str_remove(.y, "...$")) %>% |
|
|
313 |
mutate(IRIS = str_extract(.y, "..$")) |
|
|
314 |
}) |
|
|
315 |
tmp %>% dplyr::select(-feature, -value) %>% unique() %>% na.omit %>% |
|
|
316 |
group_by(ID, year, OMIC, IRIS) %>% summarise(N = n()) %>% spread(OMIC, N) %>% |
|
|
317 |
na.omit() %>% split(.$year) |
|
|
318 |
tmp %>% dplyr::select(-feature, -value) %>% unique() %>% na.omit %>% |
|
|
319 |
group_by(ID, year, OMIC, IRIS) %>% summarise(N = n()) %>% spread(OMIC, N) %>% |
|
|
320 |
filter(ID == ID_u) %>% dplyr::select(-IRIS) %>% na.omit |
|
|
321 |
|
|
|
322 |
# just a filter to get only the selected ID/Year |
|
|
323 |
DATA.GOOD <- imap_dfr(DATA.filtered.scale, ~{ |
|
|
324 |
.x %>% as.data.frame() %>% rownames_to_column("sample") %>% |
|
|
325 |
gather(feature, value, -sample) %>% |
|
|
326 |
mutate(ID = str_split(sample, "_") %>% map_chr(~.x[[1]])) %>% |
|
|
327 |
mutate(year = str_split(sample, "_") %>% map_chr(~.x[[3]])) %>% |
|
|
328 |
mutate(OMIC = str_remove(.y, "...$")) %>% |
|
|
329 |
dplyr::filter(ID == ID_u) %>% |
|
|
330 |
dplyr::filter(year == Year_u) |
|
|
331 |
}) %>% split(.$OMIC) %>% |
|
|
332 |
purrr::map(~{ |
|
|
333 |
.x %>% |
|
|
334 |
dplyr::select(sample, feature, value) %>% |
|
|
335 |
spread(feature, value) %>% |
|
|
336 |
column_to_rownames("sample") |
|
|
337 |
}) |
|
|
338 |
|
|
|
339 |
|
|
|
340 |
# only 1 year |
|
|
341 |
MODELLED <- lapply(DATA.GOOD, function(x) lmms.func(x)) |
|
|
342 |
|
|
|
343 |
MODELLED %>% lapply(function(x)x@predSpline %>% dim) %>% |
|
|
344 |
as.data.frame() %>% t %>% as.data.frame() %>% |
|
|
345 |
setNames(c("sample", "feature")) %>% t %>%as.data.frame() %>% |
|
|
346 |
dplyr::select(RNA, GUT, METAB, CLINICAL, PROT) |
|
|
347 |
|
|
|
348 |
MODELLED %>% imap_dfr(~.x@modelsUsed %>% table %>% as.data.frame %>% |
|
|
349 |
column_to_rownames(".") %>% t %>% as.data.frame %>% |
|
|
350 |
mutate(omic = .y)) %>% remove_rownames() %>% |
|
|
351 |
column_to_rownames('omic') %>% t %>% |
|
|
352 |
as.data.frame() %>% dplyr::select(RNA, GUT, METAB, CLINICAL, PROT) |
|
|
353 |
|
|
|
354 |
# 4. STRAIGHT LINE FILTERING |
|
|
355 |
|
|
|
356 |
filterlmms.func <- function(modelled.data, lmms.output){ |
|
|
357 |
time = modelled.data %>% rownames() %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric() |
|
|
358 |
#time = rownames(modelled.data) %>% as.numeric() |
|
|
359 |
filter.res <- lmms.filter.lines(data = modelled.data, |
|
|
360 |
lmms.obj = lmms.output, time = time, |
|
|
361 |
homoskedasticity.cutoff=0.05)$filtered |
|
|
362 |
} |
|
|
363 |
|
|
|
364 |
FILTER <- lapply(names(DATA.GOOD), function(x) filterlmms.func(modelled.data = DATA.GOOD[[x]], lmms.output = MODELLED[[x]])) |
|
|
365 |
names(FILTER) <- names(MODELLED) |
|
|
366 |
|
|
|
367 |
FILTER %>% lapply(dim) %>% |
|
|
368 |
as.data.frame() %>% t %>% as.data.frame() %>% |
|
|
369 |
setNames(c("sample", "feature")) %>% |
|
|
370 |
t %>%as.data.frame() %>% |
|
|
371 |
dplyr::select(RNA, GUT, METAB, CLINICAL) |
|
|
372 |
|
|
|
373 |
FINAL.FILTER <- FILTER[c("CLINICAL", "GUT", "METAB", "RNA", "PROT")] |
|
|
374 |
rownames(FINAL.FILTER[["GUT"]]) <- rownames(FINAL.FILTER[["RNA"]]) # change 86 par 85 |
|
|
375 |
|
|
|
376 |
DATA.LMMS <- lapply(MODELLED, function(x)x@predSpline %>% t %>% as.data.frame) |
|
|
377 |
rownames(DATA.LMMS[["GUT"]]) <- rownames(DATA.LMMS[["RNA"]]) # change 86 par 85 |
|
|
378 |
|
|
|
379 |
save(FINAL.FILTER, MODELLED, DATA.GOOD, DATA.LMMS, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/LMMS.RDA") |
|
|
380 |
|
|
|
381 |
lapply(DATA.LMMS, dim) |
|
|
382 |
# 5. MULTI-OMICS CLUSTERING |
|
|
383 |
|
|
|
384 |
block.res <- block.pls(DATA.LMMS, indY = 1, ncomp = 5) |
|
|
385 |
getNcomp.res <- getNcomp(block.res, X = DATA.LMMS, indY = 1) |
|
|
386 |
|
|
|
387 |
# block.res <- block.pls(FINAL.FILTER, indY = 1, ncomp = 3) |
|
|
388 |
# getNcomp.res <- getNcomp(block.res, X = FINAL.FILTER, indY = 1) |
|
|
389 |
|
|
|
390 |
plot(getNcomp.res) |
|
|
391 |
|
|
|
392 |
# ncomp = 2 |
|
|
393 |
block.res <- block.pls(DATA.LMMS, indY = 1, ncomp = 1, scale =FALSE) |
|
|
394 |
|
|
|
395 |
# block.res <- block.pls(FINAL.FILTER, indY = 1, ncomp = 1, scale =FALSE) |
|
|
396 |
|
|
|
397 |
plotLong(object = block.res, title = "Block-PLS Clusters, scale = TRUE", legend = TRUE) |
|
|
398 |
|
|
|
399 |
getCluster(block.res) %>% group_by(block, cluster) %>% summarise(N = n()) %>% |
|
|
400 |
spread(block, N) %>% |
|
|
401 |
dplyr::select(cluster, RNA, GUT, METAB, CLINICAL) |
|
|
402 |
|
|
|
403 |
save(block.res, file = "/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/timeomics_res_block.rda") |
|
|
404 |
|
|
|
405 |
|
|
|
406 |
# elagage |
|
|
407 |
test.list.keepX <- list( |
|
|
408 |
"CLINICAL" = seq(2,8,by=1), |
|
|
409 |
"GUT" = seq(2,10,by=1), |
|
|
410 |
"METAB" = seq(2,9,by=1), |
|
|
411 |
"RNA" = seq(10,50,by=2) |
|
|
412 |
) |
|
|
413 |
|
|
|
414 |
tune.block.res <- tuneCluster.block.spls(X= FINAL.FILTER, indY = 1, |
|
|
415 |
test.list.keepX=test.list.keepX, |
|
|
416 |
scale=FALSE, |
|
|
417 |
mode = "canonical", ncomp = 1) |
|
|
418 |
tune.block.res$choice.keepX |
|
|
419 |
final.block <- block.spls(FINAL.FILTER, indY = 1, ncomp = 1, scale =FALSE, |
|
|
420 |
keepX = tune.block.res$choice.keepX) |
|
|
421 |
plotLong(final.block, legend = TRUE) |
|
|
422 |
|
|
|
423 |
getCluster(final.block) %>% group_by(block, cluster) %>% summarise(N = n()) %>% |
|
|
424 |
spread(block, N) %>% |
|
|
425 |
dplyr::select(cluster, RNA, GUT, METAB, CLINICAL) |
|
|
426 |
|
|
|
427 |
library("openxlsx") |
|
|
428 |
cluster_comp <- getCluster(final.block) %>% dplyr::select(molecule, block, cluster, comp, contribution) %>% |
|
|
429 |
mutate(cluster = ifelse(cluster == -1, "Cluster 1", "Cluster 2")) %>% |
|
|
430 |
split(.$cluster) |
|
|
431 |
write.xlsx(cluster_comp, file = "cluster_composition.xlsx") |
|
|
432 |
|
|
|
433 |
|
|
|
434 |
# final data for netOmics package // shrink |
|
|
435 |
# DATA.GOOD -> ! individual, 7 timepoints but no modelisation |
|
|
436 |
# DATA RAW -> filter based on DATA.GOOD molecules (for OTU -> sparcc needs RAW) |
|
|
437 |
# DATA$RNA.IS %>% rownames() %>% str_remove("_.*") %>% str_detect("ZLZNCLZ") %>% any() |
|
|
438 |
# [1] TRUE |
|
|
439 |
|
|
|
440 |
hmp_T2D <- list() |
|
|
441 |
hmp_T2D$raw <- list() |
|
|
442 |
hmp_T2D$data <- list() |
|
|
443 |
for(i in names(DATA.GOOD)){ |
|
|
444 |
# hmp_diabetes$raw[[i]] <- DATA[[paste0(i, ".IS")]][str_detect(rownames(DATA[[paste0(i, ".IS")]]), "ZLZNCLZ"), colnames(DATA.GOOD[[i]])] |
|
|
445 |
hmp_T2D$raw[[i]] <- DATA[[paste0(i, ".IS")]][rownames(DATA.GOOD[[i]]), colnames(DATA.GOOD[[i]])] |
|
|
446 |
rownames(hmp_T2D$raw[[i]]) <- rownames(DATA.GOOD$RNA) |
|
|
447 |
hmp_T2D$raw[[i]] <- hmp_T2D$raw[[i]] %>% rownames_to_column("Rownames") %>% |
|
|
448 |
mutate(Rownames = Rownames %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric()) %>% |
|
|
449 |
arrange(Rownames) %>% column_to_rownames(var = "Rownames") |
|
|
450 |
|
|
|
451 |
#data |
|
|
452 |
hmp_T2D$data[[i]] <- DATA.GOOD[[i]] |
|
|
453 |
rownames(hmp_T2D$data[[i]]) <- rownames(hmp_T2D$raw[[i]]) |
|
|
454 |
} |
|
|
455 |
|
|
|
456 |
hmp_T2D$data <- DATA.GOOD |
|
|
457 |
lmms.func <- function(X){ |
|
|
458 |
# time <- rownames(X) %>% str_split("_") %>% map_chr(~.x[[2]]) %>% as.numeric() |
|
|
459 |
time <- rownames(X) %>% as.numeric() |
|
|
460 |
lmms.output <- lmms::lmmSpline(data = X, time = time, |
|
|
461 |
sampleID = rownames(X), deri = FALSE, |
|
|
462 |
basis = "p-spline", numCores = 4, |
|
|
463 |
keepModels = TRUE) |
|
|
464 |
return(lmms.output) |
|
|
465 |
} |
|
|
466 |
|
|
|
467 |
MODELLED <- lapply(hmp_T2D$data, function(x) lmms.func(x)) |
|
|
468 |
|
|
|
469 |
MODELLED %>% imap_dfr(~.x@modelsUsed %>% table %>% as.data.frame %>% |
|
|
470 |
column_to_rownames(".") %>% t %>% as.data.frame %>% |
|
|
471 |
mutate(omic = .y)) %>% remove_rownames() %>% |
|
|
472 |
column_to_rownames('omic') %>% t %>% |
|
|
473 |
as.data.frame() |
|
|
474 |
|
|
|
475 |
data.MODELLED <- lapply(MODELLED, function(x) x@predSpline %>% t) |
|
|
476 |
rownames(data.MODELLED$GUT) <- rownames(data.MODELLED$RNA) |
|
|
477 |
|
|
|
478 |
|
|
|
479 |
# .$timeOmics |
|
|
480 |
# block.res.no.model <- block.pls(hmp_T2D$data, indY = 1, ncomp = 5, scale =FALSE) |
|
|
481 |
# getNcomp.res <- getNcomp(block.res.no.model, X = hmp_T2D$data, indY = 1) |
|
|
482 |
# plot(getNcomp.res) |
|
|
483 |
block.res.no.model <- block.pls(hmp_T2D$data, indY = 1, ncomp = 1, scale =TRUE) |
|
|
484 |
hmp_T2D$getCluster.res <- getCluster(block.res.no.model) |
|
|
485 |
|
|
|
486 |
block.res.w.model <- block.pls(data.MODELLED, indY = 1, ncomp = 5, scale =TRUE) |
|
|
487 |
getNcomp.res <- getNcomp(block.res.w.model, X = data.MODELLED, indY = 1) |
|
|
488 |
block.res.w.model <- block.pls(data.MODELLED, indY = 1, ncomp = 1, scale =TRUE) |
|
|
489 |
|
|
|
490 |
hmp_T2D$getCluster.res <- getCluster(block.res.w.model) |
|
|
491 |
|
|
|
492 |
# .$sparse |
|
|
493 |
test.list.keepX <- list( |
|
|
494 |
"CLINICAL" = seq(2,39,by=2), |
|
|
495 |
"CYTO" = seq(2,10,b=2), |
|
|
496 |
"GUT" = seq(2,50,by=2), |
|
|
497 |
"METAB" = seq(2,50,by=2), |
|
|
498 |
"PROT" = seq(2,30,b=2), |
|
|
499 |
"RNA" = seq(10,100,by=3)) |
|
|
500 |
|
|
|
501 |
# tune.block.res <- tuneCluster.block.spls(X= hmp_T2D$data, indY = 1, |
|
|
502 |
# test.list.keepX=test.list.keepX, |
|
|
503 |
# scale=FALSE, |
|
|
504 |
# mode = "canonical", ncomp = 1) |
|
|
505 |
# too long |
|
|
506 |
|
|
|
507 |
list.keepX <- list( |
|
|
508 |
"CLINICAL" = 4, |
|
|
509 |
"CYTO" = 3, |
|
|
510 |
"GUT" = 10, |
|
|
511 |
"METAB" = 3, |
|
|
512 |
"PROT" = 2, |
|
|
513 |
"RNA" = 34) |
|
|
514 |
|
|
|
515 |
sparse.block.res.w.model <- block.spls(data.MODELLED, indY = 1, ncomp = 1, scale =TRUE, keepX = list.keepX) |
|
|
516 |
plotLong(sparse.block.res.w.model, scale = TRUE, legend = TRUE) |
|
|
517 |
|
|
|
518 |
|
|
|
519 |
hmp_T2D$getCluster.sparse.res <- getCluster(sparse.block.res.w.model) |
|
|
520 |
timeOmics::getSilhouette(sparse.block.res.w.model) |
|
|
521 |
timeOmics::getSilhouette(block.res.w.model) |
|
|
522 |
|
|
|
523 |
## Biogrid ## DATABASES |
|
|
524 |
biogrid <- read_tsv("/home/antoine/Documents/TO2/netOmics-case-studies/HeLa_Cell_Cycling/data/BIOGRID-ALL-3.5.187.tab3.txt") |
|
|
525 |
biogrid.filtered <- biogrid %>% dplyr::select("Official Symbol Interactor A", "Official Symbol Interactor B") %>% unique %>% |
|
|
526 |
set_names(c("from", "to")) |
|
|
527 |
|
|
|
528 |
biogrid.filtered.tmp <- biogrid.filtered %>% filter(from %in% cluster.info$molecule | to %in% cluster.info$molecule) |
|
|
529 |
|
|
|
530 |
hmp_T2D$interaction.biogrid <- biogrid.filtered.tmp |
|
|
531 |
|
|
|
532 |
TFome <- readRDS( "~/Documents/TO2/TFome.Rds") |
|
|
533 |
tf.1 <- TFome %>% filter(TF %in% hmp_T2D$getCluster.res$molecule | Target %in% hmp_T2D$getCluster.res$molecule) %>% |
|
|
534 |
unlist() %>% unique |
|
|
535 |
hmp_T2D$interaction.TF <- TFome %>% filter(TF %in% tf.1 | Target %in% tf.1) |
|
|
536 |
|
|
|
537 |
hmp_T2D$interaction.TF <- TFome.igraph |
|
|
538 |
|
|
|
539 |
hmp_T2D$interaction.TF <- TF.interact |
|
|
540 |
usethis::use_data(hmp_T2D, overwrite = TRUE) |
|
|
541 |
|
|
|
542 |
|
|
|
543 |
medlineranker.res <- read_tsv("/home/antoine/Documents/timeomics_analysis/HMP_seasoning/netomics/res_medlineranker_all.csv") |
|
|
544 |
tmp <- dplyr::select(medlineranker.res, c("Disease", 'Gene symbols')) %>% set_names(c("Disease", "symbol")) |
|
|
545 |
# separate_rows(tmp, Disease, symbol, sep = "|", convert = TRUE) |
|
|
546 |
|
|
|
547 |
library(splitstackshape) |
|
|
548 |
tmp[c(1,2),] %>% separate_rows(Disease, symbol) |
|
|
549 |
tmp[c(2,3),] %>% |
|
|
550 |
rownames_to_column("id") %>% |
|
|
551 |
cSplit(., sep = "|", splitCols = 2:ncol(.), direction = "long", makeEqual = T) %>% |
|
|
552 |
as_tibble() %>% |
|
|
553 |
group_by(id) %>% |
|
|
554 |
fill(2:ncol(.)) %>% |
|
|
555 |
unique() %>% |
|
|
556 |
ungroup(id) %>% |
|
|
557 |
select(-id) |
|
|
558 |
|
|
|
559 |
medlineranker.res.df <- tmp %>% rownames_to_column("id") %>% |
|
|
560 |
cSplit(., sep = "|", splitCols = 3:ncol(.), direction = "long", makeEqual = T) %>% |
|
|
561 |
as_tibble() %>% group_by(id) %>% fill(2:ncol(.)) %>% unique() %>% ungroup(id) %>% select(-id) |
|
|
562 |
|
|
|
563 |
medlineranker.res.df <- left_join(medlineranker.res.df, medlineranker.res) %>% mutate(symbol = as.character(symbol)) |
|
|
564 |
hmp_T2D$medlineranker.res.df <- medlineranker.res.df |
|
|
565 |
usethis::use_data(hmp_T2D, overwrite = TRUE) |