|
a |
|
b/5-pipeline-DMP_test.R |
|
|
1 |
#merge EHR and CpG |
|
|
2 |
{ |
|
|
3 |
library(impute) |
|
|
4 |
library(tibble) |
|
|
5 |
library(dplyr) |
|
|
6 |
library(impute) |
|
|
7 |
#JHU |
|
|
8 |
{ |
|
|
9 |
setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary") |
|
|
10 |
#cell |
|
|
11 |
load(file="test/CellFraction.Rdata") |
|
|
12 |
load(file="HFpEF.Rdata") |
|
|
13 |
data_cpg_test = filter(HFpEF,PACKS_SET == "JHU") |
|
|
14 |
data_cpg_chf <- filter(data_cpg_test , chf == 1) |
|
|
15 |
data_cpg_nochf <- filter(data_cpg_test , chf == 0) |
|
|
16 |
data_cpg <- rbind(data_cpg_chf,data_cpg_nochf) |
|
|
17 |
data_cpg$Sample_Name <- paste(rep(c("chf","control"),c(nrow(data_cpg_chf),nrow(data_cpg_nochf))), |
|
|
18 |
c(c(1:nrow(data_cpg_chf)),c(1:nrow(data_cpg_nochf))),sep = "") |
|
|
19 |
|
|
|
20 |
test_meta <- data_cpg[,-c(4:7)] |
|
|
21 |
test_meta <- test_meta[,c(128,1:127)] |
|
|
22 |
test_meta_raw <- cbind(test_meta,CellFraction) |
|
|
23 |
{ |
|
|
24 |
colnames(test_meta_raw)= c("Sample_Name","shareid", |
|
|
25 |
"Ejectionfraction","PACKS_SET", |
|
|
26 |
"Omega3","Omega3amount", |
|
|
27 |
"Statin","Statinamount", |
|
|
28 |
"Thiazides","Thiazidesamount", |
|
|
29 |
"Diuretic","Diureticamount", |
|
|
30 |
"Potassium","Potassiumamount", |
|
|
31 |
"Aldosterone","Aldosteroneamount", |
|
|
32 |
"Amiodarone","Amiodaroneamount", |
|
|
33 |
"Vasodilators","Vasodilatorsamount", |
|
|
34 |
"CoQ10","CoQ10amount", |
|
|
35 |
"Betablocking","Betablockingamount", |
|
|
36 |
"AngiotensinIIantagonists","AngiotensinIIantagonistsamount", |
|
|
37 |
"ACEI","ACEIamount", |
|
|
38 |
"Warfarin","Warfarinamount", |
|
|
39 |
"Clopidogrel","Clopidogrelamount", |
|
|
40 |
"Aspirin","Aspirinamount", |
|
|
41 |
"Folicacid","Folicacidamount", |
|
|
42 |
"cvd","Coronaryheartdisease","Heartfailure", |
|
|
43 |
"chddate","chfdate","cvddate","midate", |
|
|
44 |
"Myocardialinfarction","Diabetes", |
|
|
45 |
"Atrialfibrillation","afxdate", |
|
|
46 |
"Stroke","strokedate", |
|
|
47 |
"DATE8","DATE9", |
|
|
48 |
"Gender","Age","Bloodglucose","BMI","LDLcholesterol", |
|
|
49 |
"Numberofcigarettessmoked","Creatinineserum", |
|
|
50 |
"Smoking","Averagediastolicbloodpressure", |
|
|
51 |
"Leftventricularhypertrophy", |
|
|
52 |
"Fastingbloodglucose","HDLcholesterol","Hight", |
|
|
53 |
"Averagesystolicbloodpressure", |
|
|
54 |
"Totalcholesterol","Triglycerides" ,"Ventricularrate", |
|
|
55 |
"Waist","Weight","Treatedforhypertension", |
|
|
56 |
"Treatedforlipids","aspirin.1", |
|
|
57 |
"Drinkbeer","Drinkwine","Drinkliquor","Sleep", |
|
|
58 |
"Albuminurine", "Creatinineurine" , |
|
|
59 |
"HemoglobinA1cwholeblood","Atrialenlargement", |
|
|
60 |
"Rightventricularhypertrophy","lvh","Rheumatic", |
|
|
61 |
"Aorticvalve","Mitralvalve","other_heart", |
|
|
62 |
"Arrhythmia","other_peripheral_vascular_disease", |
|
|
63 |
"other_vascular_diagnosis", |
|
|
64 |
"Dementia","Parkinson","Adultseizuredisorder" , |
|
|
65 |
"Neurological","Thyroid","Endocrine","Renal","Gynecologic", |
|
|
66 |
"Emphysema","Pneumonia","Asthma","Pulmonary","Gout", |
|
|
67 |
"Degenerative","Rheumatoidarthritis","Musculoskeletal","Gallbladder", |
|
|
68 |
"Gerd","Liver","Gidisease","Hematologicdisorder","Bleedingdisorder", |
|
|
69 |
"Eye","Ent","Skin","other","Depression","Anxiety","Psychosis", |
|
|
70 |
"other2","Prostate","Infectious","Fever","pneumonia.1", |
|
|
71 |
"Chronicbronchitis","emphysema.1","COPD","Creactiveprotein", |
|
|
72 |
"CD8T","CD4T","NK","Bcell","Mono","Gran") |
|
|
73 |
} |
|
|
74 |
setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary/test/") |
|
|
75 |
save(test_meta_raw,file="test_meta_raw.Rdata") |
|
|
76 |
test_meta = test_meta_raw[,!colnames(test_meta_raw) %in% c("PACKS_SET","Omega3amount","Statinamount","Thiazidesamount","Diureticamount", |
|
|
77 |
"Potassiumamount" , "Aldosteroneamount" , "Amiodaroneamount", |
|
|
78 |
"Vasodilatorsamount","CoQ10amount","Betablockingamount", |
|
|
79 |
"AngiotensinIIantagonistsamount", "ACEIamount" , "Warfarinamount" , |
|
|
80 |
"Clopidogrelamount" , "Aspirinamount" , "Folicacidamount" ,"chddate" , |
|
|
81 |
"chfdate" ,"cvddate" ,"midate" ,"afxdate" ,"strokedate" ,"DATE8","lvh","cvd", |
|
|
82 |
"DATE9","aspirin.1","other_heart","other_peripheral_vascular_disease", |
|
|
83 |
"other_vascular_diagnosis","other","other2","pneumonia.1","emphysema.1")] |
|
|
84 |
} |
|
|
85 |
} |
|
|
86 |
|
|
|
87 |
#EHR |
|
|
88 |
{ |
|
|
89 |
#sex |
|
|
90 |
test_meta$Gender <- ifelse(test_meta$Gender == 1,"F","M") |
|
|
91 |
test_meta = test_meta[,-c(1,2)] |
|
|
92 |
{ |
|
|
93 |
library(table1) |
|
|
94 |
library(tableone) |
|
|
95 |
{ |
|
|
96 |
#0,1 feature |
|
|
97 |
c = c("Omega3" ,"Statin" , "Thiazides" ,"Diuretic" ,"Potassium" ,"Aldosterone", |
|
|
98 |
"Amiodarone","Vasodilators" , "CoQ10", "Betablocking", |
|
|
99 |
"AngiotensinIIantagonists" , "ACEI", "Warfarin","Clopidogrel" ,"Aspirin" , |
|
|
100 |
"Folicacid", |
|
|
101 |
"Coronaryheartdisease", "Myocardialinfarction", "Diabetes","Atrialfibrillation" , "Stroke" , |
|
|
102 |
"Smoking" ,"Leftventricularhypertrophy" ,"Treatedforhypertension", "Treatedforlipids" , |
|
|
103 |
"Drinkbeer", "Drinkwine" ,"Drinkliquor", |
|
|
104 |
"Atrialenlargement","Rightventricularhypertrophy" , |
|
|
105 |
"Rheumatic", "Aorticvalve","Mitralvalve" ,"Arrhythmia", |
|
|
106 |
"Dementia", "Parkinson", |
|
|
107 |
"Adultseizuredisorder","Neurological" , "Thyroid","Endocrine" , |
|
|
108 |
"Renal" , "Gynecologic", "Emphysema" , "Pneumonia" ,"Asthma","Pulmonary", |
|
|
109 |
"Gout" ,"Degenerative", "Rheumatoidarthritis","Musculoskeletal" , |
|
|
110 |
"Gallbladder" , "Gerd" , "Liver" , "Gidisease" ,"Hematologicdisorder" , |
|
|
111 |
"Bleedingdisorder" , "Eye" , "Ent" , "Skin", "Depression" ,"Anxiety" , |
|
|
112 |
"Psychosis" ,"Prostate" ,"Infectious", "Fever" , |
|
|
113 |
"Chronicbronchitis" , "COPD" ) |
|
|
114 |
for(i in c){ |
|
|
115 |
test_meta[,i]<- as.factor(test_meta[,i]) |
|
|
116 |
test_meta[,i]<- as.logical(test_meta[,i] == 1)} |
|
|
117 |
{ |
|
|
118 |
label(test_meta$Ejectionfraction) <- "Ejection fraction" |
|
|
119 |
label(test_meta$CoQ10) <- "CoQ 10" |
|
|
120 |
label(test_meta$Omega3) <- "Omega 3" |
|
|
121 |
label(test_meta$AngiotensinIIantagonists) <- "Angiotensin II antagonists" |
|
|
122 |
label(test_meta$Betablocking) <- "Beta blocking" |
|
|
123 |
label(test_meta$Coronaryheartdisease) <- "Coronary heart disease" |
|
|
124 |
label(test_meta$Myocardialinfarction) <- "Myocardial infarction" |
|
|
125 |
label(test_meta$Atrialfibrillation) <- "Atrial fibrillation" |
|
|
126 |
label(test_meta$Bloodglucose) <- "Blood glucose" |
|
|
127 |
label(test_meta$LDLcholesterol) <- "LDL cholesterol" |
|
|
128 |
label(test_meta$Numberofcigarettessmoked) <- "Number of cigarettes smoked" |
|
|
129 |
label(test_meta$Creatinineserum) <- "Creatinine serum" |
|
|
130 |
label(test_meta$Averagediastolicbloodpressure) <- "Average diastolic blood pressure" |
|
|
131 |
label(test_meta$Leftventricularhypertrophy) <- "Left ventricular hypertrophy" |
|
|
132 |
label(test_meta$Fastingbloodglucose) <- "Fasting blood glucose" |
|
|
133 |
label(test_meta$HDLcholesterol) <- "HDL cholesterol" |
|
|
134 |
label(test_meta$Averagesystolicbloodpressure) <- "Average systolic blood pressure" |
|
|
135 |
label(test_meta$Totalcholesterol) <- "Total cholesterol" |
|
|
136 |
label(test_meta$Ventricularrate) <- "Ventricular rate" |
|
|
137 |
label(test_meta$Treatedforhypertension) <- "Treated for hypertension" |
|
|
138 |
label(test_meta$Treatedforlipids) <- "Treated for lipids" |
|
|
139 |
label(test_meta$Drinkbeer) <- "Drink beer" |
|
|
140 |
label(test_meta$Drinkwine) <- "Drink wine" |
|
|
141 |
label(test_meta$Drinkliquor) <- "Drink liquor" |
|
|
142 |
label(test_meta$Albuminurine) <- "Albumin urine" |
|
|
143 |
label(test_meta$Creatinineurine) <- "Creatinine urine" |
|
|
144 |
label(test_meta$HemoglobinA1cwholeblood) <- "Hemoglobin A1c whole blood" |
|
|
145 |
label(test_meta$Atrialenlargement) <- "Atrial enlargement" |
|
|
146 |
label(test_meta$Rightventricularhypertrophy) <- "Right ventricular hypertrophy" |
|
|
147 |
label(test_meta$Aorticvalve) <- "Aortic valve" |
|
|
148 |
label(test_meta$Mitralvalve) <- "Mitral valve" |
|
|
149 |
label(test_meta$Adultseizuredisorder) <- "Adultseizure disorder" |
|
|
150 |
label(test_meta$Hematologicdisorder) <- "Hematologic disorder" |
|
|
151 |
label(test_meta$Bleedingdisorder) <- "Bleeding disorder" |
|
|
152 |
label(test_meta$Chronicbronchitis) <- "Chronic bronchitis" |
|
|
153 |
label(test_meta$Creactiveprotein) <- "C reactive protein" |
|
|
154 |
label(test_meta$CD8T) <- "CD8+ T cell" |
|
|
155 |
label(test_meta$CD4T) <- "CD4+ T cell" |
|
|
156 |
label(test_meta$NK) <- "Natural killer cell" |
|
|
157 |
label(test_meta$Bcell) <- "B cell" |
|
|
158 |
label(test_meta$Mono) <- "Monocyte cell" |
|
|
159 |
label(test_meta$Gran) <- "Granulocytes cell" |
|
|
160 |
} |
|
|
161 |
|
|
|
162 |
{ |
|
|
163 |
units(test_meta$Age) <- "years" |
|
|
164 |
units(test_meta$Sleep) <- "hours" |
|
|
165 |
units(test_meta$Numberofcigarettessmoked) <- "day" |
|
|
166 |
units(test_meta$CD8T) <- "proportions" |
|
|
167 |
units(test_meta$CD4T) <- "proportions" |
|
|
168 |
units(test_meta$NK) <- "proportions" |
|
|
169 |
units(test_meta$Bcell) <- "proportions" |
|
|
170 |
units(test_meta$Mono) <- "proportions" |
|
|
171 |
units(test_meta$Gran) <- "proportions" |
|
|
172 |
test_meta$Heartfailure <- factor(test_meta$Heartfailure, levels=c(0, 1, 2), labels=c("NoChf", "Chf", "P-value")) |
|
|
173 |
} |
|
|
174 |
rndr <- function(x, name, ...) { |
|
|
175 |
if (length(x) == 0) { |
|
|
176 |
y <- test_meta[[name]] |
|
|
177 |
s <- rep("", length(render.default(x=y, name=name, ...))) |
|
|
178 |
if (is.numeric(y)) { |
|
|
179 |
p <- wilcox.test(y ~ test_meta$Heartfailure)$p.value #wilcoxon = Mann-Whitney U |
|
|
180 |
} else {#t.test |
|
|
181 |
p <- chisq.test(table(y, droplevels(test_meta$Heartfailure)))$p.value |
|
|
182 |
} |
|
|
183 |
s[2] <- sub("<", "<", format.pval(p, digits=3, eps=0.001)) |
|
|
184 |
s |
|
|
185 |
} else { |
|
|
186 |
render.default(x=x, name=name, ...) |
|
|
187 |
} |
|
|
188 |
} |
|
|
189 |
rndr.strat <- function(label, n, ...) { |
|
|
190 |
ifelse(n==0, label, render.strat.default(label, n, ...)) |
|
|
191 |
} |
|
|
192 |
table1(~ Gender+Age+ |
|
|
193 |
CD8T+CD4T+NK+Bcell+Mono+Gran+ |
|
|
194 |
Smoking+Numberofcigarettessmoked+BMI+Hight+Waist+Weight+ |
|
|
195 |
Fastingbloodglucose+Bloodglucose+LDLcholesterol+HDLcholesterol+Totalcholesterol+ |
|
|
196 |
Triglycerides+Ventricularrate+Averagediastolicbloodpressure+Averagesystolicbloodpressure+ |
|
|
197 |
Treatedforhypertension+Treatedforlipids+ |
|
|
198 |
Ejectionfraction+Creactiveprotein+Creatinineserum+Creatinineurine+ |
|
|
199 |
Albuminurine+HemoglobinA1cwholeblood+ |
|
|
200 |
Drinkbeer+Drinkwine+Drinkliquor+Sleep+ |
|
|
201 |
Omega3+Statin+Thiazides+Diuretic+Potassium+Aldosterone+Amiodarone+Vasodilators+CoQ10+ |
|
|
202 |
Betablocking+AngiotensinIIantagonists+ACEI+Warfarin+Clopidogrel+Aspirin+Folicacid+ |
|
|
203 |
Coronaryheartdisease+Myocardialinfarction+Diabetes+Atrialfibrillation+ |
|
|
204 |
Stroke+Leftventricularhypertrophy+Atrialenlargement+Rightventricularhypertrophy+ |
|
|
205 |
Rheumatic+Aorticvalve+Mitralvalve+Arrhythmia+Dementia+Parkinson+Adultseizuredisorder+ |
|
|
206 |
Neurological+Thyroid+Endocrine+Renal+Gynecologic+Emphysema+Pneumonia+Asthma+Pulmonary+Gout+ |
|
|
207 |
Degenerative+Rheumatoidarthritis+Musculoskeletal+Gallbladder+Gerd+Liver+ |
|
|
208 |
Gidisease+Hematologicdisorder+Bleedingdisorder+Eye+Ent+Skin+Depression+ |
|
|
209 |
Anxiety+Psychosis+Prostate+Infectious+Fever+Chronicbronchitis+COPD | Heartfailure,data=test_meta, |
|
|
210 |
droplevels=F, render=rndr, render.strat=rndr.strat, overall=F) |
|
|
211 |
} |
|
|
212 |
|
|
|
213 |
} |
|
|
214 |
#rm- same of UMN |
|
|
215 |
{ |
|
|
216 |
#p value |
|
|
217 |
test_meta <- test_meta[,!colnames(test_meta) %in% c("Ejectionfraction", "Omega3", "Statin", |
|
|
218 |
"Thiazides", "Potassium", |
|
|
219 |
"Aldosterone", "Amiodarone", "Vasodilators", "CoQ10", |
|
|
220 |
"Warfarin", "Clopidogrel", "Folicacid", "Myocardialinfarction", "Stroke", |
|
|
221 |
"Numberofcigarettessmoked", "Smoking", "Leftventricularhypertrophy", "Hight", "Triglycerides", "Ventricularrate", |
|
|
222 |
"Drinkbeer", "Drinkwine", "Drinkliquor", "Sleep", "Creatinineurine", |
|
|
223 |
"Mitralvalve", "Arrhythmia", "Dementia", |
|
|
224 |
"Parkinson", "Adultseizuredisorder", "Neurological", |
|
|
225 |
"Thyroid", "Endocrine", "Renal", "Gynecologic", "Emphysema", |
|
|
226 |
"Pneumonia", "Asthma", "Pulmonary", "Gout", "Degenerative", |
|
|
227 |
"Musculoskeletal", "Gallbladder", "Gerd", "Liver", |
|
|
228 |
"Gidisease", "Hematologicdisorder", "Bleedingdisorder", |
|
|
229 |
"Eye", "Ent", "Skin", "Depression", "Anxiety", "Psychosis", |
|
|
230 |
"Prostate", "Infectious", "Fever", "Chronicbronchitis", |
|
|
231 |
"COPD","CD8T","CD4T","NK","Bcell","Mono","Gran")] |
|
|
232 |
#missing |
|
|
233 |
test_meta <- test_meta[,!colnames(test_meta) %in% c("Diabetes")] |
|
|
234 |
# all 0 |
|
|
235 |
test_meta <- test_meta[,!colnames(test_meta) %in% c("Rightventricularhypertrophy")] |
|
|
236 |
save(test_meta,file="test_meta.Rdata") |
|
|
237 |
} |
|
|
238 |
#impute |
|
|
239 |
{ |
|
|
240 |
library(tibble) |
|
|
241 |
library(impute) |
|
|
242 |
|
|
|
243 |
load(file="test_meta_raw.Rdata") |
|
|
244 |
load(file="test_meta.Rdata") |
|
|
245 |
test_meta <- test_meta_raw[,colnames(test_meta_raw) %in% c(colnames(test_meta))] |
|
|
246 |
#same feature is many NA (NA>1%) |
|
|
247 |
Patient_impute_test <- impute.knn(as.matrix(data.frame(t(test_meta)))) |
|
|
248 |
Patient_impute_test <- data.frame(t(Patient_impute_test$data)) |
|
|
249 |
#change 0,1, |
|
|
250 |
for(i in c("LDLcholesterol","Fastingbloodglucose","Atrialenlargement")){ |
|
|
251 |
Patient_impute_test[,i] <- round(Patient_impute_test[,i],0) |
|
|
252 |
} |
|
|
253 |
for(i in c("BMI","Waist","Albuminurine","Creactiveprotein")){ |
|
|
254 |
Patient_impute_test[,i] <- round(Patient_impute_test[,i],2) |
|
|
255 |
} |
|
|
256 |
write.csv(Patient_impute_test,"Patient_impute_test.csv") |
|
|
257 |
} |
|
|
258 |
Patient_impute_test <- read.table("Patient_impute_test.csv",sep=",",header = T,row.names = 1) |
|
|
259 |
colnames(Patient_impute_test)= c("Diuretic","Beta blocking", |
|
|
260 |
"Angiotensin II antagonists","ACEI", |
|
|
261 |
"Aspirin","Coronary heart disease","Heart failure", |
|
|
262 |
"Atrial fibrillation","Gender","Age","Blood glucose","BMI", |
|
|
263 |
"LDL cholesterol","Creatinine serum", |
|
|
264 |
"Average diastolic blood pressure","Fasting blood glucose", |
|
|
265 |
"HDL cholesterol","Average systolic blood pressure", |
|
|
266 |
"Total cholesterol","Waist","Weight", |
|
|
267 |
"Treated for hypertension","Treated for lipids", |
|
|
268 |
"Albumin urine","Hemoglobin a1c whole blood", |
|
|
269 |
"Atrial enlargement","Rheumatic","Aortic valve", |
|
|
270 |
"Rheumatoid arthritis","C reactive protein") |
|
|
271 |
##rm cor<0.8 feature same UMN |
|
|
272 |
Patient_impute_test <- Patient_impute_test[,!colnames(Patient_impute_test) %in% c("Blood glucose","LDL cholesterol","Waist","Weight")]#删除彼此相关性高,和chf相关性低的特征,还剩下41-5=36 |
|
|
273 |
write.csv(Patient_impute_test,"Patient_impute_test_25.csv") |
|
|
274 |
#norm |
|
|
275 |
{ |
|
|
276 |
#normalization-not 0,1 feature |
|
|
277 |
normalization<-function(x){ |
|
|
278 |
return((x-min(x))/(max(x)-min(x)))} |
|
|
279 |
#c= c("AGE8","BMI8","CREAT8","DBP8","FASTING_BG8","HDL8","SBP8","TC8,Albumin_urine","Hemoglobin_A1c_wholeblood","crp") |
|
|
280 |
for(i in c(10:17,20,21,26)){ |
|
|
281 |
Patient_impute_test[,i] <- normalization(Patient_impute_test[,i]) |
|
|
282 |
} |
|
|
283 |
write.csv(Patient_impute_test,"Patient_impute_test_25_nor.csv") |
|
|
284 |
|
|
|
285 |
} |
|
|
286 |
} |
|
|
287 |
|
|
|
288 |
#merge cpg and valid EHR |
|
|
289 |
{ |
|
|
290 |
#beta |
|
|
291 |
load(file= "CorrectedBeta.Rdata") |
|
|
292 |
new_beta_test = data.frame(t(CorrectedBeta)) |
|
|
293 |
new_beta_test[1:4,1:10] |
|
|
294 |
|
|
|
295 |
#valid EHR |
|
|
296 |
Patient_impute <- read.csv("Patient_impute_test_25_nor.csv",head=T,row.names = 1) |
|
|
297 |
#beta+ehr |
|
|
298 |
test_data <- cbind(Patient_impute,new_beta_test) |
|
|
299 |
save(test_data,file="test_data.Rdata") |
|
|
300 |
} |