--- a +++ b/5-pipeline-DMP_test.R @@ -0,0 +1,300 @@ +#merge EHR and CpG +{ + library(impute) + library(tibble) + library(dplyr) + library(impute) + #JHU + { + setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary") + #cell + load(file="test/CellFraction.Rdata") + load(file="HFpEF.Rdata") + data_cpg_test = filter(HFpEF,PACKS_SET == "JHU") + data_cpg_chf <- filter(data_cpg_test , chf == 1) + data_cpg_nochf <- filter(data_cpg_test , chf == 0) + data_cpg <- rbind(data_cpg_chf,data_cpg_nochf) + data_cpg$Sample_Name <- paste(rep(c("chf","control"),c(nrow(data_cpg_chf),nrow(data_cpg_nochf))), + c(c(1:nrow(data_cpg_chf)),c(1:nrow(data_cpg_nochf))),sep = "") + + test_meta <- data_cpg[,-c(4:7)] + test_meta <- test_meta[,c(128,1:127)] + test_meta_raw <- cbind(test_meta,CellFraction) + { + colnames(test_meta_raw)= c("Sample_Name","shareid", + "Ejectionfraction","PACKS_SET", + "Omega3","Omega3amount", + "Statin","Statinamount", + "Thiazides","Thiazidesamount", + "Diuretic","Diureticamount", + "Potassium","Potassiumamount", + "Aldosterone","Aldosteroneamount", + "Amiodarone","Amiodaroneamount", + "Vasodilators","Vasodilatorsamount", + "CoQ10","CoQ10amount", + "Betablocking","Betablockingamount", + "AngiotensinIIantagonists","AngiotensinIIantagonistsamount", + "ACEI","ACEIamount", + "Warfarin","Warfarinamount", + "Clopidogrel","Clopidogrelamount", + "Aspirin","Aspirinamount", + "Folicacid","Folicacidamount", + "cvd","Coronaryheartdisease","Heartfailure", + "chddate","chfdate","cvddate","midate", + "Myocardialinfarction","Diabetes", + "Atrialfibrillation","afxdate", + "Stroke","strokedate", + "DATE8","DATE9", + "Gender","Age","Bloodglucose","BMI","LDLcholesterol", + "Numberofcigarettessmoked","Creatinineserum", + "Smoking","Averagediastolicbloodpressure", + "Leftventricularhypertrophy", + "Fastingbloodglucose","HDLcholesterol","Hight", + "Averagesystolicbloodpressure", + "Totalcholesterol","Triglycerides" ,"Ventricularrate", + "Waist","Weight","Treatedforhypertension", + "Treatedforlipids","aspirin.1", + "Drinkbeer","Drinkwine","Drinkliquor","Sleep", + "Albuminurine", "Creatinineurine" , + "HemoglobinA1cwholeblood","Atrialenlargement", + "Rightventricularhypertrophy","lvh","Rheumatic", + "Aorticvalve","Mitralvalve","other_heart", + "Arrhythmia","other_peripheral_vascular_disease", + "other_vascular_diagnosis", + "Dementia","Parkinson","Adultseizuredisorder" , + "Neurological","Thyroid","Endocrine","Renal","Gynecologic", + "Emphysema","Pneumonia","Asthma","Pulmonary","Gout", + "Degenerative","Rheumatoidarthritis","Musculoskeletal","Gallbladder", + "Gerd","Liver","Gidisease","Hematologicdisorder","Bleedingdisorder", + "Eye","Ent","Skin","other","Depression","Anxiety","Psychosis", + "other2","Prostate","Infectious","Fever","pneumonia.1", + "Chronicbronchitis","emphysema.1","COPD","Creactiveprotein", + "CD8T","CD4T","NK","Bcell","Mono","Gran") + } + setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary/test/") + save(test_meta_raw,file="test_meta_raw.Rdata") + test_meta = test_meta_raw[,!colnames(test_meta_raw) %in% c("PACKS_SET","Omega3amount","Statinamount","Thiazidesamount","Diureticamount", + "Potassiumamount" , "Aldosteroneamount" , "Amiodaroneamount", + "Vasodilatorsamount","CoQ10amount","Betablockingamount", + "AngiotensinIIantagonistsamount", "ACEIamount" , "Warfarinamount" , + "Clopidogrelamount" , "Aspirinamount" , "Folicacidamount" ,"chddate" , + "chfdate" ,"cvddate" ,"midate" ,"afxdate" ,"strokedate" ,"DATE8","lvh","cvd", + "DATE9","aspirin.1","other_heart","other_peripheral_vascular_disease", + "other_vascular_diagnosis","other","other2","pneumonia.1","emphysema.1")] + } +} + +#EHR +{ + #sex + test_meta$Gender <- ifelse(test_meta$Gender == 1,"F","M") + test_meta = test_meta[,-c(1,2)] + { + library(table1) + library(tableone) + { + #0,1 feature + c = c("Omega3" ,"Statin" , "Thiazides" ,"Diuretic" ,"Potassium" ,"Aldosterone", + "Amiodarone","Vasodilators" , "CoQ10", "Betablocking", + "AngiotensinIIantagonists" , "ACEI", "Warfarin","Clopidogrel" ,"Aspirin" , + "Folicacid", + "Coronaryheartdisease", "Myocardialinfarction", "Diabetes","Atrialfibrillation" , "Stroke" , + "Smoking" ,"Leftventricularhypertrophy" ,"Treatedforhypertension", "Treatedforlipids" , + "Drinkbeer", "Drinkwine" ,"Drinkliquor", + "Atrialenlargement","Rightventricularhypertrophy" , + "Rheumatic", "Aorticvalve","Mitralvalve" ,"Arrhythmia", + "Dementia", "Parkinson", + "Adultseizuredisorder","Neurological" , "Thyroid","Endocrine" , + "Renal" , "Gynecologic", "Emphysema" , "Pneumonia" ,"Asthma","Pulmonary", + "Gout" ,"Degenerative", "Rheumatoidarthritis","Musculoskeletal" , + "Gallbladder" , "Gerd" , "Liver" , "Gidisease" ,"Hematologicdisorder" , + "Bleedingdisorder" , "Eye" , "Ent" , "Skin", "Depression" ,"Anxiety" , + "Psychosis" ,"Prostate" ,"Infectious", "Fever" , + "Chronicbronchitis" , "COPD" ) + for(i in c){ + test_meta[,i]<- as.factor(test_meta[,i]) + test_meta[,i]<- as.logical(test_meta[,i] == 1)} + { + label(test_meta$Ejectionfraction) <- "Ejection fraction" + label(test_meta$CoQ10) <- "CoQ 10" + label(test_meta$Omega3) <- "Omega 3" + label(test_meta$AngiotensinIIantagonists) <- "Angiotensin II antagonists" + label(test_meta$Betablocking) <- "Beta blocking" + label(test_meta$Coronaryheartdisease) <- "Coronary heart disease" + label(test_meta$Myocardialinfarction) <- "Myocardial infarction" + label(test_meta$Atrialfibrillation) <- "Atrial fibrillation" + label(test_meta$Bloodglucose) <- "Blood glucose" + label(test_meta$LDLcholesterol) <- "LDL cholesterol" + label(test_meta$Numberofcigarettessmoked) <- "Number of cigarettes smoked" + label(test_meta$Creatinineserum) <- "Creatinine serum" + label(test_meta$Averagediastolicbloodpressure) <- "Average diastolic blood pressure" + label(test_meta$Leftventricularhypertrophy) <- "Left ventricular hypertrophy" + label(test_meta$Fastingbloodglucose) <- "Fasting blood glucose" + label(test_meta$HDLcholesterol) <- "HDL cholesterol" + label(test_meta$Averagesystolicbloodpressure) <- "Average systolic blood pressure" + label(test_meta$Totalcholesterol) <- "Total cholesterol" + label(test_meta$Ventricularrate) <- "Ventricular rate" + label(test_meta$Treatedforhypertension) <- "Treated for hypertension" + label(test_meta$Treatedforlipids) <- "Treated for lipids" + label(test_meta$Drinkbeer) <- "Drink beer" + label(test_meta$Drinkwine) <- "Drink wine" + label(test_meta$Drinkliquor) <- "Drink liquor" + label(test_meta$Albuminurine) <- "Albumin urine" + label(test_meta$Creatinineurine) <- "Creatinine urine" + label(test_meta$HemoglobinA1cwholeblood) <- "Hemoglobin A1c whole blood" + label(test_meta$Atrialenlargement) <- "Atrial enlargement" + label(test_meta$Rightventricularhypertrophy) <- "Right ventricular hypertrophy" + label(test_meta$Aorticvalve) <- "Aortic valve" + label(test_meta$Mitralvalve) <- "Mitral valve" + label(test_meta$Adultseizuredisorder) <- "Adultseizure disorder" + label(test_meta$Hematologicdisorder) <- "Hematologic disorder" + label(test_meta$Bleedingdisorder) <- "Bleeding disorder" + label(test_meta$Chronicbronchitis) <- "Chronic bronchitis" + label(test_meta$Creactiveprotein) <- "C reactive protein" + label(test_meta$CD8T) <- "CD8+ T cell" + label(test_meta$CD4T) <- "CD4+ T cell" + label(test_meta$NK) <- "Natural killer cell" + label(test_meta$Bcell) <- "B cell" + label(test_meta$Mono) <- "Monocyte cell" + label(test_meta$Gran) <- "Granulocytes cell" + } + + { + units(test_meta$Age) <- "years" + units(test_meta$Sleep) <- "hours" + units(test_meta$Numberofcigarettessmoked) <- "day" + units(test_meta$CD8T) <- "proportions" + units(test_meta$CD4T) <- "proportions" + units(test_meta$NK) <- "proportions" + units(test_meta$Bcell) <- "proportions" + units(test_meta$Mono) <- "proportions" + units(test_meta$Gran) <- "proportions" + test_meta$Heartfailure <- factor(test_meta$Heartfailure, levels=c(0, 1, 2), labels=c("NoChf", "Chf", "P-value")) + } + rndr <- function(x, name, ...) { + if (length(x) == 0) { + y <- test_meta[[name]] + s <- rep("", length(render.default(x=y, name=name, ...))) + if (is.numeric(y)) { + p <- wilcox.test(y ~ test_meta$Heartfailure)$p.value #wilcoxon = Mann-Whitney U + } else {#t.test + p <- chisq.test(table(y, droplevels(test_meta$Heartfailure)))$p.value + } + s[2] <- sub("<", "<", format.pval(p, digits=3, eps=0.001)) + s + } else { + render.default(x=x, name=name, ...) + } + } + rndr.strat <- function(label, n, ...) { + ifelse(n==0, label, render.strat.default(label, n, ...)) + } + table1(~ Gender+Age+ + CD8T+CD4T+NK+Bcell+Mono+Gran+ + Smoking+Numberofcigarettessmoked+BMI+Hight+Waist+Weight+ + Fastingbloodglucose+Bloodglucose+LDLcholesterol+HDLcholesterol+Totalcholesterol+ + Triglycerides+Ventricularrate+Averagediastolicbloodpressure+Averagesystolicbloodpressure+ + Treatedforhypertension+Treatedforlipids+ + Ejectionfraction+Creactiveprotein+Creatinineserum+Creatinineurine+ + Albuminurine+HemoglobinA1cwholeblood+ + Drinkbeer+Drinkwine+Drinkliquor+Sleep+ + Omega3+Statin+Thiazides+Diuretic+Potassium+Aldosterone+Amiodarone+Vasodilators+CoQ10+ + Betablocking+AngiotensinIIantagonists+ACEI+Warfarin+Clopidogrel+Aspirin+Folicacid+ + Coronaryheartdisease+Myocardialinfarction+Diabetes+Atrialfibrillation+ + Stroke+Leftventricularhypertrophy+Atrialenlargement+Rightventricularhypertrophy+ + Rheumatic+Aorticvalve+Mitralvalve+Arrhythmia+Dementia+Parkinson+Adultseizuredisorder+ + Neurological+Thyroid+Endocrine+Renal+Gynecologic+Emphysema+Pneumonia+Asthma+Pulmonary+Gout+ + Degenerative+Rheumatoidarthritis+Musculoskeletal+Gallbladder+Gerd+Liver+ + Gidisease+Hematologicdisorder+Bleedingdisorder+Eye+Ent+Skin+Depression+ + Anxiety+Psychosis+Prostate+Infectious+Fever+Chronicbronchitis+COPD | Heartfailure,data=test_meta, + droplevels=F, render=rndr, render.strat=rndr.strat, overall=F) + } + + } + #rm- same of UMN + { + #p value + test_meta <- test_meta[,!colnames(test_meta) %in% c("Ejectionfraction", "Omega3", "Statin", + "Thiazides", "Potassium", + "Aldosterone", "Amiodarone", "Vasodilators", "CoQ10", + "Warfarin", "Clopidogrel", "Folicacid", "Myocardialinfarction", "Stroke", + "Numberofcigarettessmoked", "Smoking", "Leftventricularhypertrophy", "Hight", "Triglycerides", "Ventricularrate", + "Drinkbeer", "Drinkwine", "Drinkliquor", "Sleep", "Creatinineurine", + "Mitralvalve", "Arrhythmia", "Dementia", + "Parkinson", "Adultseizuredisorder", "Neurological", + "Thyroid", "Endocrine", "Renal", "Gynecologic", "Emphysema", + "Pneumonia", "Asthma", "Pulmonary", "Gout", "Degenerative", + "Musculoskeletal", "Gallbladder", "Gerd", "Liver", + "Gidisease", "Hematologicdisorder", "Bleedingdisorder", + "Eye", "Ent", "Skin", "Depression", "Anxiety", "Psychosis", + "Prostate", "Infectious", "Fever", "Chronicbronchitis", + "COPD","CD8T","CD4T","NK","Bcell","Mono","Gran")] + #missing + test_meta <- test_meta[,!colnames(test_meta) %in% c("Diabetes")] + # all 0 + test_meta <- test_meta[,!colnames(test_meta) %in% c("Rightventricularhypertrophy")] + save(test_meta,file="test_meta.Rdata") + } + #impute + { + library(tibble) + library(impute) + + load(file="test_meta_raw.Rdata") + load(file="test_meta.Rdata") + test_meta <- test_meta_raw[,colnames(test_meta_raw) %in% c(colnames(test_meta))] + #same feature is many NA (NA>1%) + Patient_impute_test <- impute.knn(as.matrix(data.frame(t(test_meta)))) + Patient_impute_test <- data.frame(t(Patient_impute_test$data)) + #change 0,1, + for(i in c("LDLcholesterol","Fastingbloodglucose","Atrialenlargement")){ + Patient_impute_test[,i] <- round(Patient_impute_test[,i],0) + } + for(i in c("BMI","Waist","Albuminurine","Creactiveprotein")){ + Patient_impute_test[,i] <- round(Patient_impute_test[,i],2) + } + write.csv(Patient_impute_test,"Patient_impute_test.csv") + } + Patient_impute_test <- read.table("Patient_impute_test.csv",sep=",",header = T,row.names = 1) + colnames(Patient_impute_test)= c("Diuretic","Beta blocking", + "Angiotensin II antagonists","ACEI", + "Aspirin","Coronary heart disease","Heart failure", + "Atrial fibrillation","Gender","Age","Blood glucose","BMI", + "LDL cholesterol","Creatinine serum", + "Average diastolic blood pressure","Fasting blood glucose", + "HDL cholesterol","Average systolic blood pressure", + "Total cholesterol","Waist","Weight", + "Treated for hypertension","Treated for lipids", + "Albumin urine","Hemoglobin a1c whole blood", + "Atrial enlargement","Rheumatic","Aortic valve", + "Rheumatoid arthritis","C reactive protein") + ##rm cor<0.8 feature same UMN + Patient_impute_test <- Patient_impute_test[,!colnames(Patient_impute_test) %in% c("Blood glucose","LDL cholesterol","Waist","Weight")]#删除彼此相关性高,和chf相关性低的特征,还剩下41-5=36 + write.csv(Patient_impute_test,"Patient_impute_test_25.csv") + #norm + { + #normalization-not 0,1 feature + normalization<-function(x){ + return((x-min(x))/(max(x)-min(x)))} + #c= c("AGE8","BMI8","CREAT8","DBP8","FASTING_BG8","HDL8","SBP8","TC8,Albumin_urine","Hemoglobin_A1c_wholeblood","crp") + for(i in c(10:17,20,21,26)){ + Patient_impute_test[,i] <- normalization(Patient_impute_test[,i]) + } + write.csv(Patient_impute_test,"Patient_impute_test_25_nor.csv") + + } +} + +#merge cpg and valid EHR +{ + #beta + load(file= "CorrectedBeta.Rdata") + new_beta_test = data.frame(t(CorrectedBeta)) + new_beta_test[1:4,1:10] + + #valid EHR + Patient_impute <- read.csv("Patient_impute_test_25_nor.csv",head=T,row.names = 1) + #beta+ehr + test_data <- cbind(Patient_impute,new_beta_test) + save(test_data,file="test_data.Rdata") +} \ No newline at end of file