Diff of /5-pipeline-DMP_test.R [000000] .. [d2c46b]

Switch to unified view

a b/5-pipeline-DMP_test.R
1
#merge EHR and CpG
2
{
3
  library(impute)
4
  library(tibble)
5
  library(dplyr)
6
  library(impute)
7
  #JHU
8
  {
9
    setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary")
10
    #cell 
11
    load(file="test/CellFraction.Rdata")
12
    load(file="HFpEF.Rdata")
13
    data_cpg_test = filter(HFpEF,PACKS_SET == "JHU")
14
    data_cpg_chf <- filter(data_cpg_test , chf == 1)
15
    data_cpg_nochf <- filter(data_cpg_test , chf == 0)
16
    data_cpg <- rbind(data_cpg_chf,data_cpg_nochf)
17
    data_cpg$Sample_Name <- paste(rep(c("chf","control"),c(nrow(data_cpg_chf),nrow(data_cpg_nochf))),
18
                                  c(c(1:nrow(data_cpg_chf)),c(1:nrow(data_cpg_nochf))),sep = "")
19
    
20
    test_meta  <- data_cpg[,-c(4:7)]
21
    test_meta <- test_meta[,c(128,1:127)]
22
    test_meta_raw <- cbind(test_meta,CellFraction)
23
    {
24
      colnames(test_meta_raw)= c("Sample_Name","shareid",
25
                                 "Ejectionfraction","PACKS_SET",
26
                                 "Omega3","Omega3amount",
27
                                 "Statin","Statinamount",
28
                                 "Thiazides","Thiazidesamount",
29
                                 "Diuretic","Diureticamount",
30
                                 "Potassium","Potassiumamount",
31
                                 "Aldosterone","Aldosteroneamount",
32
                                 "Amiodarone","Amiodaroneamount",
33
                                 "Vasodilators","Vasodilatorsamount",
34
                                 "CoQ10","CoQ10amount",
35
                                 "Betablocking","Betablockingamount",
36
                                 "AngiotensinIIantagonists","AngiotensinIIantagonistsamount",
37
                                 "ACEI","ACEIamount",
38
                                 "Warfarin","Warfarinamount",
39
                                 "Clopidogrel","Clopidogrelamount",
40
                                 "Aspirin","Aspirinamount",
41
                                 "Folicacid","Folicacidamount",
42
                                 "cvd","Coronaryheartdisease","Heartfailure",
43
                                 "chddate","chfdate","cvddate","midate",
44
                                 "Myocardialinfarction","Diabetes",
45
                                 "Atrialfibrillation","afxdate",
46
                                 "Stroke","strokedate",
47
                                 "DATE8","DATE9",
48
                                 "Gender","Age","Bloodglucose","BMI","LDLcholesterol",
49
                                 "Numberofcigarettessmoked","Creatinineserum",
50
                                 "Smoking","Averagediastolicbloodpressure",
51
                                 "Leftventricularhypertrophy",
52
                                 "Fastingbloodglucose","HDLcholesterol","Hight",
53
                                 "Averagesystolicbloodpressure",
54
                                 "Totalcholesterol","Triglycerides" ,"Ventricularrate",
55
                                 "Waist","Weight","Treatedforhypertension",
56
                                 "Treatedforlipids","aspirin.1",
57
                                 "Drinkbeer","Drinkwine","Drinkliquor","Sleep", 
58
                                 "Albuminurine", "Creatinineurine" ,
59
                                 "HemoglobinA1cwholeblood","Atrialenlargement",
60
                                 "Rightventricularhypertrophy","lvh","Rheumatic",
61
                                 "Aorticvalve","Mitralvalve","other_heart",
62
                                 "Arrhythmia","other_peripheral_vascular_disease",
63
                                 "other_vascular_diagnosis",
64
                                 "Dementia","Parkinson","Adultseizuredisorder" ,
65
                                 "Neurological","Thyroid","Endocrine","Renal","Gynecologic",
66
                                 "Emphysema","Pneumonia","Asthma","Pulmonary","Gout",
67
                                 "Degenerative","Rheumatoidarthritis","Musculoskeletal","Gallbladder",
68
                                 "Gerd","Liver","Gidisease","Hematologicdisorder","Bleedingdisorder",
69
                                 "Eye","Ent","Skin","other","Depression","Anxiety","Psychosis",
70
                                 "other2","Prostate","Infectious","Fever","pneumonia.1",
71
                                 "Chronicbronchitis","emphysema.1","COPD","Creactiveprotein",
72
                                 "CD8T","CD4T","NK","Bcell","Mono","Gran")
73
    }
74
    setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary/test/")
75
    save(test_meta_raw,file="test_meta_raw.Rdata")
76
    test_meta = test_meta_raw[,!colnames(test_meta_raw) %in%  c("PACKS_SET","Omega3amount","Statinamount","Thiazidesamount","Diureticamount",
77
                                                                "Potassiumamount" , "Aldosteroneamount" , "Amiodaroneamount",
78
                                                                "Vasodilatorsamount","CoQ10amount","Betablockingamount", 
79
                                                                "AngiotensinIIantagonistsamount", "ACEIamount" , "Warfarinamount" , 
80
                                                                "Clopidogrelamount" , "Aspirinamount" , "Folicacidamount" ,"chddate" ,
81
                                                                "chfdate" ,"cvddate" ,"midate" ,"afxdate" ,"strokedate" ,"DATE8","lvh","cvd",
82
                                                                "DATE9","aspirin.1","other_heart","other_peripheral_vascular_disease",
83
                                                                "other_vascular_diagnosis","other","other2","pneumonia.1","emphysema.1")]
84
  }
85
}
86
87
#EHR
88
{
89
  #sex
90
  test_meta$Gender <- ifelse(test_meta$Gender == 1,"F","M")
91
  test_meta = test_meta[,-c(1,2)]
92
  {
93
    library(table1)
94
    library(tableone)
95
    {
96
      #0,1 feature 
97
      c = c("Omega3" ,"Statin" , "Thiazides" ,"Diuretic" ,"Potassium" ,"Aldosterone",
98
            "Amiodarone","Vasodilators" , "CoQ10", "Betablocking",
99
            "AngiotensinIIantagonists" , "ACEI", "Warfarin","Clopidogrel" ,"Aspirin" , 
100
            "Folicacid",
101
            "Coronaryheartdisease", "Myocardialinfarction", "Diabetes","Atrialfibrillation" , "Stroke" ,
102
            "Smoking" ,"Leftventricularhypertrophy" ,"Treatedforhypertension", "Treatedforlipids" ,
103
            "Drinkbeer", "Drinkwine" ,"Drinkliquor",
104
            "Atrialenlargement","Rightventricularhypertrophy" ,
105
            "Rheumatic", "Aorticvalve","Mitralvalve" ,"Arrhythmia",
106
            "Dementia", "Parkinson",
107
            "Adultseizuredisorder","Neurological" , "Thyroid","Endocrine" , 
108
            "Renal"  , "Gynecologic", "Emphysema" , "Pneumonia" ,"Asthma","Pulmonary",
109
            "Gout" ,"Degenerative", "Rheumatoidarthritis","Musculoskeletal" ,
110
            "Gallbladder" , "Gerd" ,  "Liver"  , "Gidisease" ,"Hematologicdisorder"  ,
111
            "Bleedingdisorder" , "Eye"  , "Ent" , "Skin", "Depression"  ,"Anxiety" ,
112
            "Psychosis" ,"Prostate" ,"Infectious",  "Fever" , 
113
            "Chronicbronchitis" , "COPD" )
114
      for(i in c){
115
        test_meta[,i]<- as.factor(test_meta[,i])
116
        test_meta[,i]<- as.logical(test_meta[,i] == 1)}
117
      {
118
        label(test_meta$Ejectionfraction)    <- "Ejection fraction" 
119
        label(test_meta$CoQ10)    <- "CoQ 10" 
120
        label(test_meta$Omega3)    <- "Omega 3"
121
        label(test_meta$AngiotensinIIantagonists)    <- "Angiotensin II antagonists"
122
        label(test_meta$Betablocking)    <- "Beta blocking"
123
        label(test_meta$Coronaryheartdisease)    <- "Coronary heart disease" 
124
        label(test_meta$Myocardialinfarction)    <- "Myocardial infarction"
125
        label(test_meta$Atrialfibrillation)    <- "Atrial fibrillation" 
126
        label(test_meta$Bloodglucose)    <- "Blood glucose" 
127
        label(test_meta$LDLcholesterol)    <- "LDL cholesterol" 
128
        label(test_meta$Numberofcigarettessmoked)    <- "Number of cigarettes smoked"
129
        label(test_meta$Creatinineserum)    <- "Creatinine serum" 
130
        label(test_meta$Averagediastolicbloodpressure)    <- "Average diastolic blood pressure" 
131
        label(test_meta$Leftventricularhypertrophy)    <- "Left ventricular hypertrophy" 
132
        label(test_meta$Fastingbloodglucose)    <- "Fasting blood glucose" 
133
        label(test_meta$HDLcholesterol)    <- "HDL cholesterol" 
134
        label(test_meta$Averagesystolicbloodpressure)    <- "Average systolic blood pressure" 
135
        label(test_meta$Totalcholesterol)    <- "Total cholesterol" 
136
        label(test_meta$Ventricularrate)    <- "Ventricular rate" 
137
        label(test_meta$Treatedforhypertension)    <- "Treated for hypertension" 
138
        label(test_meta$Treatedforlipids)    <- "Treated for lipids" 
139
        label(test_meta$Drinkbeer)    <- "Drink beer" 
140
        label(test_meta$Drinkwine)    <- "Drink wine" 
141
        label(test_meta$Drinkliquor)    <- "Drink liquor" 
142
        label(test_meta$Albuminurine)    <- "Albumin urine" 
143
        label(test_meta$Creatinineurine)    <- "Creatinine urine" 
144
        label(test_meta$HemoglobinA1cwholeblood)    <- "Hemoglobin A1c whole blood" 
145
        label(test_meta$Atrialenlargement)    <- "Atrial enlargement" 
146
        label(test_meta$Rightventricularhypertrophy)    <- "Right ventricular hypertrophy" 
147
        label(test_meta$Aorticvalve)    <- "Aortic valve" 
148
        label(test_meta$Mitralvalve)    <- "Mitral valve" 
149
        label(test_meta$Adultseizuredisorder)    <- "Adultseizure disorder"
150
        label(test_meta$Hematologicdisorder)    <- "Hematologic disorder"
151
        label(test_meta$Bleedingdisorder)    <- "Bleeding disorder"
152
        label(test_meta$Chronicbronchitis)    <- "Chronic bronchitis"
153
        label(test_meta$Creactiveprotein)    <- "C reactive protein" 
154
        label(test_meta$CD8T)    <- "CD8+ T cell" 
155
        label(test_meta$CD4T)    <- "CD4+ T cell"
156
        label(test_meta$NK)    <- "Natural killer cell"
157
        label(test_meta$Bcell)    <- "B cell"
158
        label(test_meta$Mono)    <- "Monocyte cell"
159
        label(test_meta$Gran)    <- "Granulocytes cell" 
160
      }
161
      
162
      {
163
        units(test_meta$Age)      <- "years"
164
        units(test_meta$Sleep)      <- "hours"
165
        units(test_meta$Numberofcigarettessmoked)      <- "day"
166
        units(test_meta$CD8T)      <- "proportions"
167
        units(test_meta$CD4T)      <- "proportions"
168
        units(test_meta$NK)      <- "proportions"
169
        units(test_meta$Bcell)      <- "proportions"
170
        units(test_meta$Mono)      <- "proportions"
171
        units(test_meta$Gran)      <- "proportions"
172
        test_meta$Heartfailure <- factor(test_meta$Heartfailure, levels=c(0, 1, 2), labels=c("NoChf", "Chf", "P-value"))
173
      }
174
      rndr <- function(x, name, ...) {
175
        if (length(x) == 0) {
176
          y <- test_meta[[name]]
177
          s <- rep("", length(render.default(x=y, name=name, ...)))
178
          if (is.numeric(y)) {
179
            p <- wilcox.test(y ~ test_meta$Heartfailure)$p.value #wilcoxon = Mann-Whitney U
180
          } else {#t.test
181
            p <- chisq.test(table(y, droplevels(test_meta$Heartfailure)))$p.value
182
          }
183
          s[2] <- sub("<", "&lt;", format.pval(p, digits=3, eps=0.001))
184
          s
185
        } else {
186
          render.default(x=x, name=name, ...)
187
        }
188
      }
189
      rndr.strat <- function(label, n, ...) {
190
        ifelse(n==0, label, render.strat.default(label, n, ...))
191
      }
192
      table1(~ Gender+Age+
193
               CD8T+CD4T+NK+Bcell+Mono+Gran+
194
               Smoking+Numberofcigarettessmoked+BMI+Hight+Waist+Weight+
195
               Fastingbloodglucose+Bloodglucose+LDLcholesterol+HDLcholesterol+Totalcholesterol+
196
               Triglycerides+Ventricularrate+Averagediastolicbloodpressure+Averagesystolicbloodpressure+
197
               Treatedforhypertension+Treatedforlipids+
198
               Ejectionfraction+Creactiveprotein+Creatinineserum+Creatinineurine+
199
               Albuminurine+HemoglobinA1cwholeblood+  
200
               Drinkbeer+Drinkwine+Drinkliquor+Sleep+
201
               Omega3+Statin+Thiazides+Diuretic+Potassium+Aldosterone+Amiodarone+Vasodilators+CoQ10+
202
               Betablocking+AngiotensinIIantagonists+ACEI+Warfarin+Clopidogrel+Aspirin+Folicacid+
203
               Coronaryheartdisease+Myocardialinfarction+Diabetes+Atrialfibrillation+
204
               Stroke+Leftventricularhypertrophy+Atrialenlargement+Rightventricularhypertrophy+
205
               Rheumatic+Aorticvalve+Mitralvalve+Arrhythmia+Dementia+Parkinson+Adultseizuredisorder+
206
               Neurological+Thyroid+Endocrine+Renal+Gynecologic+Emphysema+Pneumonia+Asthma+Pulmonary+Gout+
207
               Degenerative+Rheumatoidarthritis+Musculoskeletal+Gallbladder+Gerd+Liver+
208
               Gidisease+Hematologicdisorder+Bleedingdisorder+Eye+Ent+Skin+Depression+
209
               Anxiety+Psychosis+Prostate+Infectious+Fever+Chronicbronchitis+COPD | Heartfailure,data=test_meta, 
210
             droplevels=F, render=rndr, render.strat=rndr.strat, overall=F)
211
    }
212
    
213
  }
214
  #rm- same of UMN
215
  {
216
    #p value
217
    test_meta <- test_meta[,!colnames(test_meta)  %in% c("Ejectionfraction", "Omega3", "Statin", 
218
                                                         "Thiazides", "Potassium",
219
                                                         "Aldosterone", "Amiodarone", "Vasodilators", "CoQ10",
220
                                                         "Warfarin", "Clopidogrel", "Folicacid", "Myocardialinfarction", "Stroke",
221
                                                         "Numberofcigarettessmoked", "Smoking", "Leftventricularhypertrophy", "Hight", "Triglycerides", "Ventricularrate",
222
                                                         "Drinkbeer", "Drinkwine", "Drinkliquor", "Sleep", "Creatinineurine",
223
                                                         "Mitralvalve", "Arrhythmia", "Dementia",
224
                                                         "Parkinson", "Adultseizuredisorder", "Neurological", 
225
                                                         "Thyroid", "Endocrine", "Renal", "Gynecologic", "Emphysema",
226
                                                         "Pneumonia", "Asthma", "Pulmonary", "Gout", "Degenerative",
227
                                                         "Musculoskeletal", "Gallbladder", "Gerd", "Liver",
228
                                                         "Gidisease", "Hematologicdisorder", "Bleedingdisorder",
229
                                                         "Eye", "Ent", "Skin", "Depression", "Anxiety", "Psychosis",
230
                                                         "Prostate", "Infectious", "Fever", "Chronicbronchitis",
231
                                                         "COPD","CD8T","CD4T","NK","Bcell","Mono","Gran")]
232
    #missing
233
    test_meta <- test_meta[,!colnames(test_meta)  %in% c("Diabetes")]
234
    # all 0
235
    test_meta <- test_meta[,!colnames(test_meta)  %in% c("Rightventricularhypertrophy")]
236
    save(test_meta,file="test_meta.Rdata")
237
  }
238
  #impute
239
  {
240
    library(tibble)
241
    library(impute)
242
    
243
    load(file="test_meta_raw.Rdata")
244
    load(file="test_meta.Rdata")
245
    test_meta <- test_meta_raw[,colnames(test_meta_raw) %in% c(colnames(test_meta))]
246
    #same feature is many NA (NA>1%)
247
    Patient_impute_test <- impute.knn(as.matrix(data.frame(t(test_meta))))
248
    Patient_impute_test <- data.frame(t(Patient_impute_test$data))
249
    #change 0,1,
250
    for(i in c("LDLcholesterol","Fastingbloodglucose","Atrialenlargement")){
251
      Patient_impute_test[,i] <- round(Patient_impute_test[,i],0)
252
    }
253
    for(i in c("BMI","Waist","Albuminurine","Creactiveprotein")){
254
      Patient_impute_test[,i] <- round(Patient_impute_test[,i],2)
255
    }
256
    write.csv(Patient_impute_test,"Patient_impute_test.csv")
257
  }
258
  Patient_impute_test <- read.table("Patient_impute_test.csv",sep=",",header = T,row.names = 1)
259
  colnames(Patient_impute_test)= c("Diuretic","Beta blocking",
260
                                   "Angiotensin II antagonists","ACEI",
261
                                   "Aspirin","Coronary heart disease","Heart failure",
262
                                   "Atrial fibrillation","Gender","Age","Blood glucose","BMI",
263
                                   "LDL cholesterol","Creatinine serum",
264
                                   "Average diastolic blood pressure","Fasting blood glucose",
265
                                   "HDL cholesterol","Average systolic blood pressure",
266
                                   "Total cholesterol","Waist","Weight",
267
                                   "Treated for hypertension","Treated for lipids",
268
                                   "Albumin urine","Hemoglobin a1c whole blood",
269
                                   "Atrial enlargement","Rheumatic","Aortic valve",
270
                                   "Rheumatoid arthritis","C reactive protein")
271
  ##rm cor<0.8 feature same UMN
272
  Patient_impute_test <- Patient_impute_test[,!colnames(Patient_impute_test) %in% c("Blood glucose","LDL cholesterol","Waist","Weight")]#删除彼此相关性高,和chf相关性低的特征,还剩下41-5=36
273
  write.csv(Patient_impute_test,"Patient_impute_test_25.csv")
274
  #norm
275
  {
276
    #normalization-not 0,1 feature
277
    normalization<-function(x){
278
      return((x-min(x))/(max(x)-min(x)))}
279
    #c= c("AGE8","BMI8","CREAT8","DBP8","FASTING_BG8","HDL8","SBP8","TC8,Albumin_urine","Hemoglobin_A1c_wholeblood","crp")
280
    for(i in c(10:17,20,21,26)){ 
281
      Patient_impute_test[,i] <- normalization(Patient_impute_test[,i])
282
    }
283
    write.csv(Patient_impute_test,"Patient_impute_test_25_nor.csv")
284
    
285
  }
286
}
287
288
#merge cpg and valid EHR
289
{
290
  #beta
291
  load(file= "CorrectedBeta.Rdata")
292
  new_beta_test = data.frame(t(CorrectedBeta))
293
  new_beta_test[1:4,1:10]
294
  
295
  #valid EHR
296
  Patient_impute <- read.csv("Patient_impute_test_25_nor.csv",head=T,row.names = 1)
297
  #beta+ehr
298
  test_data <- cbind(Patient_impute,new_beta_test)
299
  save(test_data,file="test_data.Rdata")
300
}