--- a +++ b/1-all_clin.R @@ -0,0 +1,585 @@ +# 20210410 +# Pre-processing of samples +# Get the sample from inside the phs000724, then get the sample with qualified quality and do the grouping of ejection fraction +# The latter will only do HFpEF analysis +# Then get the clinical features that we are concerned about more than 90 +# Then get the time, get the exam8, eight years after the disease sample and control + + +library(rlang) +library(dplyr) +library("readxl") + +#data +{ + setwd("H:/dbgap_CHD/ChildStudyConsentSet_phs000724.Framingham.v7.p10.c1.HMB-IRB-MDS/PhenotypeFiles") + c1=read.table("phs000724.v7.pht004246.v2.p11.c1.Framingham_DNA_Methylation_Sample_Attributes_I.HMB-IRB-MDS.txt",sep="\t",header = T) + setwd("H:/dbgap_CHD/ChildStudyConsentSet_phs000724.Framingham.v7.p10.c2.HMB-IRB-NPU-MDS/PhenotypeFiles") + c2=read.table("phs000724.v7.pht004246.v2.p11.c2.Framingham_DNA_Methylation_Sample_Attributes_I.HMB-IRB-NPU-MDS.txt",sep="\t",header = T) + data1 <- rbind(c1,c2) + data1$SAMPID=strsplit(as.character(data1$SAMPID), "_724") + data1 = data1[,c(2,3,8)] + data1 <- filter(data1, PACKS_SET !="GEN3") + data1 <- filter(data1, LABID !="") + data1$SAMPID <- as.character(data1$SAMPID)#2725 + + setwd("H:/dbgap_CHD/ChildStudyConsentSet_phs000724.Framingham.v7.p10.c1.HMB-IRB-MDS/PhenotypeFiles") + c1=read.table("phs000724.v7.pht004247.v2.p11.c1.Framingham_DNA_Methylation_Sample_Attributes_II.HMB-IRB-MDS.txt",sep="\t",header = T) + setwd("H:/dbgap_CHD/ChildStudyConsentSet_phs000724.Framingham.v7.p10.c2.HMB-IRB-NPU-MDS/PhenotypeFiles") + c2=read.table("phs000724.v7.pht004247.v2.p11.c2.Framingham_DNA_Methylation_Sample_Attributes_II.HMB-IRB-NPU-MDS.txt",sep="\t",header = T) + data2 <- rbind(c1,c2) + data2$SAMPID=strsplit(as.character(data2$SAMPID), "_724") + data2 = data2[,c(2,3,5,11)] + data2$SAMPID = as.character(data2$SAMPID)#2782 + colnames(data2)[3] = "Sample_Well" + data = merge(data1,data2,by="SAMPID",all.x=TRUE,all.y=TRUE)#2782;data2 have rep, data1 no rep +} +#Pre-processing data +{ + #qc-17 + data2_QC = filter(data2,QC_Comment != "") + QC = unique(as.character(data2_QC$SAMPID)) + #rep-29 + index<-duplicated(data2$SAMPID) + data2_index <-data2[index,] + rep = unique(as.character(data2_index$SAMPID)) + data = filter(data, !SAMPID %in% c(rep,QC))#2626 + data$Sentrix_ID <- unlist(lapply(as.character(data$LABID.y),function(x){strsplit(x,'_')[[1]][[1]]})) + #slide: data$Sentrix_ID=gsub("_[[:alnum:]]*", "", data$LABID.y) + data$Sentrix_Position <- unlist(lapply(as.character(data$LABID.y),function(x){strsplit(x,'_')[[1]][[2]]})) + #arrayPos:data$Sentrix_Position=gsub("[0-9]*_", "", data$LABID.y) + data = data[,c(1,3,5,6,7,8)] #2626 +} +save(data,file="data.Rdata") +#EF +{ + #c1 + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + echo <- read_excel("phs000007.v30.pht002572.v6.p11.c1.t_echo_2008_m_0549s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + echo <- data.frame(echo) + echo <- echo[,c(2,38,123)] + #c2 + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + echo_c2 <- read_excel("phs000007.v30.pht002572.v6.p11.c2.t_echo_2008_m_0549s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + echo_c2 <- data.frame(echo_c2) + echo_c2 <- echo_c2[,c(2,38,123)] + #rbind + echo <- rbind(echo,echo_c2) + colnames(echo)[2] <- c("EF") + colnames(echo)[3] <- c("LVSF") + echo_1 <- filter(echo,EF<40) + echo_2 <- filter(echo,EF>50) + echo_3 <- filter(echo,EF<50 & EF>40) +} +HFpEF = merge(echo_2[,1:2],data,by.x="shareid",by.y="SAMPID")#20 +HFrEF = merge(echo_1[,1:2],data,by.x="shareid",by.y="SAMPID")#2334 +HFmrEF = merge(echo_3[,1:2],data,by.x="shareid",by.y="SAMPID")#38 +save(HFpEF,file="HFpEF.Rdata") +#clinical +{ + #cvd + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + cvd_c1 <- read_excel("phs000007.v30.pht003316.v7.p11.c1.vr_survcvd_2014_a_1023s.HMB-IRB-MDS.xlsx",sheet=1, na = "NA", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + cvd_c2 <- read_excel("phs000007.v30.pht003316.v7.p11.c2.vr_survcvd_2014_a_1023s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "NA", skip = 10) + cvd = rbind(cvd_c1,cvd_c2) + cvd <- data.frame(cvd) + cvd <- cvd[,-c(1,3)] + head(cvd) + #mi + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + MI_c1 <- read_excel("phs000007.v30.pht000309.v13.p11.c1.vr_soe_2016_a_1073s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + MI_c2 <- read_excel("phs000007.v30.pht000309.v13.p11.c2.vr_soe_2016_a_1073s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + MI = rbind(MI_c1,MI_c2) + MI <- MI[,-c(1,4,5,6,8)] + MIyes <-filter(MI , EVENT == "1" | EVENT == "2" | EVENT == "3" ) + MIyes <- MIyes[,-2] + MIyes <- MIyes[!duplicated(MIyes$shareid), ] + MIyes$mi <- rep(1,nrow(MIyes)) + head(MIyes) + colnames(MIyes)[2] ="midate" + tmp1=merge(cvd,MIyes,by="shareid",all.x=TRUE,all.y=TRUE) + tmp1$mi <- ifelse(is.na(tmp1$mi) == "FALSE" ,"1","0") + #diba + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + DIAB_c1 <- read_excel("phs000007.v30.pht000041.v7.p11.c1.vr_diab_ex09_1_1002s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + DIAB_c1 <- data.frame(DIAB_c1) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + DIAB_c2 <- read_excel("phs000007.v30.pht000041.v7.p11.c2.vr_diab_ex09_1_1002s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + DIAB_c2 <- data.frame(DIAB_c2) + DIAB = rbind(DIAB_c1,DIAB_c2) + head(DIAB) + DIAByes <- filter(DIAB,DIAB$CURR_DIAB1 == 1| DIAB$CURR_DIAB2 == 1|DIAB$CURR_DIAB3 == 1|DIAB$CURR_DIAB4 == 1|DIAB$CURR_DIAB5 == 1| DIAB$CURR_DIAB6 == 1| DIAB$CURR_DIAB7 == 1 | DIAB$CURR_DIAB8 == 1) + DIAByes <- DIAByes[,c(2,11)] + DIAByes[,2] <- "1" + + DIABno <- filter(DIAB,DIAB$CURR_DIAB1 != 1 & DIAB$CURR_DIAB2 != 1 & DIAB$CURR_DIAB3 != 1 & DIAB$CURR_DIAB4 != 1 & DIAB$CURR_DIAB5 != 1 & DIAB$CURR_DIAB6 != 1 & DIAB$CURR_DIAB7 != 1 & DIAB$CURR_DIAB8 != 1) + DIABno <- DIABno[,c(2,11)] + DIABno[,2] <- "0" + + DIAB <- rbind(DIAByes,DIABno) + DIAB <- DIAB[!duplicated(DIAB$shareid), ] + tmp2 = merge(tmp1,DIAB,by="shareid",all.x=TRUE,all.y=TRUE) + #afx + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + afx_c1 <- read_excel("phs000007.v30.pht003315.v7.p11.c1.vr_survaf_2014_a_0987s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + afx_c2 <- read_excel("phs000007.v30.pht003315.v7.p11.c2.vr_survaf_2014_a_0987s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + afx = rbind(afx_c1,afx_c2) + afx <- data.frame(afx) + afx <- afx[,c(2,4,5)] + head(afx) + tmp3 = merge(tmp2,afx,by="shareid",all.x=TRUE,all.y=TRUE) + #stroke + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + stroke_c1 <- read_excel("phs000007.v30.pht006023.v2.p11.c1.vr_survstk_2014_a_1031s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + stroke_c2 <- read_excel("phs000007.v30.pht006023.v2.p11.c2.vr_survstk_2014_a_1031s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + stroke = rbind(stroke_c1,stroke_c2) + stroke <- data.frame(stroke) + stroke <- stroke[,c(2,4,7)] + head(stroke) + tmp4 = merge(tmp3,stroke,by="shareid",all.x=TRUE,all.y=TRUE) + #meta1 + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta1_c1 <- read_excel("phs000007.v30.pht006027.v2.p11.c1.vr_wkthru_ex09_1_1001s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta1_c2 <- read_excel("phs000007.v30.pht006027.v2.p11.c2.vr_wkthru_ex09_1_1001s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + meta1 = rbind(meta1_c1,meta1_c2) + c=c(1,206,207,3,20,29,38,47,56,62,71,80,89,96,105,114,128,137,146,155,161,170,188,197) #extract exam8 all information + meta1 <- meta1[,c+1] + head(meta1) + meta1 <- data.frame(meta1) + normalization<-function(x){ + return((x-min(x))/(max(x)-min(x)))} + meta1$SEX <- normalization(meta1$SEX) + tmp5 = merge(tmp4,meta1,by="shareid",all.x=TRUE,all.y=TRUE) + #meta2 + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta2_c1 <- read_excel("phs000007.v30.pht000747.v6.p11.c1.ex1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta2_c2 <- read_excel("phs000007.v30.pht000747.v6.p11.c2.ex1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + meta2 = rbind(meta2_c1,meta2_c2) + c=c("shareid","H010","H071","H074","H077","H480") + meta2 <- meta2[,colnames(meta2) %in% c] + names <- c("shareid","aspirin","beer","wine","liquor","sleep") + colnames(meta2) <- names + tmp6 = merge(tmp5,meta2,by="shareid",all.x=TRUE,all.y=TRUE) + #meta3 + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta3_other2_c1 <- read_excel("phs000007.v30.pht000742.v6.p11.c1.fhslab1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta3_other2_c2 <- read_excel("phs000007.v30.pht000742.v6.p11.c2.fhslab1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + meta3_other2 = rbind(meta3_other2_c1,meta3_other2_c2) + meta3_other2 <- meta3_other2[,c(2,10:12)] + colnames(meta3_other2)[1] <- c("SAMPID") + colnames(meta3_other2)[2] <- c("Albumin_urine") + colnames(meta3_other2)[3] <- c("Creatinine_urine") + colnames(meta3_other2)[4] <- c("Hemoglobin_A1c_wholeblood") + meta3_other2 <- data.frame(meta3_other2) + head(meta3_other2) + tmp7 = merge(tmp6,meta3_other2,by.x = "shareid",by.y="SAMPID",all.x=TRUE,all.y=TRUE) + #meta4 + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta3_other1_c1 <- read_excel("phs000007.v30.pht000747.v6.p11.c1.ex1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + meta3_other1_c2 <- read_excel("phs000007.v30.pht000747.v6.p11.c2.ex1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + meta3_other1 = rbind(meta3_other1_c1,meta3_other1_c2) + meta3_other1 <- meta3_other1[,-1] + meta3_other1 <- data.frame(meta3_other1) + meta3_other1 <- meta3_other1[,c(1,251:292,450,451,453,456,459)] + c <- c("ATRIAL_ENLARGEMENT","RVH","LVH","RHEUMATIC","AORTIC_VALVE","MITRAL_VALVE","OTHER_HEART", + "ARRHYTHMIA","OTHER_PERIPHERAL_VASCULAR_DISEASE","OTHER_VASCULAR_DIAGNOSIS","DEMENTIA", + "PARKINSON","ADULT_SEIZURE_DISORDER","NEUROLOGICAL","THYROID","ENDOCRINE","RENAL", + "GYNECOLOGIC","EMPHYSEMA","PNEUMONIA","ASTHMA","PULMONARY","GOUT","DEGENERATIVE","RHEUMATOID_ARTHRITIS", + "MUSCULOSKELETAL","GALLBLADDER","GERD","LIVER","GI_DISEASE","HEMATOLOGIC_DISORDER","BLEEDING_DISORDER","EYE", + "ENT","SKIN","OTHER","DEPRESSION","ANXIETY","PSYCHOSIS","OTHER2","PROSTATE","INFECTIOUS", + "FEVER","PNEUMONIA","CHRONIC_BRONCHITIS","EMPHYSEMA","COPD") + colnames(meta3_other1)[2:48] <- tolower(c) + meta3_other1[1:10,1:10] + colnames(meta3_other1)[1] <- c("SAMPID") + #全部变为0,1的;去除2,3的 + for(i in 2:ncol(meta3_other1)){ + meta3_other1[,i] <- ifelse(meta3_other1[,i] == 0 ,0,1) + } + tmp8 = merge(tmp7,meta3_other1,by.x = "shareid",by.y="SAMPID",all.x=TRUE,all.y=TRUE) + #crp + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + CRP_c1 <- read_excel("phs000007.v30.pht002888.v5.p11.c1.l_crp_2008_m_0477s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + CRP_c1 = CRP_c1[,-5] + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + CRP_c2 <- read_excel("phs000007.v30.pht002888.v5.p11.c2.l_crp_2008_m_0477s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + CRP = rbind(CRP_c1,CRP_c2) + CRP <- CRP[,c(2,4)] + head(CRP) + tmp9 = merge(tmp8,CRP,by="shareid",all.x=TRUE,all.y=TRUE) + #medicine + { + #FOLIC + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + FOLIC_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + FOLIC_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + FOLIC = rbind(FOLIC_c1,FOLIC_c2) + FOLIC <- data.frame(FOLIC) + FOLIC <- FOLIC[,c(2,10:17)] + FOLIC <- filter(FOLIC, phrm_gp1 == "VITAMIN B12 AND FOLIC ACID" & chem_gp1 == "Folic acid and derivatives") + + data <- merge(FOLIC ,tmp9,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "Folic_acid" + colnames(data)[3] = "Folic_acid_amount" + data$Folic_acid_amount <- ifelse(data$Folic_acid == 0 ,"0",data$Folic_acid_amount) + } + #aspirin-only cvd realtivel + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + aspirin_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + aspirin_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + aspirin = rbind(aspirin_c1,aspirin_c2) + aspirin <- data.frame(aspirin) + aspirin <- aspirin[,c(2,10:17)] + aspirin <- filter(aspirin, system1 == "BLOOD AND BLOOD FORMING ORGANS" & + ther_gp1 == "ANTITHROMBOTIC AGENTS" & + chem_gp1 == "Platelet aggregation inhibitors excl. heparin" & chem_nm1 == "ACETYLSALICYLIC ACID") + + data <- merge(aspirin ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "aspirin" + colnames(data)[3] = "aspirin_amount" + data$aspirin_amount <- ifelse(data$aspirin == 0 ,"0",data$aspirin_amount) + } + #clopidogrel + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + clopidogrel_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + clopidogrel_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + clopidogrel = rbind(clopidogrel_c1,clopidogrel_c2) + clopidogrel <- data.frame(clopidogrel) + clopidogrel <- clopidogrel[,c(2,10:17)] + clopidogrel <- filter(clopidogrel, system1 == "BLOOD AND BLOOD FORMING ORGANS" & + ther_gp1 == "ANTITHROMBOTIC AGENTS" & + chem_gp1 == "Platelet aggregation inhibitors excl. heparin" & chem_nm1 == "CLOPIDOGREL") + + data <- merge(clopidogrel ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "clopidogrel" + colnames(data)[3] = "clopidogrel_amount" + data$clopidogrel_amount <- ifelse(data$clopidogrel == 0 ,"0",data$clopidogrel_amount) + } + #warfarin + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + warfarin_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + warfarin_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + warfarin = rbind(warfarin_c1,warfarin_c2) + warfarin <- data.frame(warfarin) + warfarin <- warfarin[,c(2,10:17)] + warfarin <- filter(warfarin, system1 == "BLOOD AND BLOOD FORMING ORGANS" & + ther_gp1 == "ANTITHROMBOTIC AGENTS" & + chem_gp1 == "Vitamin K antagonists" ) + + data <- merge(warfarin ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "warfarin" + colnames(data)[3] = "warfarin_amount" + data$warfarin_amount <- ifelse(data$warfarin == 0 ,"0",data$warfarin_amount) + } + #ACEI + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + ACEI_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + ACEI_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + ACEI = rbind(ACEI_c1,ACEI_c2) + ACEI <- data.frame(ACEI) + ACEI <- ACEI[,c(2,10:17)] + ACEI <- filter(ACEI, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "AGENTS ACTING ON THE RENIN-ANGIOTENSIN SYSTEM" & + phrm_gp1 == "ACE INHIBITORS, PLAIN" ) + + data <- merge(ACEI ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "ACEI" + colnames(data)[3] = "ACEI_amount" + data$ACEI_amount <- ifelse(data$ACEI == 0 ,"0",data$ACEI_amount) + } + #Angiotensin_II_antagonists + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Angiotensin_II_antagonists_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Angiotensin_II_antagonists_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + Angiotensin_II_antagonists = rbind(Angiotensin_II_antagonists_c1,Angiotensin_II_antagonists_c2) + Angiotensin_II_antagonists <- data.frame(Angiotensin_II_antagonists) + Angiotensin_II_antagonists <- Angiotensin_II_antagonists[,c(2,10:17)] + Angiotensin_II_antagonists <- filter(Angiotensin_II_antagonists, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "AGENTS ACTING ON THE RENIN-ANGIOTENSIN SYSTEM" & + phrm_gp1 == "ANGIOTENSIN II ANTAGONISTS, PLAIN" ) + + data <- merge(Angiotensin_II_antagonists ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "Angiotensin_II_antagonists" + colnames(data)[3] = "Angiotensin_II_antagonists_amount" + data$Angiotensin_II_antagonists_amount <- ifelse(data$Angiotensin_II_antagonists == 0 ,"0",data$Angiotensin_II_antagonists_amount) + } + #Beta_blocking + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Beta_blocking_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Beta_blocking_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + Beta_blocking = rbind(Beta_blocking_c1,Beta_blocking_c2) + Beta_blocking <- data.frame(Beta_blocking) + Beta_blocking <- Beta_blocking[,c(2,10:17)] + Beta_blocking <- filter(Beta_blocking, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "BETA BLOCKING AGENTS" ) + + data <- merge(Beta_blocking ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "Beta_blocking" + colnames(data)[3] = "Beta_blocking_amount" + data$Beta_blocking_amount <- ifelse(data$Beta_blocking == 0 ,"0",data$Beta_blocking_amount) + + } + #CO_Q_10 + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + CO_Q_10_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + CO_Q_10_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + CO_Q_10 = rbind(CO_Q_10_c1,CO_Q_10_c2) + CO_Q_10 <- data.frame(CO_Q_10) + CO_Q_10 <- CO_Q_10[,c(2,10:17)] + CO_Q_10 <- filter(CO_Q_10, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "CARDIAC THERAPY" & + phrm_gp1 == "OTHER CARDIAC PREPARATIONS") + + data <- merge(CO_Q_10 ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "CO_Q_10" + colnames(data)[3] = "CO_Q_10_amount" + data$CO_Q_10_amount <- ifelse(data$CO_Q_10 == 0 ,"0",data$CO_Q_10_amount) + } + #vasodilators + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + vasodilators_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + vasodilators_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + vasodilators = rbind(vasodilators_c1,vasodilators_c2) + vasodilators <- data.frame(vasodilators) + vasodilators <- vasodilators[,c(2,10:17)] + vasodilators <- filter(vasodilators, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "CARDIAC THERAPY" & + phrm_gp1 == "VASODILATORS USED IN CARDIAC DISEASES") + + data <- merge(vasodilators ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "vasodilators" + colnames(data)[3] = "vasodilators_amount" + data$vasodilators_amount <- ifelse(data$vasodilators == 0 ,"0",data$vasodilators_amount) + } + #amiodarone + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + amiodarone_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + amiodarone_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + amiodarone = rbind(amiodarone_c1,amiodarone_c2) + amiodarone <- data.frame(amiodarone) + amiodarone <- amiodarone[,c(2,10:17)] + amiodarone <- filter(amiodarone, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "CARDIAC THERAPY" & + phrm_gp1 == "ANTIARRHYTHMICS, CLASS I AND III" & + chem_nm1 == "AMIODARONE") + + data <- merge(amiodarone ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "amiodarone" + colnames(data)[3] = "amiodarone_amount" + data$amiodarone_amount <- ifelse(data$amiodarone == 0 ,"0",data$amiodarone_amount) + } + #Aldosterone antagonists + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Aldosterone_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Aldosterone_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + Aldosterone = rbind(Aldosterone_c1,Aldosterone_c2) + Aldosterone <- data.frame(Aldosterone) + Aldosterone <- Aldosterone[,c(2,10:17)] + Aldosterone <- filter(Aldosterone, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "DIURETICS" & + chem_gp1 == "Aldosterone antagonists") + + data <- merge(vasodilators ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "Aldosterone" + colnames(data)[3] = "Aldosterone_amount" + data$Aldosterone_amount <- ifelse(data$Aldosterone == 0 ,"0",data$Aldosterone_amount) + + } + #potassium_sparing_diuretic + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + potassium_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + potassium_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + potassium = rbind(potassium_c1,potassium_c2) + potassium <- data.frame(potassium) + potassium <- potassium[,c(2,10:17)] + potassium <- filter(potassium, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "DIURETICS" & + chem_gp1 == "Other potassium-sparing agents") + + data <- merge(potassium ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "potassium" + colnames(data)[3] = "potassium_amount" + data$potassium_amount <- ifelse(data$potassium == 0 ,"0",data$potassium_amount) + } + #Sulfonamides + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Sulfonamides_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Sulfonamides_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + Sulfonamides = rbind(Sulfonamides_c1,Sulfonamides_c2) + Sulfonamides <- data.frame(Sulfonamides) + Sulfonamides <- Sulfonamides[,c(2,10:17)] + Sulfonamides <- filter(Sulfonamides, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "DIURETICS" & + chem_gp1 == "Sulfonamides, plain") + + data <- merge(Sulfonamides ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "Sulfonamides" + colnames(data)[3] = "Sulfonamides_amount" + data$Sulfonamides_amount <- ifelse(data$Sulfonamides == 0 ,"0",data$Sulfonamides_amount) + + } + #Thiazides + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Thiazides_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + Thiazides_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + Thiazides = rbind(Thiazides_c1,Thiazides_c2) + Thiazides <- data.frame(Thiazides) + Thiazides <- Thiazides[,c(2,10:17)] + Thiazides <- filter(Thiazides, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "DIURETICS" & + chem_gp1 == "Thiazides, plain") + + data <- merge(Thiazides ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "Thiazides" + colnames(data)[3] = "Thiazides_amount" + data$Thiazides_amount <- ifelse(data$Thiazides == 0 ,"0",data$Thiazides_amount) + + } + #statin + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + STATIN_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + STATIN_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + STATIN = rbind(STATIN_c1,STATIN_c2) + STATIN <- data.frame(STATIN) + STATIN <- STATIN[,c(2,10:17)] + STATIN1 <- filter(STATIN, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "LIPID MODIFYING AGENTS" & + chem_gp1 == "HMG CoA reductase inhibitors") + STATIN2 <- filter(STATIN, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "LIPID MODIFYING AGENTS" & + chem_gp1 == "Other lipid modifying agents" & chem_nm1 == "EZETIMIBE" & MEDNAME == "SIMVASTATIN" |MEDNAME == "SIMVASTATIN)") + STATIN <- rbind(STATIN1,STATIN2) + data <- merge(STATIN ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "STATIN" + colnames(data)[3] = "STATIN_amount" + data$STATIN_amount <- ifelse(data$STATIN == 0 ,"0",data$STATIN_amount) + } + #OMEGA_3 + { + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + OMEGA_3_c1 <- read_excel("phs000007.v30.pht000828.v6.p11.c1.meds1_8s.HMB-IRB-MDS.xlsx",sheet=1, na = "", skip = 10) + setwd("E:/workplace/mywork/methy/dbgap/3_clin") + OMEGA_3_c2 <- read_excel("phs000007.v30.pht000828.v6.p11.c2.meds1_8s.HMB-IRB-NPU-MDS.xlsx",sheet=1, na = "", skip = 10) + OMEGA_3 = rbind(OMEGA_3_c1,OMEGA_3_c2) + OMEGA_3 <- data.frame(OMEGA_3) + OMEGA_3 <- OMEGA_3[,c(2,10:17)] + OMEGA_3 <- filter(OMEGA_3, system1 == "CARDIOVASCULAR SYSTEM" & + ther_gp1 == "LIPID MODIFYING AGENTS" & + chem_gp1 == "Other lipid modifying agents" & chem_nm1 == "OMEGA-3-TRIGLYCERIDES") + data <- merge(OMEGA_3 ,data,by="shareid",all.y=TRUE) + data$chem_gp1 <- ifelse(is.na(data$IDTYPE) == "FALSE" ,"1","0") + data <- data[,-c(2:4,6:8)] + colnames(data)[2] = "OMEGA_3" + colnames(data)[3] = "OMEGA_3_amount" + data$OMEGA_3_amount <- ifelse(data$OMEGA_3 == 0 ,"0",data$OMEGA_3_amount) + } + dim(data) + #15285,125 + } + #duplicated--no among + data <- data[!duplicated(data[,-c(3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33)]),]#15154 125 + save(data,file="E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary/all_clin.Rdata") + data_exam8 = filter(data,DATE8 != "NA")#3000 125 + data_exam8$mi = ifelse(data_exam8$mi == 1 & data_exam8$midate<data_exam8$DATE8,1,0) + data_exam8$cvd = ifelse(data_exam8$cvd == 1 & data_exam8$cvddate<data_exam8$DATE8,1,0) + data_exam8$chd = ifelse(data_exam8$chd == 1 & data_exam8$chddate<data_exam8$DATE8,1,0) + data_exam8$afx = ifelse(data_exam8$afx == 1 & data_exam8$afxdate<data_exam8$DATE8,1,0) + data_exam8$stroke = ifelse(data_exam8$stroke == 1 & data_exam8$strokedate<data_exam8$DATE8,1,0) + save(data_exam8,file="E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary/data_exam8.Rdata") +} +setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary") +load(file="data_exam8.Rdata") +HFpEF_exam8 = merge(HFpEF,data_exam8,by="shareid")#2334 131 +save(HFpEF_exam8,file="HFpEF_exam8.Rdata") +#time +{ + library(dplyr) + library(tibble) + EHR = HFpEF_exam8 + + #clin_data_control <- filter(EHR, chf == 0 ,(chfdate - DATE8 ) >8*365) + clin_data_control <- filter(EHR, (chfdate - DATE8 ) >8*365) + summary((clin_data_control$chfdate - clin_data_control$DATE8)) + out1 = filter(EHR, chf == 1 & chfdate < DATE8 ) + #clin_data_chf <- filter(EHR, chf == 1 & (chfdate >= DATE8)) + clin_data_chf <- filter(EHR, chf == 1 & (chfdate >= DATE8) & (chfdate - DATE8 ) <8*365) + summary((clin_data_chf$chfdate - clin_data_chf$DATE8)) + out2 = filter(EHR, chf == 0 & (chfdate >= DATE8) & (chfdate - DATE8 ) <=8*365 ) + # tmp = filter(EHR, chf == 1 & (chfdate - DATE8 <= 5*365) & (chfdate >= DATE8)) + # summary(tmp$chfdate - tmp$DATE8) + clin_data_chf_control <- rbind(clin_data_chf,clin_data_control)#989 + #20211220 cosoring + clin_data_chf_control <- filter(EHR,!EHR$shareid %in% out1$shareid) +} +table(clin_data_chf_control$PACKS_SET) +#GEN3 JHU UMN +#0 171 797 +setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary") +save(clin_data_chf_control,file="clin_data_chf_control.Rdata") \ No newline at end of file