[a50134]: / Data_preprocessing.R

Download this file

35 lines (31 with data), 804 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Switch to the project directory so the relative file name below resolves
# against it.
setwd("D:/brca")
library(imputeMissings)
# Read the input table; the first CSV column supplies the row names.
data <- read.csv("brca_multitest.csv", row.names = 1)
################# delete missing #######################
# Total number of NA cells in the input (a bare top-level expression; its
# value is not stored anywhere).
sum(is.na(data))
# Per-row NA counts, computed in one vectorised pass. This replaces a
# 1:nrow() loop that grew a vector with c() on every iteration and that
# iterated over c(1, 0) when the table had zero rows.
miss <- unname(rowSums(is.na(data)))
# Fraction of cells that are NA in each row.
missrate <- miss / ncol(data)
# Keep only rows with less than 20% missing cells. drop = FALSE keeps the
# result a data frame even when the table has a single column.
data1 <- data[missrate < 0.2, , drop = FALSE]
# Recode the remaining NA cells as 0.
data1[is.na(data1)] <- 0
################### delete zero #################
# Per-row count of cells equal to 0, computed in one vectorised pass instead
# of a 1:nrow() loop that grew a vector with c() and mis-iterated on a
# zero-row table.
nz <- unname(rowSums(data1 == 0))
# Fraction of cells that are 0 in each row.
zerorate <- nz / ncol(data1)
# Keep only rows with less than 20% zero cells. drop = FALSE keeps the result
# a data frame even when the table has a single column.
data2 <- data1[zerorate < 0.2, , drop = FALSE]
############### impute ######################
# Transpose so the former rows become columns, then rebuild a data frame.
# NOTE(review): data.frame() applies its default name checking here, so
# non-syntactic names may be altered -- confirm this is intended.
data3 <- data.frame(t(data2))
# Mark every zero as missing. This includes the NA cells that were recoded
# as 0 earlier in the script.
data3[data3 == 0] <- NA
# Fill the NA cells with impute() using its default arguments
# (presumably imputeMissings::impute -- TODO confirm).
data4 <- impute(data3)
########## normalize ##############
# Centre and scale every column of the imputed table. The former
# t() / copy / t() round trip only turned the data frame into a matrix with
# the same orientation, so as.matrix() is used directly. TRUE replaces the
# reassignable alias T.
# NOTE(review): a column that is constant yields NaN after scaling (its
# standard deviation is 0) -- confirm whether such columns can occur.
data8 <- scale(as.matrix(data4), center = TRUE, scale = TRUE)
# Transpose back so the scaled columns become rows again.
data9 <- t(data8)
# Write the result to the current working directory.
write.csv(data9, "brcatest_go.csv")