|
a |
|
b/Data_preprocessing.R |
|
|
1 |
# Environment setup and data loading ----
# NOTE(review): the working directory is hard-coded to a local path; the
# relative file names used for reading and writing resolve against it.
setwd("D:/brca")

# Presumably the source of impute(), which is called further down.
library("imputeMissings")

# Read the input table; the first CSV column is used as the row names.
data <- read.csv(file = "brca_multitest.csv", row.names = 1)
|
|
4 |
|
|
|
5 |
# Drop rows with too many missing values ----
# Total number of NA cells in the loaded table (shown when the script is run
# at top level; the value is not stored).
sum(is.na(data))

# Number of NA cells in each row, computed in a single vectorized pass.
# This replaces a loop that grew the vector with c() on every iteration and
# that misbehaved on an empty table because 1:nrow(data) counts 1, 0.
# unname() keeps the result a plain unnamed vector, as the loop produced.
miss <- unname(rowSums(is.na(data)))

# Fraction of missing cells per row.
missrate <- miss / ncol(data)

# Keep only rows with less than 20% missing cells. drop = FALSE guarantees
# the result stays a data frame even if the table has a single column.
data1 <- data[missrate < 0.2, , drop = FALSE]

# Replace the remaining NA cells with 0.
data1[is.na(data1)] <- 0
|
|
14 |
# Drop rows with too many zeros ----
# Number of cells equal to 0 in each row, computed in a single vectorized
# pass. This replaces a loop that grew the vector with c() on every
# iteration and that misbehaved on an empty table because 1:nrow(data1)
# counts 1, 0. unname() keeps the result a plain unnamed vector, as the
# loop produced.
nz <- unname(rowSums(data1 == 0))

# Fraction of zero cells per row.
zerorate <- nz / ncol(data1)

# Keep only rows with less than 20% zero cells. drop = FALSE guarantees the
# result stays a data frame even if the table has a single column.
data2 <- data1[zerorate < 0.2, , drop = FALSE]
|
|
21 |
# Impute zero cells ----
# Transpose the filtered table and rebuild it as a data frame, so the rows
# of data2 become the columns of data3.
data3 <- data.frame(t(data2))

# Mark every cell equal to 0 as missing so that it gets imputed below.
data3[data3 == 0] <- NA

# Fill the NA cells using impute() with its default arguments.
data4 <- impute(data3)
|
|
27 |
# Standardize and export ----
# Transposing twice restores the orientation of data4, so data7 holds the
# same layout as data4 (as the matrix that t() returns). The intermediate
# names are kept in case later code reads them.
data5 <- t(data4)
data6 <- data5
data7 <- t(data6)

# Center and scale every column of data7. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
data8 <- scale(data7, center = TRUE, scale = TRUE)

# Transpose the standardized matrix once more and write it to disk.
data9 <- t(data8)
write.csv(data9, "brcatest_go.csv")
|
|
34 |
|