|
a |
|
b/main.m |
|
|
1 |
%********** 基于SVM的具有遗传性疾病和性状的遗传位点分析 **********% |
|
|
2 |
%********** Date: 2017.04.10 Group:DataMing No.5 **********% |
|
|
3 |
%********** Run Time:13 min **********% |
|
|
4 |
% Script overview:
%   1. Load the per-locus feature matrix and min-max normalize every column.
%   2. For each locus (column) call predictFunc_svm on that single column and
%      record the mean of the accuracies it returns.
%   3. Write the raw accuracies, then the accuracies sorted in descending
%      order together with their original column indices, to text files.
%   4. Display the top-n accuracies and the matching locus names.
close all;
clear;
clc;

%% Load all attributes of the decimal-encoded loci (9445 per the original note)
% NOTE(review): textread is flagged "not recommended" by MathWorks (textscan
% is the suggested replacement); kept here so parsing behavior is unchanged.
all_feature = textread('nowenary_encoding_feature.dat');

% Vectors handed to predictFunc_svm as its first two arguments.
% NOTE(review): presumably the class labels of 500 positive and 500 negative
% samples -- confirm against predictFunc_svm, which is defined elsewhere.
Pdata = zeros(500, 1);
Ndata = ones(500, 1);

%% Normalize the attributes
% mapminmax rescales each ROW to [0,1], so the matrix is transposed going in
% and transposed back afterwards: every locus column ends up in [0,1].
% ps holds the mapping settings; it is not used further in this script.
[dataset_scale, ps] = mapminmax(all_feature', 0, 1);
data_attr = dataset_scale';   % normalized attribute matrix
W = size(data_attr, 2);       % total number of attributes (loci)

% Disabled by the original author (trailing comment was mis-encoded;
% it appears to say "vertical concatenation, i.e. phenotype.txt"):
% train_label = vertcat( zeros(Pnum,1), ones(Nnum,1) );

%% Predict the disease from each locus separately and record the accuracy
% Preallocated: growing the vector by concatenation on every iteration
% re-copies it each time, which is wasteful for thousands of loci.
accuracy = zeros(1, W);
for w = 1:W
    Acc = predictFunc_svm(Pdata, Ndata, data_attr(:, w));
    % Per the original note, Acc holds the 5-fold cross-validation accuracies.
    % Acc(:) guarantees a scalar mean whatever the orientation of Acc, so
    % entry w always corresponds to locus w.
    accuracy(w) = mean(Acc(:));
end
dlmwrite('predict_accuracy.txt', accuracy, 'delimiter', ' ');

%% Sort the prediction accuracies in descending order
% accuracy_desc: accuracies from highest to lowest
% org_indices:   original column (locus) index of each sorted accuracy
[accuracy_desc, org_indices] = sort(accuracy, 'descend');
accuracy_result = [accuracy_desc; org_indices];
% Row 1 of predict_accuracy_desc.txt is the sorted accuracies, row 2 the
% corresponding locus indices.
dlmwrite('predict_accuracy_desc.txt', accuracy_result, 'delimiter', ' ');

%% Select the top-n accuracies and the loci they belong to
% Clamp to W so the script still works when fewer than 10 loci are loaded.
n = min(10, W);
topn_accuracy = accuracy_desc(1:n).';   % n-by-1 column
disp(topn_accuracy);                    % show the top-n accuracies

% One locus name per line; entry k is expected to name column k of the
% feature matrix.
feature_name = textread('feature_name.txt', '%s', 'delimiter', '\n');
% Single indexed selection instead of an element-by-element growing loop;
% reshape keeps the 1-by-n cell layout the loop used to produce.
topn_feature = reshape(feature_name(org_indices(1:n)), 1, []);
disp(topn_feature);                     % show the top-n loci
|
|
42 |
|
|
|
43 |
|