Semantic Features/NotUsed/bayesLearning.m
function [ classValue, errorRate ] = bayesLearning(X, Y, category)
%bayesLearning Predict one semantic feature with cross-validated naive Bayes.
%   Shuffles the rows, runs 10-fold cross-validation with a kernel naive
%   Bayes classifier, and returns the per-sample predictions classValue
%   (restored to the original row order) and the overall RMSE errorRate.
%   category selects which rating column of Y to predict:
%   1 subtlety, 2 sphericity, 3 margin, 4 lobulation, 5 spiculation, 6 texture, 7 malignancy
|
|
Y = Y(:,category);
|
|
%Perform random sampling by jumbling up the data, then slicing the shuffled
%set into n roughly equal folds.
divisions = 10;
numSamples = size(X,1);
testSize = round(numSamples/divisions);
|
|
%get a random ordering of the rows (randsample without replacement yields a
%random permutation of 1:numSamples)
randomRows = randsample(numSamples, numSamples);

%invert the permutation: restoreRows(i) is the position where original row i
%landed, so indexing with restoreRows undoes the scrambling later
[~, restoreRows] = sort(randomRows);

Xmixed = X(randomRows,:);
Ymixed = Y(randomRows,:);
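
%Sanity check (illustrative addition): restoreRows is the inverse permutation,
%so indexing the shuffled order with it must recover the original row order.
assert(isequal(randomRows(restoreRows), (1:numSamples)'), ...
    'restoreRows does not invert the shuffle');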
|
|
%Repeat the process with a different test fold each time until all rows
%have been covered.
classValue = 0; %placeholder first row; chopped off after the last fold
testrows = cell(divisions,1);
|
|
for i = 1:(divisions - 1) %all iterations guaranteed to have a full-sized fold
    %start with testing at the beginning rows, then cycle down
    fprintf('Bayes learning %d%%\n', round(100*i/divisions));
    testrows{i} = (i-1)*testSize + 1 : i*testSize;

    Xtest = Xmixed(testrows{i}, :);
    Ytest = Ymixed(testrows{i}, :);

    Xtrain = Xmixed;
    Xtrain(testrows{i},:) = [];
    Ytrain = Ymixed;
    Ytrain(testrows{i},:) = [];

    nb = NaiveBayes.fit(Xtrain, Ytrain, 'Distribution', 'kernel'); %'Distribution' can be 'kernel' or 'mvmn'
    %posterior returns classValueTemp, an N-by-1 vector containing the class
    %assigned to each row of Xtest (same type as nb.CLevels);
    %'HandleMissing','On' ignores NaN columns and computes on the rest
    [post, classValueTemp] = posterior(nb, Xtest, 'HandleMissing', 'On');
    classValue = vertcat(classValue, classValueTemp);
end
|
|
%Collect all the remaining rows as the last fold. It may not be exactly
%testSize rows, but this guarantees no rows are lost to rounding.
testrows{divisions} = (divisions-1)*testSize + 1 : numSamples;

Xtest = Xmixed(testrows{divisions}, :);
Ytest = Ymixed(testrows{divisions}, :);

Xtrain = Xmixed;
Xtrain(testrows{divisions},:) = [];
Ytrain = Ymixed;
Ytrain(testrows{divisions},:) = [];

nb = NaiveBayes.fit(Xtrain, Ytrain, 'Distribution', 'kernel');
%As above: posterior returns the class assigned to each row of Xtest (same
%type as nb.CLevels), ignoring NaN columns via 'HandleMissing'
[post, classValueTemp] = posterior(nb, Xtest, 'HandleMissing', 'On');
classValue = vertcat(classValue(2:end,:), classValueTemp); %chop off the placeholder zero
|
|
%Re-sort everything to the original row order so the predictions can be
%compared against other algorithms.
classValue = classValue(restoreRows,:);

%compute RMSE over all the samples
errorRate = RMSE(classValue, Y); %RMSE error; maybe better as an array so results can be combined in the future
|
|
end |
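
%--- Illustrative usage sketch (not part of the original pipeline) ---
%'semanticFeatures.mat' and its variables are hypothetical names: X should be
%an N-by-d feature matrix and Y an N-by-7 matrix of radiologist ratings whose
%columns follow the ordering documented above.
function demoBayesLearning()
data = load('semanticFeatures.mat'); %hypothetical data file
[predicted, err] = bayesLearning(data.X, data.Y, 7); %7 = malignancy
fprintf('Cross-validated RMSE on malignancy: %.4f\n', err);
disp(predicted(1:min(5,end))); %peek at the first few predicted ratings
end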
|
|
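
%--- Hedged alternative for newer MATLAB releases (assumption to verify) ---
%NaiveBayes.fit was superseded by fitcnb in R2014b; one fold could be fit and
%predicted as below. Missing-value handling may differ from 'HandleMissing',
%so treat this as a sketch, not a drop-in replacement.
function classValueTemp = predictFoldModern(Xtrain, Ytrain, Xtest)
mdl = fitcnb(Xtrain, Ytrain, 'DistributionNames', 'kernel'); %kernel density per predictor
classValueTemp = predict(mdl, Xtest); %predicted class for each row of Xtest
end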