|
Semantic Features/MLSuite.m
|
|
function [ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ] = MLSuite(X, Y, settings)
%MLSuite Runs all the base-level machine learning methods on the given X and
%Y matrices, using the options in the settings struct.

categories = settings.categories;
numCategories = length(categories);

clError = zeros(1, numCategories); %To hold classification success/error rates
clHeader = cell(1,1);

classValue = 0;
errorRate = 0;
errorHeaders = 0;
confusionMatrix = 0; %Code exists, but not used since changing from classes to numbers for the labels
sens = 0; %Code exists, but not used since changing from classes to numbers for the labels
spec = 0; %Code exists, but not used since changing from classes to numbers for the labels

%Note: each algorithm, on each run, will test on different rows.
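%CrossValLearn (defined elsewhere in this repo) is assumed to work roughly
%like the k-fold sketch below: train on the in-fold rows with the supplied
%train function, predict the held-out rows with the supplied predict
%function, so every row ends up with an out-of-fold prediction. The fold
%count k and the crossvalind call are illustrative assumptions, not the
%actual implementation:
%    folds = crossvalind('Kfold', size(X,1), k);
%    yhat = zeros(size(Y));
%    for f = 1:k
%        test = (folds == f);
%        model = trainFn(X(~test,:), Y(~test));
%        yhat(test) = predictFn(X(test,:), model);
%    end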
|
|
classValue = zeros(size(X,1), numCategories, 1);
%-------------------------------------------------------------------
% Standard Learning
%-------------------------------------------------------------------
if settings.doLearning == 1
    if settings.doTrees == 1
        %-------------------------------------------------------------------
        % Decision tree learning
        %-------------------------------------------------------------------
        fprintf('\nDecision tree learning from mean of each group vs mean of features \n');
        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
            [dtClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), @classregtree, @(X, trainedStruct) eval(trainedStruct, X));
            %[dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
            fprintf('Error: %f Class Success %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)) );
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, dtClassValue);
    end
    %-------------------------------------------------------------------
    % Neural Network Learning
    %-------------------------------------------------------------------
    if settings.doNN == 1
        fprintf('\nNeural Network training.\n');
        clHeader = vertcat(clHeader, {'NN'});

        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('NN on category: %s ', str2mat(categories(i)));
            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)) );
            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, nnClassValue);
    end

    %-------------------------------------------------------------------
    % SVM Learning
    %-------------------------------------------------------------------
    %Multiclass/class and probability/continuous numerical predictions by SVM
    %never got to work reliably, so that code was removed. I hear scikit-learn
    %for Python works well.
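    %If you want to retry SVMs in MATLAB, a minimal per-category sketch
    %(an assumption, using the older svmtrain/svmclassify API, which only
    %handles two-class labels) could mirror the tree learning above:
    %    [svmClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), ...
    %        @(X, Y) svmtrain(X, Y), ...
    %        @(X, trainedStruct) svmclassify(trainedStruct, X));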
|
|
    %-------------------------------------------------------------------
    % Bayesian Learning
    %-------------------------------------------------------------------
    %Does not work! It works 99% of the time, but <1% of the samples will
    %result in a NaN prediction, destroying all the math steps that come
    %afterwards. You can try to figure out how to fix it properly, or just
    %convert the NaNs into 0s and accept failing those samples.
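    %That workaround, in its minimal form (not enabled here), would go right
    %after the loop below:
    %    bayesClassValue(isnan(bayesClassValue)) = 0;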
|
|
    if settings.doBayes == 1
        fprintf('\nNaive Bayes Learning.\n');
        clHeader = vertcat(clHeader, {'Bayes'});
        errorVector = zeros(1, numCategories);

        for i = 1:numCategories
            fprintf('Bayes on category: %s ', str2mat(categories(i)));
            [bayesClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), ...
                @(X, Y) NaiveBayes.fit(X, Y, 'Distribution', 'kernel'), ...
                @(X, trainedStruct) posterior(trainedStruct, X, 'HandleMissing', 'On') ...
                );
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)) );
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, bayesClassValue);
    end

    %-------------------------------------------------------------------
    % Ensemble Learning (Still a first-layer classifier)
    %-------------------------------------------------------------------
    if settings.doBagging == 1
        %-------------------------------------------------------------------
        % Bagging
        %-------------------------------------------------------------------
        fprintf('\nPerforming Bagging\n');
        clHeader = vertcat(clHeader, {'Bagging Trees'});
        for i = 1:numCategories
            fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
            tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25); % , 'minleaf', 15);
            %errorArray = oobError(tBag);
            %errorVector(i) = errorArray(end);

            %bagClassValue(:,i) = predict(tBag, X); %Wrong: each tree in tBag
            %was trained on only a bootstrap sample of the rows, so not every
            %tree can fairly be applied to every observation. Use the
            %out-of-bag predictions instead.
            bagClassValue(:,i) = oobPredict(tBag);
            errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i));
            fprintf('Error: %f Class Success %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)) );
            %For regression, bagClassProb is just the standard deviation of
            %the predictions across all the trees, so it is a rough indication
            %of confidence.
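            %A sketch of how to actually get that spread (assumes the
            %documented two-output form of oobPredict for regression
            %ensembles; bagClassProb is not used elsewhere in this file):
            %    [bagClassValue(:,i), bagClassProb(:,i)] = oobPredict(tBag);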
|
|
            if settings.doPlot == 1
                %Plot the error as the trees are grown. Training is really
                %slow, so you might as well have something to watch.
                figure;
                plot(oobError(tBag));
                xlabel('number of grown trees');
                ylabel('out-of-bag regression error');
                title(str2mat(categories(i)));
            end
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, bagClassValue);
    end
end
|
|
%-------------------------------------------------------------------
% Compile results
%-------------------------------------------------------------------
%clError = horzcat(clError, mean(clError, 2));
%bagError = horzcat(bagError, mean(bagError, 2));
%nnError = horzcat(nnError, mean(nnError, 2));
%allErrors = vertcat(clError, bagError);
%allErrors = vertcat(allErrors, nnError);
%averageError = [mean(clError, 2);mean(bagError, 2);mean(nnError, 2)];
%Remove the placeholder rows at the beginning. This was the easiest way I
%found to 1. guarantee the variables are initialized, to keep MATLAB from
%complaining, and 2. leave the length unspecified, so you don't have to
%rework the code if you add more classifiers to the mix, or turn some of
%them off in the settings.
errorRate = clError(2:end,:);
errorHeaders = clHeader(2:end);
classValue = classValue(:,:,2:end);