[b4b313]: / Semantic Features / MLSuite.m


function [ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ] = MLSuite(X, Y, settings)
%MLSuite Runs all of the base-level machine learning methods on the given X
%and Y matrices, using the options in the settings struct
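%
% Example call (a sketch; these are the settings fields this file reads,
% with illustrative values):
%   settings.categories  = {'catA', 'catB'};  %hypothetical category names
%   settings.doLearning  = 1;
%   settings.doTrees     = 1;
%   settings.doNN        = 1;
%   settings.hiddenLayer = 10;                %illustrative NN size
%   settings.doBayes     = 0;
%   settings.doBagging   = 1;
%   settings.numTrees    = 100;               %illustrative ensemble size
%   settings.doPlot      = 0;
%   [classValue, errorRate, errorHeaders] = MLSuite(X, Y, settings);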
categories = settings.categories;
numCategories = length(categories);
clError = zeros(1, numCategories); %To hold classification success/error rates
clHeader = cell(1,1);
classValue = 0;
errorRate = 0;
errorHeaders = 0;
confusionMatrix = 0; %Code exists, but not used since changing from classes to numbers for the labels
sens = 0; %Code exists, but not used since changing from classes to numbers for the labels
spec = 0; %Code exists, but not used since changing from classes to numbers for the labels
%Note: each algorithm, on each run, will test on different rows
classValue = zeros(size(X,1), numCategories, 1);
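%classValue is (observations x categories x classifiers): each enabled
%classifier appends a page of predictions via cat(3, ...), and the all-zero
%seed page created above is stripped off at the end of this function.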
%-------------------------------------------------------------------
% Standard Learning
%-------------------------------------------------------------------
if settings.doLearning == 1
    if settings.doTrees == 1
        %-------------------------------------------------------------------
        % Decision tree learning
        %-------------------------------------------------------------------
        fprintf('\nDecision tree learning from mean of each group vs mean of features \n');
        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
            [dtClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), @classregtree, @(X, trainedStruct) eval(trainedStruct, X));
            %[dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
            fprintf('Error: %f class Success %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)) );
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, dtClassValue);
    end
    %-------------------------------------------------------------------
    % Neural Network Learning
    %-------------------------------------------------------------------
    if settings.doNN == 1
        fprintf('\nNeural Network training.\n');
        clHeader = vertcat(clHeader, {'NN'});
        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('NN on category: %s ', str2mat(categories(i)));
            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)) );
            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, nnClassValue);
    end
    %-------------------------------------------------------------------
    % SVM Learning
    %-------------------------------------------------------------------
    %Multiclass/class and probability/continuous numerical predictions by SVM
    %never worked reliably and were removed. I hear scikit-learn for Python
    %works well.
    %-------------------------------------------------------------------
    % Bayesian Learning
    %-------------------------------------------------------------------
    %Does not fully work! It succeeds on 99% of samples, but <1% of them will
    %produce a NaN prediction, which destroys all the math steps that come
    %afterwards. You can try to figure out how to fix it, or just convert the
    %NaNs into 0s and accept failing those samples.
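    %A minimal sketch of that workaround (untested here): after the posterior
    %call in the loop below, zero out the rare NaN predictions so the
    %downstream math survives, accepting those samples as misses:
    %   bayesClassValue(isnan(bayesClassValue(:,i)), i) = 0;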
    if settings.doBayes == 1
        fprintf('\nNaive Bayes Learning.\n');
        clHeader = vertcat(clHeader, {'Bayes'});
        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('Bayes on category: %s ', str2mat(categories(i)));
            [bayesClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), ...
                @(X, Y) NaiveBayes.fit(X, Y, 'Distribution', 'kernel'),...
                @(X, trainedStruct) posterior(trainedStruct, X, 'HandleMissing', 'On') ...
                );
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)) );
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, bayesClassValue);
    end
    %-------------------------------------------------------------------
    % Ensemble Learning (Still a first layer classifier)
    %-------------------------------------------------------------------
    if settings.doBagging == 1
        %-------------------------------------------------------------------
        % Bagging
        %-------------------------------------------------------------------
        fprintf('\nPerforming Bagging\n');
        clHeader = vertcat(clHeader, {'Bagging Trees'});
        for i = 1:numCategories
            fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
            tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25);% , 'minleaf', 15);
            %errorArray = oobError(tBag);
            %errorVector(i) = errorArray(end);
            %bagClassValue(:,i) = predict(tBag, X); %tBag's trees were each
            %trained on a bootstrap sample of the rows, so applying the full
            %ensemble to every observation would test trees on their own
            %training points; use the out-of-bag predictions instead.
            bagClassValue(:,i) = oobPredict(tBag);
            errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i));
            fprintf('Error: %f classSuccess %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)) );
            %For regression, bagClassProb is just the standard deviation
            %across all the trees, so it's kind of an indication of spread.
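            %A sketch of pulling that spread out (stdevs would be the
            %per-observation standard deviation over the trees; note that,
            %unlike oobPredict, this re-applies the ensemble to its own
            %training rows):
            %   [~, bagStdev] = predict(tBag, X);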
            if settings.doPlot == 1
                %plot the change over time. Real slow so might as well look at something
                figure;
                plot(oobError(tBag));
                xlabel('number of grown trees');
                ylabel('out-of-bag regression error');
                title(str2mat(categories(i)));
            end
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, bagClassValue);
    end
end
%-------------------------------------------------------------------
% Compile results
%-------------------------------------------------------------------
%clError = horzcat(clError, mean(clError, 2));
%bagError = horzcat(bagError, mean(bagError, 2));
%nnError = horzcat(nnError, mean(nnError, 2));
%allErrors = vertcat(clError, bagError);
%allErrors = vertcat(allErrors, nnError);
%averageError = [mean(clError, 2);mean(bagError, 2);mean(nnError, 2)];
%Remove the blank seed entries at the beginning. This was the easiest way I
%found to 1. guarantee the variables are initialized, which keeps MATLAB from
%complaining, and 2. leave the length unspecified, so you don't have to
%rework the code if you add more classifiers to the mix or turn some of them
%off in the settings.
errorRate = clError(2:end,:);
errorHeaders = clHeader(2:end);
classValue = classValue(:,:,2:end);