Switch to unified view

a b/Semantic Features/MLSuite.m
1
function [ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec  ] = MLSuite(X, Y, settings)
2
%MLSuite Runs all the base level machine learning methods on the X and Y matrices
3
%given using the set of options given
4
5
categories = settings.categories;
6
numCategories = length(categories);
7
8
clError         = zeros(1, numCategories); %To hold classification success/error rates
9
clHeader        = cell(1,1);
10
11
classValue   = 0;
12
errorRate    = 0;
13
errorHeaders = 0;
14
confusionMatrix = 0; %Code exists, but not used since changing from classes to numbers for the labels
15
sens = 0; %Code exists, but not used since changing from classes to numbers for the labels
16
spec = 0; %Code exists, but not used since changing from classes to numbers for the labels
17
18
%Note: each algo, each run will test on different rows
19
20
classValue = zeros(size(X,1), numCategories, 1);
21
%-------------------------------------------------------------------
22
% Standard Learning
23
%-------------------------------------------------------------------
24
if settings.doLearning == 1
25
    if settings.doTrees == 1
26
        %-------------------------------------------------------------------
27
        % Decion tree learning
28
        %-------------------------------------------------------------------      
29
        fprintf('\nDecision tree learning from mean of each group vs mean of features \n');
30
        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
31
        errorVector = zeros(1, numCategories);
32
        for i = 1:numCategories
33
            fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
34
            [dtClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), @classregtree, @(X, trainedStruct) eval(trainedStruct, X));
35
            %[dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
36
            fprintf('Error: %f class Success %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)) );
37
        end
38
        clError = vertcat(clError, errorVector);
39
        classValue = cat(3, classValue, dtClassValue);
40
    end
41
    %-------------------------------------------------------------------
42
    % Neural Network Learning
43
    %-------------------------------------------------------------------
44
    if settings.doNN == 1
45
        fprintf('\nNeural Network training.\n');
46
        clHeader = vertcat(clHeader, {'NN'});
47
       
48
        errorVector = zeros(1, numCategories);
49
        for i = 1:numCategories
50
            fprintf('NN on category: %s ', str2mat(categories(i)));
51
            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
52
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)) );
53
            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
54
        end
55
        clError = vertcat(clError, errorVector);
56
        classValue = cat(3, classValue, nnClassValue); 
57
    end
58
59
    %-------------------------------------------------------------------
60
    % SVM Learning
61
    %-------------------------------------------------------------------
62
    %Multiclass/Class & Probability/Continuous numerical predictions by SVM
63
    %never got to work reliably and was removed. I hear scikit for python
64
    %works good
65
    
66
    %-------------------------------------------------------------------
67
    % Bayesian Learning
68
    %-------------------------------------------------------------------
69
    %Does not work! Works 99%, but <1% of the samples will result in a NaN
70
    %prediction thus destroying all the math steps that come afterwords.
71
    %You can try to figure out how to fix it, or just convert NaN into 0s
72
    %and be fine with failing those guys. 
73
    if settings.doBayes == 1   
74
        fprintf('\nNaieve Bayes Learning.\n');
75
        clHeader = vertcat(clHeader, {'Bayes'});        
76
        errorVector = zeros(1, numCategories);
77
        
78
        for i = 1:numCategories
79
            fprintf('Bayes on category: %s ', str2mat(categories(i)));
80
            [bayesClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), ...
81
                @(X, Y) NaiveBayes.fit(X, Y, 'Distribution', 'kernel'),...
82
                @(X, trainedStruct) posterior(trainedStruct, X, 'HandleMissing', 'On') ...
83
                );
84
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)) );
85
        end
86
        clError = vertcat(clError, errorVector);
87
        classValue = cat(3, classValue, nnClassValue); 
88
    end
89
90
    %-------------------------------------------------------------------
91
    % Ensemble Learning (Still a first layer classifier)
92
    %-------------------------------------------------------------------
93
    if settings.doBagging == 1
94
        %-------------------------------------------------------------------
95
        % Bagging
96
        %-------------------------------------------------------------------
97
        fprintf('\nPerforming Bagging\n');
98
        clHeader = vertcat(clHeader, {'Bagging Trees'});
99
        for i = 1:numCategories
100
            fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
101
            tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25);% , 'minleaf', 15);
102
            %errorArray = oobError(tBag);
103
            %errorVector(i) = errorArray(end);
104
105
            %bagClassValue(:,i) = predict(tBag, X); %tBag consists of
106
            %trees that don't apply to all data. Some trees were
107
            %trained with certain points! Therefore not all trees can
108
            %be applied to each observation. 
109
            bagClassValue(:,i) = oobPredict(tBag);
110
            errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i));
111
            fprintf('Error: %f classSuccess %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)) );
112
            %For regression bagClassProb is just the standard
113
                %deviation across all the trees, so its kind of an
114
                %indication
115
            if settings.doPlot == 1
116
                %plot the change over time. Real slow so might as well look at something
117
                figure;
118
                plot(oobError(tBag));
119
                xlabel('number of grown trees');
120
                ylabel('out-of-bag regression error');
121
                title(str2mat(categories(i)));
122
            end
123
        end
124
    clError = vertcat(clError, errorVector);
125
    classValue = cat(3, classValue, bagClassValue);
126
    end
127
end
128
129
%-------------------------------------------------------------------
130
% Compile results
131
%-------------------------------------------------------------------
132
%clError = horzcat(clError, mean(clError, 2));
133
%bagError = horzcat(bagError, mean(bagError, 2));
134
%nnError = horzcat(nnError, mean(nnError, 2));
135
%allErrors = vertcat(clError, bagError);
136
%allErrors = vertcat(allErrors, nnError);
137
%averageError = [mean(clError, 2);mean(bagError, 2);mean(nnError, 2)];
138
%Remove blank lines at the beginning. This was the easiest way I found to
139
%1. Guarantee variable initialized to keep matlab from complaining and 2.
140
%Have an unspecified length, so you don't have to rework the code if you
141
%add more classifiers to the mix, or turn some of them off in the settings.
142
errorRate = clError(2:end,:);
143
errorHeaders = clHeader(2:end);
144
classValue = classValue(:,:,2:end);
145
146