
Semantic Features/NotUsed/MLSuite.asv
function [classValue, errorRate, errorHeaders, confusionMatrix, sens, spec] = MLSuite(X, Y, settings)
%MLSuite Runs every enabled machine learning method on the feature matrix X
%and the label matrix Y, using the options supplied in the settings struct.

categories = settings.categories;
numCategories = length(categories);

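%A minimal usage sketch (illustrative values only; the field list is taken
%from the settings fields this function reads, the numbers are made up):
%   settings.categories = {'malignancy', 'spiculation'};
%   settings.doLearning = 1;
%   settings.doTrees = 1; settings.doNN = 1; settings.hiddenLayer = 10;
%   settings.doSVM = 0;  %when 1, also set settings.kernel, settings.minVote,
%                        %settings.maxIter, and settings.histos
%   settings.doBayes = 1;
%   settings.doBagging = 1; settings.numTrees = 100; settings.doPlot = 0;
%   settings.runMeta = 0;
%   [classValue, errorRate, errorHeaders] = MLSuite(X, Y, settings);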
clError         = zeros(1, numCategories); %To hold per-method classification error rates
clHeader        = cell(1,1);

%Default outputs; overwritten below by whichever learners are enabled
classValue = 0;
errorRate = 0;
errorHeaders = 0;
confusionMatrix = 0;
sens = 0;
spec = 0;

%Redo so all learning is done with the same training samples?
%Temporarily eliminating all label types except average-to-rounded-average,
%and using the same training set for each learner.

option = 3; %Real-valued regression

%classValue is observations x categories x methods; slice 1 is a
%placeholder that is dropped when results are compiled at the end
classValue = zeros(size(X,1), numCategories, 1);
%-------------------------------------------------------------------
% Standard Learning
%-------------------------------------------------------------------
if settings.doLearning == 1
    if settings.doTrees == 1
        %-------------------------------------------------------------------
        % Decision tree learning
        %-------------------------------------------------------------------
        %fprintf('\nDecision tree learning as if all data are from different nodules\n');
        %j = 1;
        %clHeader = {'DT 1 to 1'};
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
        %    [classValue(:,i), clError(j,i)] = decisionTreeLearning(Xraw, Yraw, i, 0);
        %    fprintf('Error: %f\n', clError(j,i));
        %end

        %fprintf('\nDecision tree learning from mean of each group\n'); %has zero biases in the X matrix
        %clHeader = vertcat(clHeader, {'DT Group to Mean'});
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
        %    [dtClassValue(:,i), errorVector(i)] = decisionTreeLearning(Xgrouped, Yaverage, i, 1);
        %    fprintf('Error: %f\n', errorVector(i));
        %end
        %clError = vertcat(clError, errorVector);
        %classValue = cat(3, classValue, dtClassValue);

        fprintf('\nDecision tree learning from mean of each group vs mean of features\n');
        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
            [dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
            fprintf('Error: %f class Success %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)));
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, dtClassValue);
    end
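    %GetClassSuccessRate is assumed to report the fraction of predictions
    %that land in the correct class after rounding; a minimal sketch of the
    %assumed helper (hypothetical), kept as a comment so this file still
    %runs unchanged:
    %   function rate = GetClassSuccessRate(pred, truth)
    %       rate = mean(round(pred) == round(truth));
    %   end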
    %-------------------------------------------------------------------
    % Neural Network Learning
    %-------------------------------------------------------------------
    if settings.doNN == 1
        fprintf('\nNeural Network training.\n');
        clHeader = vertcat(clHeader, {'NN'});
        %option = 0; %vector regression
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s ', str2mat(categories(i)));
        %    nnEvaluation(i) = nnLearning(Xgrouped, Ygrouped, i, option);
        %    fprintf('RMS: %.4f\n', nnEvaluation(i).Eout);
        %end

        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('NN on category: %s ', str2mat(categories(i)));
            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)));
            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, nnClassValue);
    end
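    %nnLearning is assumed to train a small feed-forward network per
    %category; a rough, hypothetical sketch of the idea using MATLAB's
    %fitnet (not this project's actual implementation), kept as a comment:
    %   net = fitnet(settings.hiddenLayer);
    %   net = train(net, X', Y(:,i)');    %fitnet expects samples as columns
    %   nnClassValue(:,i) = net(X')';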

    %-------------------------------------------------------------------
    % SVM Learning
    %-------------------------------------------------------------------
    %Multiclass SVMs
    if settings.doSVM
        fprintf('Performing SVM learning\n');
        kernel = 'polynomial'; %Kernel method; 'mlp' was also used. Note this local is unused: the call below reads settings.kernel.
        %'polyorder'

        svmError = zeros(1, numCategories);
        svmErrorDelta = zeros(3, numCategories);
        for testCat = 1:numCategories
            fprintf('Learning on category %d %s\n', testCat, categories{testCat});

            [svmClassValue(:,testCat,1), svmError(testCat), svmConfusionMatrix(:,:,testCat), svmSens(testCat,:), svmSpec(testCat,:)] ...
                = SVMMultiBagged(X, round(Y(:,testCat)), settings.kernel, settings.minVote, settings.maxIter);
            %"Broken clock" accuracy: the baseline from always predicting the
            %most common class, read from column 7 of settings.histos
            fprintf('Broken clock accuracy: %.1f\n', settings.histos(testCat,7) * 100);
            fprintf('SVM method 2 error rate: %.1f\n\n', svmError(testCat));
        end
        %svmError = vertcat(svmError, settings.histos(:,7)' * 100);
        %Error relative to the broken-clock baseline
        for i = 1:size(svmError,1)
            svmErrorDelta(i,:) = svmError(i,:) - (100 - (settings.histos(:,7)' * 100));
        end
        %Copy to the outgoing variables
        classValue = cat(3, classValue(:,:,1), svmClassValue);
        clError = vertcat(clError, svmError);
        clHeader = vertcat(clHeader, {'SVM'}); %Needs to be altered to mesh with the other ML types' data
        confusionMatrix = svmConfusionMatrix;
        sens = svmSens;
        spec = svmSpec;
        clear kernel
    end
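    %For reference, per-class sensitivity and specificity as assumed here
    %can be read off a confusion matrix C (rows = true class, cols =
    %predicted class); a hypothetical helper, kept as a comment:
    %   sensC = diag(C) ./ sum(C, 2);             %TP ./ (TP + FN) per class
    %   specC = (sum(C(:)) - sum(C,1)' - sum(C,2) + diag(C)) ...
    %           ./ (sum(C(:)) - sum(C,2));        %TN ./ (TN + FP) per class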

    %-------------------------------------------------------------------
    % Bayesian Learning
    %-------------------------------------------------------------------
    if settings.doBayes
        fprintf('\nNaive Bayes Learning.\n');
        clHeader = vertcat(clHeader, {'Bayes'});
        errorVector = zeros(1, numCategories);

        for i = 1:numCategories
            fprintf('Bayes on category: %s ', str2mat(categories(i)));
            [bayesClassValue(:,i), errorVector(i)] = bayesLearning(X, Y, i);
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)));
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, bayesClassValue); %was nnClassValue: a copy-paste bug that stored the NN predictions under the Bayes header
    end

    %-------------------------------------------------------------------
    % Ensemble Learning
    %-------------------------------------------------------------------
    if settings.doBagging == 1
        %-------------------------------------------------------------------
        % Bagging
        %-------------------------------------------------------------------
        fprintf('\nPerforming Bagging\n');
147
148
        %fprintf('Bagging as if all data are from different nodules\n');
149
        %clHeader = vertcat(clHeader, {'Bagging 1 to 1'});
150
        %for i = 1:numCategories
151
        %    fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
152
        %    tBag = TreeBagger(settings.numTrees, Xraw, Yraw(:,i), 'OOBPred', 'on', 'NPrint', 10);
153
        %    errorArray = oobError(tBag);
154
        %    errorVector(i) = errorArray(end);
155
        %    fprintf('Error: %f\n', errorVector(i));
156
            
157
       %     bagClassValue(:,i) = predict(tBag, Xraw(trainRows,i));
158
159
        %    if settings.doPlot == 1
160
                %plot the change. Real slow so might as well look at something
161
       %         figure;
162
       %         plot(oobError(tBag));
163
       %         xlabel('number of grown trees');
164
       %         ylabel('out-of-bag classification error');
165
       %         title(str2mat(categories(i)));
166
       %     end
167
       % end
168
       % clError = vertcat(clError, errorVector);
169
170
       % fprintf('\nBagging the mean of each group\n'); %zero bias
171
       % clHeader = vertcat(clHeader, {'Bagging Group to Mean'});
172
       % for i = 1:numCategories
173
       %     fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
174
       %     %tBag = TreeBagger(numTrees, Xaverage, Yaverage(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 10);
175
       %     tBag = TreeBagger(settings.numTrees, Xgrouped, round(Yaverage(:,i)), 'OOBPred', 'on', 'NPrint', 10);
176
       %     errorArray = oobError(tBag);
177
       %     errorVector(i) = errorArray(end);
178
       %     fprintf('Error: %f\n', errorVector(i));
179
       %     
180
       %     bagClassValue(:,i) = predict(tBag, Xraw(trainRows,i));
181
182
       %     if settings.doPlot == 1
183
       %         %plot the change. Real slow so might as well look at something
184
       %         figure;
185
       %         plot(oobError(tBag));
186
       %         xlabel('number of grown trees');
187
       %         ylabel('out-of-bag classification error');
188
       %         title(str2mat(categories(i)));
189
       %     end
190
       % end
191
       % clError = vertcat(clError, errorVector);
192
193
        fprintf('\nBagging the mean of each group vs mean of features\n');
        clHeader = vertcat(clHeader, {'Bagging Mean to Mean'});
        if option == 3 %Really do regression
            for i = 1:numCategories
                fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
                tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25); %, 'minleaf', 15);
                %tBag = TreeBagger(settings.numTrees, X, round(Y(:,i)), 'OOBPred', 'on', 'NPrint', 10);
                %errorArray = oobError(tBag);
                %errorVector(i) = errorArray(end);

                %Don't use predict(tBag, X) here: each tree was trained on a
                %bootstrap sample containing some of these observations, so
                %scoring an observation with trees that saw it in training
                %leaks. oobPredict scores each observation using only the
                %trees for which it was out-of-bag.
                bagClassValue(:,i) = oobPredict(tBag);
                errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i));
                fprintf('Error: %f classSuccess %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)));
                %For regression, a bagClassProb would just be the standard
                %deviation across all the trees, so it is only a rough
                %confidence indication.
                if settings.doPlot == 1
                    %Plot the error curve; training is slow, so might as well look at something
                    figure;
                    plot(oobError(tBag));
                    xlabel('number of grown trees');
                    ylabel('out-of-bag regression error');
                    title(str2mat(categories(i)));
                end
            end
            clError = vertcat(clError, errorVector);
            classValue = cat(3, classValue, bagClassValue);
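            %RMSE above is assumed to be the usual root-mean-square error; a
            %one-line sketch of the assumed helper (hypothetical), kept as a
            %comment so this file runs unchanged:
            %   function e = RMSE(pred, truth)
            %       e = sqrt(mean((pred - truth).^2));
            %   end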
        else
            for i = 1:numCategories
                fprintf('***Obsolete code. Learning and evaluating success on category: %s\n', str2mat(categories(i)));
                %tBag = TreeBagger(numTrees, Xaverage, Yaverage(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 10);
                tBag = TreeBagger(settings.numTrees, X, round(Y(:,i)), 'OOBPred', 'on', 'NPrint', 10);
                errorArray = oobError(tBag);
                errorVector(i) = errorArray(end);
                fprintf('Error: %f\n', errorVector(i));

                %predict returns classification labels as a cellstr plus
                %per-class scores; the original str2num(cell2mat(...)) call
                %returned a success flag as its second output, not scores
                [bagLabels, bagScores] = predict(tBag, X);
                bagClassValue(:,i) = str2double(bagLabels);
                bagClassProb(:,:,i) = bagScores;

                if settings.doPlot == 1
                    %Plot the error curve; training is slow, so might as well look at something
                    figure;
                    plot(oobError(tBag));
                    xlabel('number of grown trees');
                    ylabel('out-of-bag classification error');
                    title(str2mat(categories(i)));
                end
            end
            clError = vertcat(clError, errorVector);
            classValue = cat(3, classValue, bagClassValue);
        end
    end
end

%-------------------------------------------------------------------
% Compile results
%-------------------------------------------------------------------
%clError = horzcat(clError, mean(clError, 2));
%bagError = horzcat(bagError, mean(bagError, 2));
%nnError = horzcat(nnError, mean(nnError, 2));
%allErrors = vertcat(clError, bagError);
%allErrors = vertcat(allErrors, nnError);
%averageError = [mean(clError, 2);mean(bagError, 2);mean(nnError, 2)];
%Drop the placeholder first row/slice that clError, clHeader, and
%classValue were initialized with
errorRate = clError(2:end,:);
errorHeaders = clHeader(2:end);
classValue = classValue(:,:,2:end);

%-------------------------------------------------------------------
% Investigate metafeatures
%-------------------------------------------------------------------
if(settings.runMeta == 1)
    %Build an array of the ratings blocks (before 0s are added).
    %NOTE: datax is not an input of MLSuite, so this block assumes it is
    %available in the enclosing workspace.
    allRatings = cell(16555,1); %preallocate; trimmed to the real count below
    j = 1;
    i = 1;
    while(i <= size(datax,1))
        rating = GetRadiologistsRatings(datax, i);
        allRatings{j} = rating;
        %fprintf('Size %d start %d finish %d i %d t %d next %d name %s\n', rating.totalRatings, rating.rows(1), rating.rows(2), i, rating.totalRatings, i + rating.totalRatings, rating.noduleID);
        i = i + rating.totalRatings;
        j = j + 1;
    end
    allRatings = allRatings(1:j-1); %drop the unused preallocated cells so the loop below does not hit empties

    %Compare stdevs. (For struct arrays MATLAB can gather a field directly,
    %e.g. total = sum([patient.billing]); allRatings is a cell array, so
    %loop instead.)
    for i = 1:size(allRatings,1)
        stdevList(i,:) = allRatings{i}.stdev;
    end
    metaFeatures.stdev = mean(stdevList); %Average stdev for each of the 4 ratings (note: metaFeatures is not currently returned)
end
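%A vectorized alternative to the stdev-gathering loop above (assumes every
%rating struct carries a stdev row of the same length), kept as a comment:
%   stdevList = cell2mat(cellfun(@(r) r.stdev, allRatings, 'UniformOutput', false));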

%[classValue, errorRate, errorHeaders, confusionMatrix, sens, spec]

%Clear unnecessary data
%clear rebuildMatrices reloadFeatures minAgreement doLearning doTrees doBagging doNN  ...
%    doSVM  doPlot  doSave numTrees errorVector i testCat runMeta row numCategories ...
%    categories j option tBag newRowOrder nnError