b/Semantic Features/NotUsed/MLSuite.asv
function [ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ] = MLSuite(X, Y, settings)
%MLSuite Runs all of the machine learning methods on the given X and Y
%matrices, using the options supplied in the settings struct.
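%
% Example (an illustrative sketch only -- these field values are
% assumptions, not project defaults, and settings must also carry
% kernel/minVote/maxIter/histos when doSVM is enabled):
%   settings.categories = {'malignancy'};
%   settings.doLearning = 1;
%   settings.doTrees    = 1;
%   settings.doNN       = 1;  settings.hiddenLayer = 10;
%   settings.doSVM      = 0;
%   settings.doBayes    = 0;
%   settings.doBagging  = 1;  settings.numTrees = 100;
%   settings.doPlot     = 0;
%   settings.runMeta    = 0;
%   [classValue, errorRate, errorHeaders] = MLSuite(X, Y, settings);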

categories = settings.categories;
numCategories = length(categories);

clError = zeros(1, numCategories); %To hold classification success/error rates
clHeader = cell(1,1); %Placeholder first row; trimmed off with clError's first row at the end

%Default outputs, in case every learning method is switched off
classValue = 0;
errorRate = 0;
errorHeaders = 0;
confusionMatrix = 0;
sens = 0;
spec = 0;

%TODO: redo so all learning is done with the same training samples?
%Temporarily eliminating all data types except average-to-rounded-average,
%and using the same training set for each.

option = 3; %Real regression (selects the regression branch of the bagging section)

classValue = zeros(size(X,1), numCategories, 1);
%-------------------------------------------------------------------
% Standard Learning
%-------------------------------------------------------------------
if settings.doLearning == 1
    if settings.doTrees == 1
        %-------------------------------------------------------------------
        % Decision tree learning
        %-------------------------------------------------------------------
        %fprintf('\nDecision tree learning as if all data are from different nodules\n');
        %j = 1;
        %clHeader = {'DT 1 to 1'};
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
        %    [classValue(:,i,1), clError(j,i)] = decisionTreeLearning(Xraw, Yraw, i, 0);
        %    fprintf('Error: %f\n', clError(j,i));
        %end

        %fprintf('\nDecision tree learning from mean of each group\n'); %has zero biases in the X matrix
        %clHeader = vertcat(clHeader, {'DT Group to Mean'});
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
        %    [dtClassValue(:,i), errorVector(i)] = decisionTreeLearning(Xgrouped, Yaverage, i, 1);
        %    fprintf('Error: %f\n', errorVector(i));
        %end
        %clError = vertcat(clError, errorVector);
        %classValue = cat(3, classValue, dtClassValue);

        fprintf('\nDecision tree learning from mean of each group vs mean of features\n');
        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
            [dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
            fprintf('Error: %f class Success %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)) );
        end
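        %GetClassSuccessRate is a project-local helper; the assumption in
        %these printouts is that it rounds real-valued predictions to the
        %nearest class and reports the fraction agreeing with the labels,
        %i.e. roughly mean(round(pred) == round(truth))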
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, dtClassValue);
    end
    %-------------------------------------------------------------------
    % Neural Network Learning
    %-------------------------------------------------------------------
    if settings.doNN == 1
        fprintf('\nNeural Network training.\n');
        clHeader = vertcat(clHeader, {'NN'});
        %option = 0; %vector regression
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s ', str2mat(categories(i)));
        %    nnEvaluation(i) = nnLearning(Xgrouped, Ygrouped, i, option);
        %    fprintf('RMS: %.4f\n', nnEvaluation(i).Eout);
        %end

        errorVector = zeros(1, numCategories);
        for i = 1:numCategories
            fprintf('NN on category: %s ', str2mat(categories(i)));
            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)) );
            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, nnClassValue);
    end

    %-------------------------------------------------------------------
    % SVM Learning
    %-------------------------------------------------------------------
    %Yeah boooy, multiclass SVMs are go
    if settings.doSVM
        fprintf('Performing SVM learning\n');
        kernel = 'polynomial'; %Kernel method ('mlp' is another option; see also 'polyorder'). Unused: the call below reads settings.kernel
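        %SVMMultiBagged is a project-local routine; only the signature used
        %below is taken from this file. For orientation, a bare one-vs-one
        %majority vote over binary SVMs would look roughly like this
        %(hypothetical sketch, not the actual SVMMultiBagged internals):
        %  votes = zeros(size(X,1), nClasses);
        %  for a = 1:nClasses-1
        %      for b = a+1:nClasses
        %          rows = yTrain == a | yTrain == b;
        %          mdl = svmtrain(Xtrain(rows,:), yTrain(rows), 'kernel_function', settings.kernel);
        %          p = svmclassify(mdl, X);
        %          votes = votes + bsxfun(@eq, p, 1:nClasses);
        %      end
        %  end
        %  [~, yHat] = max(votes, [], 2); %class with the most pairwise wins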

        svmError = zeros(1, numCategories);
        svmErrorDelta = zeros(3, numCategories);
        for testCat = 1:numCategories
            fprintf('Learning on category %d %s\n', testCat, categories{testCat});

            [svmClassValue(:,testCat,1), svmError(testCat), svmConfusionMatrix(:,:,testCat), svmSens(testCat,:), svmSpec(testCat,:)] ...
                = SVMMultiBagged(X, round(Y(:,testCat)), settings.kernel, settings.minVote, settings.maxIter);
            %Broken clock accuracy: the score earned by always guessing the most common class
            fprintf('Broken clock accuracy: %.1f\n', settings.histos(testCat,7) * 100);
            fprintf('SVM method 2 error rate: %.1f\n\n', svmError(testCat));
        end
        %svmError = vertcat(svmError, settings.histos(:,7)' * 100);
        for i = 1:size(svmError,1)
            svmErrorDelta(i,:) = svmError(i,:) - (100 - (settings.histos(:,7)' * 100));
        end
        %NB: svmErrorDelta is computed here but not used or returned

        %Copy to the outgoing variables
        classValue = cat(3, classValue(:,:,1), svmClassValue); %NB: keeps only the placeholder slice, discarding any DT/NN results above
        clError = vertcat(clError, svmError);
        clHeader = vertcat(clHeader, {'SVM'}); %Needs to be altered to mesh with the other ML types' data
        confusionMatrix = svmConfusionMatrix;
        sens = svmSens;
        spec = svmSpec;
        clear kernel
    end


    %-------------------------------------------------------------------
    % Bayesian Learning
    %-------------------------------------------------------------------
    if settings.doBayes
        fprintf('\nNaive Bayes Learning.\n');
        clHeader = vertcat(clHeader, {'Bayes'});
        errorVector = zeros(1, numCategories);

        for i = 1:numCategories
            fprintf('Bayes on category: %s ', str2mat(categories(i)));
            [bayesClassValue(:,i), errorVector(i)] = bayesLearning(X, Y, i);
            fprintf('\t\tAverage Error: %.4f Class Success %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)) );
        end
        clError = vertcat(clError, errorVector);
        classValue = cat(3, classValue, bayesClassValue);
    end

    %-------------------------------------------------------------------
    % Ensemble Learning
    %-------------------------------------------------------------------
    if settings.doBagging == 1
        %-------------------------------------------------------------------
        % Bagging
        %-------------------------------------------------------------------
        fprintf('\nPerforming Bagging\n');

        %fprintf('Bagging as if all data are from different nodules\n');
        %clHeader = vertcat(clHeader, {'Bagging 1 to 1'});
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
        %    tBag = TreeBagger(settings.numTrees, Xraw, Yraw(:,i), 'OOBPred', 'on', 'NPrint', 10);
        %    errorArray = oobError(tBag);
        %    errorVector(i) = errorArray(end);
        %    fprintf('Error: %f\n', errorVector(i));

        %    bagClassValue(:,i) = predict(tBag, Xraw(trainRows,i));

        %    if settings.doPlot == 1
        %        %plot the change. Real slow so might as well look at something
        %        figure;
        %        plot(oobError(tBag));
        %        xlabel('number of grown trees');
        %        ylabel('out-of-bag classification error');
        %        title(str2mat(categories(i)));
        %    end
        %end
        %clError = vertcat(clError, errorVector);

        %fprintf('\nBagging the mean of each group\n'); %zero bias
        %clHeader = vertcat(clHeader, {'Bagging Group to Mean'});
        %for i = 1:numCategories
        %    fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
        %    %tBag = TreeBagger(numTrees, Xaverage, Yaverage(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 10);
        %    tBag = TreeBagger(settings.numTrees, Xgrouped, round(Yaverage(:,i)), 'OOBPred', 'on', 'NPrint', 10);
        %    errorArray = oobError(tBag);
        %    errorVector(i) = errorArray(end);
        %    fprintf('Error: %f\n', errorVector(i));
        %
        %    bagClassValue(:,i) = predict(tBag, Xraw(trainRows,i));

        %    if settings.doPlot == 1
        %        %plot the change. Real slow so might as well look at something
        %        figure;
        %        plot(oobError(tBag));
        %        xlabel('number of grown trees');
        %        ylabel('out-of-bag classification error');
        %        title(str2mat(categories(i)));
        %    end
        %end
        %clError = vertcat(clError, errorVector);

        fprintf('\nBagging the mean of each group vs mean of features\n');
        clHeader = vertcat(clHeader, {'Bagging Mean to Mean'});
        if option == 3 %Really do regression
            for i = 1:numCategories
                fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
                tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25); %, 'minleaf', 15);
                %tBag = TreeBagger(settings.numTrees, X, round(Y(:,i)), 'OOBPred', 'on', 'NPrint', 10);
                %errorArray = oobError(tBag);
                %errorVector(i) = errorArray(end);

                %bagClassValue(:,i) = predict(tBag, X); %Not used: every tree
                %was trained on a bootstrap sample that contains some of
                %these observations, so full-ensemble resubstitution
                %predictions are optimistically biased. oobPredict scores
                %each observation using only the trees that never saw it.
                bagClassValue(:,i) = oobPredict(tBag);
                errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i)); %RMSE of the OOB predictions
                fprintf('Error: %f classSuccess %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)) );
                %For regression, bagClassProb would just be the standard
                %deviation of the predictions across the trees, so it is
                %only a rough indication of confidence
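                %Illustrative contrast, commented out (assumes RMSE is the
                %usual root-mean-square-error helper): resubstitution error
                %from the full ensemble comes out optimistically low next
                %to the OOB figure above:
                %  resubError = RMSE(predict(tBag, X), Y(:,i));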
                if settings.doPlot == 1
                    %plot the change. Real slow so might as well look at something
                    figure;
                    plot(oobError(tBag));
                    xlabel('number of grown trees');
                    ylabel('out-of-bag regression error');
                    title(str2mat(categories(i)));
                end
            end
            clError = vertcat(clError, errorVector);
            classValue = cat(3, classValue, bagClassValue);

        else
            for i = 1:numCategories
                fprintf('***Obsolete code. Learning and evaluating success on category: %s\n', str2mat(categories(i)));
                %tBag = TreeBagger(numTrees, Xaverage, Yaverage(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 10);
                tBag = TreeBagger(settings.numTrees, X, round(Y(:,i)), 'OOBPred', 'on', 'NPrint', 10);
                errorArray = oobError(tBag);
                errorVector(i) = errorArray(end);
                fprintf('Error: %f\n', errorVector(i));

                [bagLabels, bagScores] = predict(tBag, X);
                bagClassValue(:,i) = str2num(cell2mat(bagLabels));
                bagClassProb(:,i) = max(bagScores, [], 2); %Highest class posterior across the trees, as a confidence proxy

                if settings.doPlot == 1
                    %plot the change. Real slow so might as well look at something
                    figure;
                    plot(oobError(tBag));
                    xlabel('number of grown trees');
                    ylabel('out-of-bag classification error');
                    title(str2mat(categories(i)));
                end
            end
            clError = vertcat(clError, errorVector);
            classValue = cat(3, classValue, bagClassValue);
        end
    end
end

%-------------------------------------------------------------------
% Compile results
%-------------------------------------------------------------------
%clError = horzcat(clError, mean(clError, 2));
%bagError = horzcat(bagError, mean(bagError, 2));
%nnError = horzcat(nnError, mean(nnError, 2));
%allErrors = vertcat(clError, bagError);
%allErrors = vertcat(allErrors, nnError);
%averageError = [mean(clError, 2); mean(bagError, 2); mean(nnError, 2)];
%Strip the placeholder first row/slice that these variables were seeded with
errorRate = clError(2:end,:);
errorHeaders = clHeader(2:end);
classValue = classValue(:,:,2:end);

%-------------------------------------------------------------------
% Investigate metafeatures
%-------------------------------------------------------------------
if(settings.runMeta == 1)
    %Build an array of the ratings blocks (before 0s are added)
    %NB: relies on datax, which is not defined in this function's scope
    allRatings = cell(16555,1);
    j = 1;
    i = 1;
    while(i <= size(datax,1))
        rating = GetRadiologistsRatings(datax, i);
        allRatings{j} = rating;
        %fprintf('Size %d start %d finish %d i %d t %d next %d name %s\n', rating.totalRatings, rating.rows(1), rating.rows(2), i, rating.totalRatings, i + rating.totalRatings, rating.noduleID);
        i = i + rating.totalRatings;
        j = j + 1;
    end
    allRatings = allRatings(1:j-1); %Drop the unused preallocated cells

    %Compare stdevs. (To gather a field across a struct array without a
    %loop, MATLAB's comma-separated lists work, e.g. total = sum([patient.billing]);)
    for i = 1:size(allRatings,1)
        stdevList(i,:) = allRatings{i}.stdev;
    end
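    %Loop-free equivalent of the gather above (a sketch, assuming every
    %cell holds a struct with identical fields and a row-vector .stdev):
    %  ratingStructs = [allRatings{:}];
    %  stdevList = vertcat(ratingStructs.stdev);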
    metaFeatures.stdev = mean(stdevList); %Average stdev for each of the 4 ratings. NB: metaFeatures is not among the outputs
end


%[ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ]


%Clear unnecessary data
%clear rebuildMatrices reloadFeatures minAgreement doLearning doTrees doBagging doNN ...
%    doSVM doPlot doSave numTrees errorVector i testCat runMeta row numCategories ...
%    categories j option tBag newRowOrder nnError