|
a |
|
b/Semantic Features/NotUsed/learnScript.asv |
|
|
fprintf('*********************************************************************\n');
fprintf('Beginning Execution\n');
tic;        %start the elapsed-time clock; read back with toc at the end for the email notification
close all;
clearvars;  %was CLEAR ALL -- clearvars resets the workspace without purging compiled
            %functions and breakpoints (MATLAB Code Analyzer recommendation)
load('sourceData.mat'); %restore cached matrices/settings saved by a previous run (see save at bottom)

%Variables for data set
rebuildRTMatrices = 0; %flag that we are loading saved X and Y matrices to reduce repeating old calculations
reloadWSMatrices = 0;  %same idea for the weakly-segmented feature matrices (Xws/XwsMulti/YwsMulti)
doWSEns = 1;           %run the per-segmentation classifier loop
minAgreement = 1;      %Ignore data if fewer than this many radiologists rated it

%Variable for data output
doSave = 0;            %when 1, write results to Excel via saveResults
doPlot = 0;            %plotting flag; also forwarded through settings.doPlot

%Options for learning which will be passed (consumed by MLSuite)
settings.doLearning = 1;
settings.doTrees = 1;
settings.doBagging = 1;
settings.doNN = 1;
settings.doSVM = 0;
settings.runMeta = 0;

%Options for learning parameters
settings.numTrees = 100;   %Bagging: number of trees to make
settings.hiddenLayer = 50; %Neural Network hidden nodes. 60 was the max the RAM could handle; more always gave improvement.
settings.maxIter = 30;     %SVM (Abandoned)
settings.minVote = 10;
settings.kernel = 'polynomial';

%Misc Data to pass
%NOTE(review): 'Margin ' carries a trailing space -- presumably matches a data
%header elsewhere; confirm before normalizing
settings.categories = {'Subtlety', 'Sphericity', 'Margin ', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy'};
numCategories = length(settings.categories);

%misc code fragments kept for reference
%Cross validation
%cv = fitensemble(X,Y,'Bag',200,'Tree','type','classification','kfold',5)
%Make bags
%bag = fitensemble(Xtrain,Ytrain,'Bag',200,'Tree', 'type','classification
%Test and train sets
%cvpart = cvpartition(Y,'holdout',0.3);
%Xtrain = X(training(cvpart),:);
%Ytrain = Y(training(cvpart),:);
%Xtest = X(test(cvpart),:);
%Ytest = Y(test(cvpart),:);
%-------------------------------------------------------------------
% Prepare the X and Y matrices for each method type we want to perform
%-------------------------------------------------------------------
%Build X and Y from radiologists' data. Only runs when the caches in
%sourceData.mat are being rebuilt; otherwise the load at the top of the
%script already supplied Xraw/Yraw/Xlargest/Yaverage/etc.
%NOTE(review): GetRadData / GetAveragedMatrix / GetLargestMatrix are project
%functions not visible in this file; comments describe the visible wiring only.
if rebuildRTMatrices == 1
    [Xraw, Yraw, instanceID, settings.ratingRow, data, histos ] = GetRadData(minAgreement);
    fprintf('Getting the average rating for each nodule\n');
    [Yaverage, instanceOrder] = GetAveragedMatrix(Yraw, settings.ratingRow, instanceID);
    fprintf('Getting the image features from the largest outline of each nodule\n');
    load('idstouse.mat'); %supplies IDsToUse for the largest-outline selection below
    Xlargest = GetLargestMatrix(Xraw, IDsToUse, instanceID);
    %get Xaverage for the rand comparison
    [Xaverage, instanceOrder] = GetAveragedMatrix(Xraw, settings.ratingRow, instanceID);
end

%Build X from weakly segmented features (again, only when rebuilding the cache)
if reloadWSMatrices == 1
    [Xws, numSegments] = GetXws(instanceID); %raw version
    [XwsMulti, YwsMulti] = GetXwsSampleConcat(Xws, Yaverage, numSegments);
end

%ride histos and the plot flag along inside settings so MLSuite can see them
settings.histos = histos;
settings.doPlot = doPlot;

fprintf('\nPerforming Relative Truth Learning');
[ rtClassValue, rtRegError, rtErrorHeaders, rtConfusionMatrix, rtSens, rtSpec ] = MLSuite(Xlargest, Yaverage, settings);
%fprintf('\nPerforming learning on segment features');
%[ wsClassValue, wsRegError, wsErrorHeaders, wsConfusionMatrix, wsSens, wsSpec ] = MLSuite(Xws, Yaverage, settings);

%Weak-segmentation ensemble: train a separate classifier per segmentation by
%slicing XwsMulti/YwsMulti into contiguous row blocks, one block per segmentation.
if doWSEns == 1
    fprintf('\nPerforming WS learning, each segmentation sent to a different classifier'); %loop over segments sets
    clear wsMultiClassValue wsMultiRegError wsMultiErrorHeaders wsMultiConfusionMatrix wsMultiSens wsMultiSpec

    startRow = 0;
    finishRow = 0;
    %NOTE(review): the block size comes from Xws but the slicing is done on
    %XwsMulti -- this assumes XwsMulti stacks segmentations in blocks of
    %size(Xws,1) rows. Confirm against GetXwsSampleConcat.
    setLength = size(Xws,1);
    for i = 1:numSegments
        fprintf('\nTraining on segmentation #: %d', i);
        startRow = 1 + (i-1) * setLength;
        if i == numSegments %Last set has unusual length
            finishRow = size(XwsMulti,1);
        else
            finishRow = i * setLength;
        end
        %last dimension indexed by i collects each segmentation's results side by side
        [ wsMultiClassValue(:,:,:,i), wsMultiRegError(:,:,i), wsMultiErrorHeaders(:,i), wsMultiConfusionMatrix(:,:,:,i), wsMultiSens(:,i), wsMultiSpec(:,i) ] = MLSuite(XwsMulti(startRow:finishRow,:), YwsMulti(startRow:finishRow,:), settings);
    end
end
%add a random matrix for comparison -- a chance-level baseline for the learners
fprintf('\nPerforming Learning on randomized features (garbage)');
[numInstances, numFeatures] = size(Xaverage);
Xrand = rand(numInstances, numFeatures);
[ randClassValue, randRegError, randErrorHeaders, randConfusionMatrix, randSens, randSpec ] = MLSuite(Xrand, Yaverage, settings);
%-------------------------------------------------------------------
% Summarize L0 Results
%-------------------------------------------------------------------
clear rtOverRandReg rtOverWSReg rtOverRandSuccess rtOverWSSuccess rtClassSuccess
clear wsClassSuccess randClassSuccess


%sum(round(rtClassValue(:,:,3)) == round(Yaverage))/size(Yaverage,1)
%tabulate()
%sum(num2RadClass(rtClassValue(:,:,3)) == num2RadClass(Yaverage))/size(Yaverage,1)

%Final comparisons. Turn back into classifications and compare to the
%targets. (Best after ensemble learning)
%Row i = learner i's per-category fraction of instances whose rounded
%prediction equals the rounded target rating.
rtClassSuccess = zeros(size(rtRegError,1), size(Yaverage,2)); %preallocate instead of growing inside the loop
for i = 1:size(rtRegError,1)
    %rtClassSuccess(i,:) = sum(num2RadClass(rtClassValue(:,:,i)) == num2RadClass(Yaverage))/size(Yaverage,1);
    rtClassSuccess(i,:) = sum(round(rtClassValue(:,:,i)) == round(Yaverage))/size(Yaverage,1);
end

%for i = 1:size(randRegError,1)
%wsClassSuccess(i,:) = sum(num2RadClass(wsClassValue(:,:,i)) == num2RadClass(Yws))/size(Yws,1);
% wsClassSuccess(i,:) = sum(round(wsClassValue(:,:,i)) == round(Yaverage))/size(Yaverage,1);
%end

%for i = 1:size(randRegError,1)
%wsClassSuccess(i,:) = sum(num2RadClass(wsClassValue(:,:,i)) == num2RadClass(Yws))/size(Yws,1);
% wsSConcatClassSuccess(i,:) = sum(round(wsSConcatClassValue(:,:,i)) == round(YwsMulti))/size(YwsMulti,1);
%end

%wsMulti only has value after stacked ensemble learning, but estimate
%anyway
wsMultiClassSuccess = zeros(size(wsMultiRegError,1), size(Yaverage,2), size(wsMultiClassValue,4)); %preallocate
for segment = 1:size(wsMultiClassValue,4)
    for i = 1:size(wsMultiRegError,1)
        %randClassSuccess(i,:) = sum(num2RadClass(randClassValue(:,:,i)) == num2RadClass(Yaverage))/size(Yaverage,1);
        wsMultiClassSuccess(i,:,segment) = sum(round(wsMultiClassValue(:,:,i,segment)) == round(Yaverage))/size(Yaverage,1);
    end
end

randClassSuccess = zeros(size(randRegError,1), size(Yaverage,2)); %preallocate
for i = 1:size(randRegError,1)
    %randClassSuccess(i,:) = sum(num2RadClass(randClassValue(:,:,i)) == num2RadClass(Yaverage))/size(Yaverage,1);
    randClassSuccess(i,:) = sum(round(randClassValue(:,:,i)) == round(Yaverage))/size(Yaverage,1);
end

%rtOverRandSuccess = rtClassSuccess - randClassSuccess;
%rtOverWSSuccess = rtClassSuccess - wsClassSuccess;

%rtOverRandReg = randRegError - rtRegError;
%rtOverWSReg = wsRegError - rtRegError;
%-------------------------------------------------------------------
% Stacking Ensemble Learning
%-------------------------------------------------------------------
clear XwsMultiEns
%reformat the data for ensemble use: for each rating category i, gather every
%base learner's prediction for an instance onto the same row
for i = 1:numCategories
    %XwsEns(:,:,i) = reshape(wsClassValue(:,i,:),size(wsClassValue,1), []);
    %XwsSConcatEns(:,:,i) = reshape(wsSConcatClassValue(:,i,:),size(wsSConcatClassValue,1), []);
    XrtEns(:,:,i) = reshape(rtClassValue(:,i,:),size(rtClassValue,1), []);
    XwsMultiEns(:,:,i) = reshape(wsMultiClassValue(:,i,:,:), size(wsMultiClassValue,1), []); %Concats all the estimated ratings for a rating category from each segmentation of the same ROI. Puts them on the same row
    XrandEns(:,:,i) = reshape(randClassValue(:,i,:),size(randClassValue,1), []);
end

%run the stacked (level-1) learner per category; StackEns is a project function
for i = 1:numCategories
    %[wsEnsPred(:,i), wsEnsError(i), wsEnsSuccess(i), wsEnsBeta(:,i)] = StackEns(XwsEns, Yaverage, i );
    %[wsSConcatEnsPred(:,i), wsSConcatEnsError(i), wsSConcatEnsSuccess(i), wsSConcatEnsBeta(:,i)] = StackEns(XwsSConcatEns, YwsMulti, i );
    [wsMultiEnsPred(:,i), wsMultiEnsError(i), wsMultiEnsSuccess(i), wsMultiEnsBeta(:,i)] = StackEns(XwsMultiEns, Yaverage, i );
    [rtEnsPred(:,i), rtEnsError(i), rtEnsSuccess(i), rtEnsBeta(:,i)] = StackEns(XrtEns, Yaverage, i );
    [randEnsPred(:,i), randEnsError(i), randEnsSuccess(i), randEnsBeta(:,i)] = StackEns(XrandEns, Yaverage, i );
end
%handy inspection snippets, kept for reference:
%[wsMultiClassSuccess(:,:,1);wsMultiClassSuccess(:,:,2);wsMultiClassSuccess(:,:,3);wsMultiClassSuccess(:,:,4)];
%[wsSConcatRegError;wsRegError;rtRegError;randRegError];
%[wsSConcatClassSuccess;wsClassSuccess;rtClassSuccess;randClassSuccess];

%[wsSConcatEnsError;wsEnsError;wsMultiEnsError;rtEnsError;randEnsError];
%[wsSConcatEnsSuccess;wsEnsSuccess;wsMultiEnsSuccess;rtEnsSuccess;randEnsSuccess];
%-------------------------------------------------------------------
% Significance testing
%------------------------------------------------------------------
ttestScript %project script; operates directly on the workspace built above

%-------------------------------------------------------------------
% Save results to Excel
%-------------------------------------------------------------------
if doSave == 1
    saveResults; %project script
end
%-------------------------------------------------------------------
% Plot Performance
%-------------------------------------------------------------------
%NOTE(review): the guard is deliberately impossible (doPlot is only ever 0 or 1),
%so this legacy section never executes. It also references clError/clHeader,
%which are no longer defined anywhere in this script -- it would error if enabled.
if doPlot == 3 %never happens
    %Plot
    %bar(clError)
    xPlot = 1:numCategories;
    scrsz = get(0,'ScreenSize');

    figure('Position',[scrsz(4)/4, scrsz(4)/2, scrsz(3)/1.5, scrsz(4)/2]-50)
    lineStyles = {'-ro', '-bo', '-go', '-ko', '-mo'}; %one style per learner row
    hold on
    for row = 1:5
        plot(xPlot, clError(row,:) * 100, lineStyles{row});
    end
    hold off
    xlabel('Category');
    ylabel('Percent Error');
    %was set(...,'XTickLabel',categories,...): 'categories' is never defined in
    %this script -- the category names live in settings.categories
    set(gca,'XTickLabel',settings.categories, 'YLim',[0 80]);
    title('Classification Success Rate');
    %legend('DT 1 to 1','DT Largest Area', 'DT Group to Mean', 'DT Mean to Mean', 'NN');
    legend(clHeader{1}, clHeader{2}, clHeader{3}, clHeader{4}, clHeader{5});

    %make a new figure %Check the contents of this graph. Probably changed
    %figure('Position',[scrsz(4)/4, 0, scrsz(3)/1.5, scrsz(4)/2]-50)
    %plot(xPlot, nnError, '-ro');
    %set(gca,'XTickLabel',categories);
    %title('Multiple Classification ??');
    %legend('NN');

    %sim(net,Xaverage')'
end
%clear unnecessary data before caching the workspace
%was "clear rebuildMatrices reloadFeatures ..." -- those names never existed;
%the real flags are rebuildRTMatrices / reloadWSMatrices, so they were never cleared
clear rebuildRTMatrices reloadWSMatrices minAgreement doLearning doTrees doBagging doNN ...
    doSVM doPlot doSave doWSEns numTrees errorVector i testCat runMeta row numCategories ...
    categories j option tBag newRowOrder nnError

%Save X and Y data for repeated runs
save('sourceData.mat', 'Xraw', 'Yraw', 'instanceID', 'settings', 'data', 'histos', 'Xlargest', 'Yaverage', 'Xws', 'XwsMulti', 'YwsMulti', 'numSegments');

%send a notification email, but only for runs long enough to care about
elapsedMinutes = toc/60; %read the clock once so the test and the message agree
if elapsedMinutes > 5
    %was strcat('Script Done. ', ...): strcat strips trailing whitespace from
    %character-array arguments, so the space before the number was silently lost
    message = ['Script Done. ', num2str(elapsedMinutes)];
    matlabMail('ethansmi@hawaii.edu', message);
end
fprintf('*********************************************************************\n');