%This is the main file to run for the weak segmentation semantic
%predictions. It assumes feature data has already been extracted from the
%images and stored in CSV files. The script reads those features, feeds
%them to a suite of machine learning algorithms, and then merges the
%individual predictions into a single value via stacked generalization.
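%For reference, the CSV loading inside the Get* helper functions is
%assumed to look roughly like this (the file name is a placeholder):
%features = csvread('noduleFeatures.csv'); %numeric matrix, one row per sample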
fprintf('*********************************************************************\n');
fprintf('Beginning Execution\n');
tic;
close all;
%clear all;
%load('sourceData.mat'); %Shortcut when rerunning the script. Won't exist on the first run.
%Variables for data set
rebuildRTMatrices = 0; %1 = rebuild the radiologist (relative truth) X and Y matrices from scratch; 0 = reuse saved ones to avoid repeating old calculations
reloadWSMatrices = 0; %1 = rebuild the weakly segmented feature matrices; 0 = reuse saved ones
doWSEns = 1; %train a separate classifier suite on each weak segmentation
minAgreement = 1; %Ignore data if fewer than this many radiologists rated it
%Variable for data output
doSave = 0;
doPlot = 0; %Plotting code below is stale and will need fixing before it works again.
%Options for learning which will be passed
settings.doLearning = 1;
settings.doTrees = 1;
settings.doBagging = 1;
settings.doNN = 0;
settings.doBayes = 0; %not 100% working
%Options for learning parameters
settings.numTrees = 10; %100; %Number of trees to grow when bagging
settings.hiddenLayer = 5; % 50; %Neural network hidden nodes. 60 was the max the RAM could handle; more nodes always gave an improvement.
%Misc Data to pass
settings.categories = {'Subtlety', 'Sphericity', 'Margin', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy'};
numCategories = length(settings.categories);
%-------------------------------------------------------------------
% Prepare the X and Y matrices for each method type we want to perform
%-------------------------------------------------------------------
%Build X and Y from radiologists' data
if rebuildRTMatrices == 1
    [Xraw, Yraw, instanceID, settings.ratingRow, data, histos ] = GetRadData(minAgreement);
    fprintf('Getting the average rating for each nodule\n');
    [Yaverage, instanceOrder] = GetAveragedMatrix(Yraw, settings.ratingRow, instanceID);
    fprintf('Getting the image features from the largest outline of each nodule\n');
    load('idstouse.mat');
    Xlargest = GetLargestMatrix(Xraw, IDsToUse, instanceID);
    %get Xaverage for the rand comparison
    [Xaverage, instanceOrder] = GetAveragedMatrix(Xraw, settings.ratingRow, instanceID);
end
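%For reference, a minimal sketch of the per-nodule averaging that
%GetAveragedMatrix is assumed to perform, grouping rating rows by instance
%ID (the real function also handles the ratingRow bookkeeping):
%[instanceOrder, ~, groupIdx] = unique(instanceID);
%Yavg = zeros(length(instanceOrder), size(Yraw,2));
%for c = 1:size(Yraw,2)
%    Yavg(:,c) = accumarray(groupIdx, Yraw(:,c), [], @mean); %mean rating per nodule
%end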
%Build X from weakly segmented features
if reloadWSMatrices == 1
    [Xws, numSegments] = GetXws(instanceID); %raw version
    [XwsMulti, YwsMulti] = GetXwsSampleConcat(Xws, Yaverage, numSegments);
end
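%GetXwsSampleConcat is assumed to stack the samples from each weak
%segmentation vertically and repeat the averaged targets to match, e.g.:
%YwsMulti = repmat(Yaverage, numSegments, 1); %one copy of the targets per segmentation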
settings.histos = histos;
settings.doPlot = doPlot;
fprintf('\nPerforming Relative Truth Learning');
[ rtClassValue, rtRegError, rtErrorHeaders, rtConfusionMatrix, rtSens, rtSpec ] = MLSuite(Xlargest, Yaverage, settings);
%fprintf('\nPerforming learning on segment features'); %With 10
%segmentations this data is just too big for the NN to handle; WSEns
%performs well anyhow.
%[ wsClassValue, wsRegError, wsErrorHeaders, wsConfusionMatrix, wsSens, wsSpec ] = MLSuite(Xws, Yaverage, settings);
if doWSEns == 1
    fprintf('\nPerforming WS learning, each segmentation sent to a different classifier'); %loop over segmentation sets
    clear wsMultiClassValue wsMultiRegError wsMultiErrorHeaders wsMultiConfusionMatrix wsMultiSens wsMultiSpec
    startRow = 0;
    finishRow = 0;
    setLength = size(Xws,1);
    for i = 1:numSegments
        fprintf('\nTraining on segmentation #: %d', i);
        startRow = 1 + (i-1) * setLength;
        if i == numSegments %The last set may have an unusual length
            finishRow = size(XwsMulti,1);
        else
            finishRow = i * setLength;
        end
        [ wsMultiClassValue(:,:,:,i), wsMultiRegError(:,:,i), wsMultiErrorHeaders(:,i), wsMultiConfusionMatrix(:,:,:,i), wsMultiSens(:,i), wsMultiSpec(:,i) ] = MLSuite(XwsMulti(startRow:finishRow,:), YwsMulti(startRow:finishRow,:), settings);
    end
end
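%Optional sanity check: the loop above should consume every row of
%XwsMulti, since the final block runs to the last row:
%assert(finishRow == size(XwsMulti,1));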
%add a random matrix for comparison
fprintf('\nPerforming Learning on randomized features (garbage)');
Xrand = rand(size(Xaverage,1), size(Xaverage,2));
[ randClassValue, randRegError, randErrorHeaders, randConfusionMatrix, randSens, randSpec ] = MLSuite(Xrand, Yaverage, settings);
%-------------------------------------------------------------------
% Summarize L0 Results
%-------------------------------------------------------------------
clear rtOverRandReg rtOverWSReg rtOverRandSuccess rtOverWSSuccess rtClassSuccess
clear wsClassSuccess randClassSuccess
%sum(round(rtClassValue(:,:,3)) == round(Yaverage))/size(Yaverage,1)
%tabulate()
%sum(num2RadClass(rtClassValue(:,:,3)) == num2RadClass(Yaverage))/size(Yaverage,1)
%Final comparisons. Turn the predictions back into classifications and
%compare them to the targets. (Best done after ensemble learning.)
for i = 1:size(rtRegError,1)
    %rtClassSuccess(i,:) = sum(num2RadClass(rtClassValue(:,:,i)) == num2RadClass(Yaverage))/size(Yaverage,1);
    rtClassSuccess(i,:) = sum(round(rtClassValue(:,:,i)) == round(Yaverage))/size(Yaverage,1);
end
%for i = 1:size(randRegError,1)
%wsClassSuccess(i,:) = sum(num2RadClass(wsClassValue(:,:,i)) == num2RadClass(Yws))/size(Yws,1);
% wsClassSuccess(i,:) = sum(round(wsClassValue(:,:,i)) == round(Yaverage))/size(Yaverage,1);
%end
%for i = 1:size(randRegError,1)
%wsClassSuccess(i,:) = sum(num2RadClass(wsClassValue(:,:,i)) == num2RadClass(Yws))/size(Yws,1);
% wsSConcatClassSuccess(i,:) = sum(round(wsSConcatClassValue(:,:,i)) == round(YwsMulti))/size(YwsMulti,1);
%end
%wsMulti results only become meaningful after stacked ensemble learning,
%but estimate the per-segment success anyway
for segment = 1:size(wsMultiClassValue,4)
    for i = 1:size(wsMultiRegError,1)
        wsMultiClassSuccess(i,:,segment) = sum(round(wsMultiClassValue(:,:,i,segment)) == round(Yaverage))/size(Yaverage,1);
    end
end
for i = 1:size(randRegError,1)
    %randClassSuccess(i,:) = sum(num2RadClass(randClassValue(:,:,i)) == num2RadClass(Yaverage))/size(Yaverage,1);
    randClassSuccess(i,:) = sum(round(randClassValue(:,:,i)) == round(Yaverage))/size(Yaverage,1);
end
%rtOverRandSuccess = rtClassSuccess - randClassSuccess;
%rtOverWSSuccess = rtClassSuccess - wsClassSuccess;
%rtOverRandReg = randRegError - rtRegError;
%rtOverWSReg = wsRegError - rtRegError;
%-------------------------------------------------------------------
% Stacking Ensemble Learning
%-------------------------------------------------------------------
clear XrtEns XwsMultiEns XrandEns %avoid stale columns if array sizes change between runs
%reformat the data for ensemble use
for i = 1:numCategories
    %XwsEns(:,:,i) = reshape(wsClassValue(:,i,:),size(wsClassValue,1), []);
    %XwsSConcatEns(:,:,i) = reshape(wsSConcatClassValue(:,i,:),size(wsSConcatClassValue,1), []);
    XrtEns(:,:,i) = reshape(rtClassValue(:,i,:),size(rtClassValue,1), []);
    XwsMultiEns(:,:,i) = reshape(wsMultiClassValue(:,i,:,:), size(wsMultiClassValue,1), []); %Concatenates the estimated ratings for one rating category from every segmentation of the same ROI onto a single row
    XrandEns(:,:,i) = reshape(randClassValue(:,i,:),size(randClassValue,1), []);
end
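%e.g. if rtClassValue is (nodules x categories x learners), then
%XrtEns(:,:,i) is (nodules x learners): one column of L0 predictions per
%base learner for rating category i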
for i = 1:numCategories
    %[wsEnsPred(:,i), wsEnsError(i), wsEnsSuccess(i), wsEnsBeta(:,i)] = StackEns(XwsEns, Yaverage, i );
    %[wsSConcatEnsPred(:,i), wsSConcatEnsError(i), wsSConcatEnsSuccess(i), wsSConcatEnsBeta(:,i)] = StackEns(XwsSConcatEns, YwsMulti, i );
    [wsMultiEnsPred(:,i), wsMultiEnsError(i), wsMultiEnsSuccess(i), wsMultiEnsBeta(:,i)] = StackEns(XwsMultiEns, Yaverage, i );
    [rtEnsPred(:,i), rtEnsError(i), rtEnsSuccess(i), rtEnsBeta(:,i)] = StackEns(XrtEns, Yaverage, i );
    [randEnsPred(:,i), randEnsError(i), randEnsSuccess(i), randEnsBeta(:,i)] = StackEns(XrandEns, Yaverage, i );
end
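%For reference, a minimal stacked-generalization sketch, assuming StackEns
%fits a per-category linear combiner by least squares (the real StackEns
%also reports the error and success rates):
%Xl1 = [ones(size(XrtEns,1),1), XrtEns(:,:,i)]; %L0 predictions plus intercept
%beta = Xl1 \ Yaverage(:,i); %least-squares stacking weights
%pred = Xl1 * beta; %L1 (stacked) prediction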
%Code fragments useful to check results
%[wsMultiClassSuccess(:,:,1);wsMultiClassSuccess(:,:,2);wsMultiClassSuccess(:,:,3);wsMultiClassSuccess(:,:,4)];
%[wsSConcatRegError;wsRegError;rtRegError;randRegError];
%[wsSConcatClassSuccess;wsClassSuccess;rtClassSuccess;randClassSuccess];
%[wsSConcatEnsError;wsEnsError;wsMultiEnsError;rtEnsError;randEnsError];
%[wsSConcatEnsSuccess;wsEnsSuccess;wsMultiEnsSuccess;rtEnsSuccess;randEnsSuccess];
%reshape(mean(wsMultiEnsBeta,2), 3,[])
%-------------------------------------------------------------------
% Significance testing
%-------------------------------------------------------------------
ttestScript
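%ttestScript is assumed to run paired t-tests on the per-category results,
%along the lines of (requires the Statistics Toolbox):
%[h, p] = ttest(rtEnsError(:), randEnsError(:)); %paired across categories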
%-------------------------------------------------------------------
% Save results to Excel
%-------------------------------------------------------------------
if doSave == 1
    saveResults;
end
%-------------------------------------------------------------------
% Plot Performance
%-------------------------------------------------------------------
if doPlot == 3 %disabled: doPlot is never set to 3, and clError/clHeader are no longer defined in this script
    %Plot
    %bar(clError)
    xPlot = 1:numCategories;
    scrsz = get(0,'ScreenSize');
    figure('Position',[scrsz(4)/4, scrsz(4)/2, scrsz(3)/1.5, scrsz(4)/2]-50)
    plot(xPlot, clError(1,:) * 100, '-ro');
    hold on
    plot(xPlot, clError(2,:) * 100, '-bo');
    plot(xPlot, clError(3,:) * 100, '-go');
    plot(xPlot, clError(4,:) * 100, '-ko');
    plot(xPlot, clError(5,:) * 100, '-mo');
    hold off
    xlabel('Category');
    ylabel('Percent Error');
    set(gca,'XTickLabel',settings.categories, 'YLim',[0 80]);
    title('Classification Success Rate');
    %legend('DT 1 to 1','DT Largest Area', 'DT Group to Mean', 'DT Mean to Mean', 'NN');
    legend(clHeader{1}, clHeader{2}, clHeader{3}, clHeader{4}, clHeader{5});
    %make a new figure %Check the contents of this graph. It has probably changed.
    %figure('Position',[scrsz(4)/4, 0, scrsz(3)/1.5, scrsz(4)/2]-50)
    %plot(xPlot, nnError, '-ro');
    %set(gca,'XTickLabel',settings.categories);
    %title('Multiple Classification ??');
    %legend('NN');
    %sim(net,Xaverage')'
end
%clear unnecessary data
clear rebuildRTMatrices reloadWSMatrices minAgreement doLearning doTrees doBagging doNN ...
    doSVM doPlot doSave numTrees errorVector i testCat runMeta row numCategories ...
    categories j option tBag newRowOrder nnError
%Save X and Y data for repeated runs
save('sourceData.mat', 'Xraw', 'Yraw', 'instanceID', 'settings', 'data', 'histos', 'Xlargest', 'Yaverage', 'Xaverage', 'Xws', 'XwsMulti', 'YwsMulti', 'numSegments'); %Xaverage is needed by the rand comparison on reruns
%send an email notification when a long run finishes
if toc/60 > 5
    targetAddress = 'yourEmail@google.com'; %put your email here
    message = sprintf('Script Done. %.1f minutes elapsed.', toc/60); %sprintf preserves the spacing that strcat would strip
    matlabMail(targetAddress, message);
end
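%matlabMail is assumed to wrap MATLAB's built-in sendmail; the server and
%sender below are placeholders:
%setpref('Internet', 'SMTP_Server', 'smtp.example.com');
%setpref('Internet', 'E_mail', 'sender@example.com');
%sendmail(targetAddress, 'MATLAB script finished', message);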
fprintf('*********************************************************************\n');
%misc code fragments
%Cross validation
%cv = fitensemble(X,Y,'Bag',200,'Tree','type','classification','kfold',5)
%Make bags
%bag = fitensemble(Xtrain,Ytrain,'Bag',200,'Tree','type','classification');
%Test and train sets
%cvpart = cvpartition(Y,'holdout',0.3);
%Xtrain = X(training(cvpart),:);
%Ytrain = Y(training(cvpart),:);
%Xtest = X(test(cvpart),:);
%Ytest = Y(test(cvpart),:);
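%A runnable version of the fragments above, assuming X and Y for a single
%rating category are in the workspace (requires the Statistics Toolbox):
%cvpart = cvpartition(size(Y,1), 'holdout', 0.3);
%bag = fitensemble(X(training(cvpart),:), Y(training(cvpart),1), 'Bag', 200, 'Tree', 'type', 'regression');
%Yhat = predict(bag, X(test(cvpart),:));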