--- /dev/null
+++ b/Semantic Features/NotUsed/MLSuite.asv
@@ -0,0 +1,299 @@
+function [ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ] = MLSuite(X, Y, settings)
+%MLSuite Run every enabled machine learning method on the X and Y matrices
+%using the options in the settings struct
+
+categories = settings.categories;
+numCategories = length(categories);
+
+clError = zeros(1, numCategories); %Holds classification success/error rates
+clHeader = cell(1,1);
+
+%Default outputs; overwritten below when the corresponding method runs
+classValue = 0;
+errorRate = 0;
+errorHeaders = 0;
+confusionMatrix = 0;
+sens = 0;
+spec = 0;
+
+%TODO: redo so all learning is done with the same training samples?
+%Temporarily eliminating all types except average-to-rounded-average and
+%using the same training set for each.
+
+option = 3; %Real (continuous) regression
+
+classValue = zeros(size(X,1), numCategories, 1);
+%-------------------------------------------------------------------
+% Standard Learning
+%-------------------------------------------------------------------
+if settings.doLearning == 1
+    if settings.doTrees == 1
+        %-------------------------------------------------------------------
+        % Decision tree learning
+        %-------------------------------------------------------------------
+        %fprintf('\nDecision tree learning as if all data are from different nodules\n');
+        %j = 1;
+        %clHeader = {'DT 1 to 1'};
+        %for i = 1:numCategories
+        %    fprintf('Learning and evaluating success on category: %s\t\t', categories{i});
+        %    [classValue(:,i,1), clError(j,i)] = decisionTreeLearning(Xraw, Yraw, i, 0);
+        %    fprintf('Error: %f\n', clError(j,i));
+        %end
+
+        %fprintf('\nDecision tree learning from mean of each group\n'); %has zero biases in the X matrix
+        %clHeader = vertcat(clHeader, {'DT Group to Mean'});
+        %for i = 1:numCategories
+        %    fprintf('Learning and evaluating success on category: %s\t\t', categories{i});
+        %    [dtClassValue(:,i), errorVector(i)] = decisionTreeLearning(Xgrouped, Yaverage, i, 1);
+        %    fprintf('Error: %f\n', errorVector(i));
+        %end
+        %clError = vertcat(clError, errorVector);
+        %classValue = cat(3, classValue, dtClassValue);
+
+        fprintf('\nDecision tree learning from mean of each group vs mean of features\n');
+        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
+        errorVector = zeros(1, numCategories);
+        dtClassValue = zeros(size(X,1), numCategories); %preallocate
+        for i = 1:numCategories
+            fprintf('Learning and evaluating success on category: %s\t\t', categories{i});
+            [dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
+            fprintf('Error: %f Class Success: %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)));
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, dtClassValue);
+    end
+    %-------------------------------------------------------------------
+    % Neural Network Learning
+    %-------------------------------------------------------------------
+    if settings.doNN == 1
+        fprintf('\nNeural network training.\n');
+        clHeader = vertcat(clHeader, {'NN'});
+        %option = 0; %vector regression
+        %for i = 1:numCategories
+        %    fprintf('Learning and evaluating success on category: %s ', categories{i});
+        %    nnEvaluation(i) = nnLearning(Xgrouped, Ygrouped, i, option);
+        %    fprintf('RMS: %.4f\n', nnEvaluation(i).Eout);
+        %end
+
+        errorVector = zeros(1, numCategories);
+        nnClassValue = zeros(size(X,1), numCategories); %preallocate
+        for i = 1:numCategories
+            fprintf('NN on category: %s ', categories{i});
+            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
+            fprintf('\t\tAverage Error: %.4f Class Success: %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)));
+            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, nnClassValue);
+    end
+
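+    %GetClassSuccessRate is defined elsewhere in this repo; as used here it
+    %is assumed to report the fraction of predictions that round to the true
+    %integer rating. A minimal sketch under that assumption:
+    %
+    %  function rate = GetClassSuccessRate(pred, truth)
+    %  %GETCLASSSUCCESSRATE Fraction of predictions matching truth after rounding
+    %      rate = mean(round(pred) == round(truth));
+    %  end
+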
+    %-------------------------------------------------------------------
+    % SVM Learning
+    %-------------------------------------------------------------------
+    %Yeah boooy, multiclass SVMs are go
+    if settings.doSVM
+        fprintf('Performing SVM learning\n');
+        %Kernel method comes from settings.kernel (e.g. 'polynomial' or 'mlp';
+        %related option: 'polyorder')
+
+        svmError = zeros(1, numCategories);
+        svmErrorDelta = zeros(1, numCategories);
+        for testCat = 1:numCategories
+            fprintf('Learning on category %d %s\n', testCat, categories{testCat});
+
+            [svmClassValue(:,testCat,1), svmError(testCat), svmConfusionMatrix(:,:,testCat), svmSens(testCat,:), svmSpec(testCat,:)] ...
+                = SVMMultiBagged(X, round(Y(:,testCat)), settings.kernel, settings.minVote, settings.maxIter);
+            fprintf('Broken clock (majority class) accuracy: %.1f\n', settings.histos(testCat,7) * 100);
+            fprintf('SVM method 2 error rate: %.1f\n\n', svmError(testCat));
+        end
+        %svmError = vertcat(svmError, settings.histos(:,7)' * 100);
+        %Delta between each SVM error row and the majority-class baseline
+        for i = 1:size(svmError,1)
+            svmErrorDelta(i,:) = svmError(i,:) - (100 - (settings.histos(:,7)' * 100));
+        end
+        %Copy to the outgoing variables
+        classValue = cat(3, classValue(:,:,1), svmClassValue); %NOTE: drops earlier method slabs; needs to be altered to mesh with the other ML types' data
+        clError = vertcat(clError, svmError);
+        clHeader = vertcat(clHeader, {'SVM'});
+        confusionMatrix = svmConfusionMatrix;
+        sens = svmSens;
+        spec = svmSpec;
+    end
+
+
+    %-------------------------------------------------------------------
+    % Bayesian Learning
+    %-------------------------------------------------------------------
+    if settings.doBayes
+        fprintf('\nNaive Bayes learning.\n');
+        clHeader = vertcat(clHeader, {'Bayes'});
+        errorVector = zeros(1, numCategories);
+        bayesClassValue = zeros(size(X,1), numCategories); %preallocate
+
+        for i = 1:numCategories
+            fprintf('Bayes on category: %s ', categories{i});
+            [bayesClassValue(:,i), errorVector(i)] = bayesLearning(X, Y, i);
+            fprintf('\t\tAverage Error: %.4f Class Success: %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)));
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, bayesClassValue);
+    end
+
+    %-------------------------------------------------------------------
+    % Ensemble Learning
+    %-------------------------------------------------------------------
+    if settings.doBagging == 1
+        %-------------------------------------------------------------------
+        % Bagging
+        %-------------------------------------------------------------------
+        fprintf('\nPerforming Bagging\n');
+
+        %fprintf('Bagging as if all data are from different nodules\n');
+        %clHeader = vertcat(clHeader, {'Bagging 1 to 1'});
+        %for i = 1:numCategories
+        %    fprintf('Learning and evaluating success on category: %s\n', categories{i});
+        %    tBag = TreeBagger(settings.numTrees, Xraw, Yraw(:,i), 'OOBPred', 'on', 'NPrint', 10);
+        %    errorArray = oobError(tBag);
+        %    errorVector(i) = errorArray(end);
+        %    fprintf('Error: %f\n', errorVector(i));
+
+        %    bagClassValue(:,i) = predict(tBag, Xraw(trainRows,i));
+
+        %    if settings.doPlot == 1
+        %        %plot the change. Real slow, so might as well look at something
+        %        figure;
+        %        plot(oobError(tBag));
+        %        xlabel('number of grown trees');
+        %        ylabel('out-of-bag classification error');
+        %        title(categories{i});
+        %    end
+        %end
+        %clError = vertcat(clError, errorVector);
+
+        %fprintf('\nBagging the mean of each group\n'); %zero bias
+        %clHeader = vertcat(clHeader, {'Bagging Group to Mean'});
+        %for i = 1:numCategories
+        %    fprintf('Learning and evaluating success on category: %s\n', categories{i});
+        %    %tBag = TreeBagger(numTrees, Xaverage, Yaverage(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 10);
+        %    tBag = TreeBagger(settings.numTrees, Xgrouped, round(Yaverage(:,i)), 'OOBPred', 'on', 'NPrint', 10);
+        %    errorArray = oobError(tBag);
+        %    errorVector(i) = errorArray(end);
+        %    fprintf('Error: %f\n', errorVector(i));
+
+        %    bagClassValue(:,i) = predict(tBag, Xraw(trainRows,i));
+
+        %    if settings.doPlot == 1
+        %        %plot the change. Real slow, so might as well look at something
+        %        figure;
+        %        plot(oobError(tBag));
+        %        xlabel('number of grown trees');
+        %        ylabel('out-of-bag classification error');
+        %        title(categories{i});
+        %    end
+        %end
+        %clError = vertcat(clError, errorVector);
+
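+        %RMSE is a helper defined elsewhere in this repo; as used below it is
+        %assumed to be the usual root-mean-square error. A minimal sketch
+        %under that assumption:
+        %
+        %  function e = RMSE(pred, truth)
+        %  %RMSE Root-mean-square error between predictions and targets
+        %      e = sqrt(mean((pred - truth).^2));
+        %  end
+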
+        fprintf('\nBagging the mean of each group vs mean of features\n');
+        clHeader = vertcat(clHeader, {'Bagging Mean to Mean'});
+        bagClassValue = zeros(size(X,1), numCategories); %preallocate
+        if option == 3 %Really do regression
+            for i = 1:numCategories
+                fprintf('Learning and evaluating success on category: %s\n', categories{i});
+                tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25); %, 'minleaf', 15);
+                %tBag = TreeBagger(settings.numTrees, X, round(Y(:,i)), 'OOBPred', 'on', 'NPrint', 10);
+                %errorArray = oobError(tBag);
+                %errorVector(i) = errorArray(end);
+
+                %Use oobPredict rather than predict(tBag, X): each tree was
+                %trained on its own bootstrap sample, so in-sample predictions
+                %would be optimistically biased; oobPredict scores each
+                %observation using only the trees that did not train on it.
+                bagClassValue(:,i) = oobPredict(tBag);
+                errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i));
+                fprintf('Error: %f Class Success: %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)));
+                %For regression, bagClassProb is just the standard deviation
+                %across all the trees, so it is only a rough confidence
+                %indication.
+                if settings.doPlot == 1
+                    %plot the change. Real slow, so might as well look at something
+                    figure;
+                    plot(oobError(tBag));
+                    xlabel('number of grown trees');
+                    ylabel('out-of-bag regression error');
+                    title(categories{i});
+                end
+            end
+            clError = vertcat(clError, errorVector);
+            classValue = cat(3, classValue, bagClassValue);
+
+        else
+            for i = 1:numCategories
+                fprintf('***Obsolete code. Learning and evaluating success on category: %s\n', categories{i});
+                %tBag = TreeBagger(numTrees, Xaverage, Yaverage(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 10);
+                tBag = TreeBagger(settings.numTrees, X, round(Y(:,i)), 'OOBPred', 'on', 'NPrint', 10);
+                errorArray = oobError(tBag);
+                errorVector(i) = errorArray(end);
+                fprintf('Error: %f\n', errorVector(i));
+
+                %predict returns class labels as a cellstr plus per-class scores
+                [bagLabels, bagScores] = predict(tBag, X);
+                bagClassValue(:,i) = str2double(bagLabels);
+                bagClassProb(:,i) = max(bagScores, [], 2); %score of the winning class
+
+                if settings.doPlot == 1
+                    %plot the change. Real slow, so might as well look at something
+                    figure;
+                    plot(oobError(tBag));
+                    xlabel('number of grown trees');
+                    ylabel('out-of-bag classification error');
+                    title(categories{i});
+                end
+            end
+            clError = vertcat(clError, errorVector);
+            classValue = cat(3, classValue, bagClassValue);
+        end
+    end
+end
+
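+%classValue is observations x categories x methods; the first slab is the
+%zero placeholder created above, stripped in the compile step below. A
+%hypothetical lookup once the function has returned:
+%  mIdx = find(strcmp(errorHeaders, 'Bagging Mean to Mean'));
+%  baggedCat2 = classValue(:, 2, mIdx);
+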
+%-------------------------------------------------------------------
+% Compile results
+%-------------------------------------------------------------------
+%clError = horzcat(clError, mean(clError, 2));
+%bagError = horzcat(bagError, mean(bagError, 2));
+%nnError = horzcat(nnError, mean(nnError, 2));
+%allErrors = vertcat(clError, bagError);
+%allErrors = vertcat(allErrors, nnError);
+%averageError = [mean(clError, 2); mean(bagError, 2); mean(nnError, 2)];
+%Strip the placeholder first row/slab that the accumulators started with
+errorRate = clError(2:end,:);
+errorHeaders = clHeader(2:end);
+classValue = classValue(:,:,2:end);
+
+%-------------------------------------------------------------------
+% Investigate metafeatures
+%-------------------------------------------------------------------
+if(settings.runMeta == 1)
+    %Build an array of the ratings blocks (before 0s are added).
+    %NOTE: datax is not an argument of this function; it is assumed to be
+    %available in the workspace (it should probably be passed in via settings).
+    allRatings = cell(16555,1); %generous preallocation
+    j = 1;
+    i = 1;
+    while(i <= size(datax,1))
+        rating = GetRadiologistsRatings(datax, i);
+        allRatings{j} = rating;
+        %fprintf('Size %d start %d finish %d i %d t %d next %d name %s\n', rating.totalRatings, rating.rows(1), rating.rows(2), i, rating.totalRatings, i + rating.totalRatings, rating.noduleID);
+        i = i + rating.totalRatings;
+        j = j + 1;
+    end
+    allRatings = allRatings(1:j-1); %drop the unused preallocated cells
+
+    %Compare stdevs. Struct-array fields could also be gathered in one shot,
+    %as in the MATLAB docs example: total = sum([patient.billing]);
+    stdevList = zeros(size(allRatings,1), length(allRatings{1}.stdev));
+    for i = 1:size(allRatings,1)
+        stdevList(i,:) = allRatings{i}.stdev;
+    end
+    metaFeatures.stdev = mean(stdevList); %Average stdev for each of the 4 ratings
+end
+
+
+%[ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ]
+
+
+%clear unnecessary data
+%clear rebuildMatrices reloadFeatures minAgreement doLearning doTrees doBagging doNN ...
+%    doSVM doPlot doSave numTrees errorVector i testCat runMeta row numCategories ...
+%    categories j option tBag newRowOrder nnError
+
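+%Example invocation (a hypothetical sketch; the settings fields are the ones
+%read above, and the category names are placeholders, not the real labels):
+%  settings.categories = {'subtlety', 'margin', 'lobulation', 'malignancy'};
+%  settings.doLearning = 1; settings.doTrees = 1; settings.doNN = 1;
+%  settings.doSVM = 0; settings.doBayes = 0; settings.doBagging = 1;
+%  settings.hiddenLayer = 10; settings.numTrees = 200;
+%  settings.doPlot = 0; settings.runMeta = 0;
+%  [classValue, errorRate, errorHeaders] = MLSuite(X, Y, settings);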