--- a
+++ b/Semantic Features/MLSuite.m
@@ -0,0 +1,146 @@
+function [ classValue, errorRate, errorHeaders, confusionMatrix, sens, spec ] = MLSuite(X, Y, settings)
+%MLSuite Runs all the base-level machine learning methods on the given X
+%and Y matrices, using the options in the settings struct
+
+categories = settings.categories;
+numCategories = length(categories);
+
+clError = zeros(1, numCategories); %To hold classification success/error rates
+clHeader = cell(1,1);
+
+classValue = 0;
+errorRate = 0;
+errorHeaders = 0;
+confusionMatrix = 0; %Code exists, but not used since changing from classes to numbers for the labels
+sens = 0;            %Code exists, but not used since changing from classes to numbers for the labels
+spec = 0;            %Code exists, but not used since changing from classes to numbers for the labels
+
+%Note: each algorithm, on each run, will test on different rows
+
+classValue = zeros(size(X,1), numCategories, 1);
+%-------------------------------------------------------------------
+% Standard Learning
+%-------------------------------------------------------------------
+if settings.doLearning == 1
+    if settings.doTrees == 1
+        %-------------------------------------------------------------------
+        % Decision tree learning
+        %-------------------------------------------------------------------
+        fprintf('\nDecision tree learning from mean of each group vs mean of features\n');
+        clHeader = vertcat(clHeader, {'DT Mean to Mean'});
+        errorVector = zeros(1, numCategories);
+        for i = 1:numCategories
+            fprintf('Learning and evaluating success on category: %s\t\t', str2mat(categories(i)));
+            [dtClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), @classregtree, @(X, trainedStruct) eval(trainedStruct, X));
+            %[dtClassValue(:,i), errorVector(i)] = dtLearning(X, Y, i);
+            fprintf('Error: %f Class Success: %f\n', errorVector(i), GetClassSuccessRate(dtClassValue(:,i), Y(:,i)));
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, dtClassValue);
+    end
+    %-------------------------------------------------------------------
+    % Neural Network Learning
+    %-------------------------------------------------------------------
+    if settings.doNN == 1
+        fprintf('\nNeural Network training.\n');
+        clHeader = vertcat(clHeader, {'NN'});
+
+        errorVector = zeros(1, numCategories);
+        for i = 1:numCategories
+            fprintf('NN on category: %s ', str2mat(categories(i)));
+            [nnClassValue(:,i), errorVector(i)] = nnLearning(X, Y, settings.hiddenLayer, i);
+            fprintf('\t\tAverage Error: %.4f Class Success: %f\n', errorVector(i), GetClassSuccessRate(nnClassValue(:,i), Y(:,i)));
+            %fprintf('Average Error: %.4f Pstdev: %.4f\n', nnEvaluation(i).Eout, nnEvaluation(i).sigmaOut);
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, nnClassValue);
+    end
+
+    %-------------------------------------------------------------------
+    % SVM Learning
+    %-------------------------------------------------------------------
+    %Multiclass/class and probability/continuous numerical predictions by SVM
+    %never worked reliably, so that code was removed. I hear scikit-learn for
+    %Python works well.
+
+    %-------------------------------------------------------------------
+    % Bayesian Learning
+    %-------------------------------------------------------------------
+    %Does not quite work! It succeeds on ~99% of samples, but <1% will yield
+    %a NaN prediction, which destroys all the math steps that come afterward.
+    %You can try to figure out how to fix it properly, or just convert the
+    %NaNs into 0s and accept failing those samples (see the sketch below).
+    if settings.doBayes == 1
+        fprintf('\nNaive Bayes Learning.\n');
+        clHeader = vertcat(clHeader, {'Bayes'});
+        errorVector = zeros(1, numCategories);
+
+        for i = 1:numCategories
+            fprintf('Bayes on category: %s ', str2mat(categories(i)));
+            [bayesClassValue(:,i), errorVector(i), ~] = CrossValLearn(X, Y(:,i), ...
+                @(X, Y) NaiveBayes.fit(X, Y, 'Distribution', 'kernel'), ...
+                @(X, trainedStruct) posterior(trainedStruct, X, 'HandleMissing', 'On') ...
+                );
+            fprintf('\t\tAverage Error: %.4f Class Success: %f\n', errorVector(i), GetClassSuccessRate(bayesClassValue(:,i), Y(:,i)));
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, bayesClassValue);
+    end
+
+    %-------------------------------------------------------------------
+    % Ensemble Learning (still a first-layer classifier)
+    %-------------------------------------------------------------------
+    if settings.doBagging == 1
+        %-------------------------------------------------------------------
+        % Bagging
+        %-------------------------------------------------------------------
+        fprintf('\nPerforming Bagging\n');
+        clHeader = vertcat(clHeader, {'Bagging Trees'});
+        for i = 1:numCategories
+            fprintf('Learning and evaluating success on category: %s\n', str2mat(categories(i)));
+            tBag = TreeBagger(settings.numTrees, X, Y(:,i), 'method', 'regression', 'OOBPred', 'on', 'NPrint', 25); % , 'minleaf', 15);
+            %errorArray = oobError(tBag);
+            %errorVector(i) = errorArray(end);
+
+            %bagClassValue(:,i) = predict(tBag, X); %Do not score on the
+            %training data: each tree was trained on a bootstrap sample, so
+            %in-bag predictions are optimistic. oobPredict scores each
+            %observation using only the trees for which it was out-of-bag.
+            bagClassValue(:,i) = oobPredict(tBag);
+            errorVector(i) = RMSE(bagClassValue(:,i), Y(:,i));
+            fprintf('Error: %f Class Success: %f\n', errorVector(i), GetClassSuccessRate(bagClassValue(:,i), Y(:,i)));
+            %For regression, bagClassProb is just the standard deviation of
+            %the predictions across all the trees, so it's a rough indication
+            %of confidence (see the sketch below)
+            if settings.doPlot == 1
+                %Plot error versus ensemble size; training is slow, so you might as well watch something
+                figure;
+                plot(oobError(tBag));
+                xlabel('number of grown trees');
+                ylabel('out-of-bag regression error');
+                title(str2mat(categories(i)));
+            end
+        end
+        clError = vertcat(clError, errorVector);
+        classValue = cat(3, classValue, bagClassValue);
+    end
+end
+
+%-------------------------------------------------------------------
+% Compile results
+%-------------------------------------------------------------------
+%clError = horzcat(clError, mean(clError, 2));
+%bagError = horzcat(bagError, mean(bagError, 2));
+%nnError = horzcat(nnError, mean(nnError, 2));
+%allErrors = vertcat(clError, bagError);
+%allErrors = vertcat(allErrors, nnError);
+%averageError = [mean(clError, 2);mean(bagError, 2);mean(nnError, 2)];
+%Remove the blank entries at the beginning. This was the easiest way I found
+%to 1. guarantee the variables are initialized (so MATLAB doesn't complain)
+%and 2. keep the length unspecified, so the code needs no rework when you add
+%more classifiers to the mix, or turn some of them off in the settings.
+errorRate = clError(2:end,:);
+errorHeaders = clHeader(2:end);
+classValue = classValue(:,:,2:end);
+
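
On the Bayes caveat: if losing the stray samples is acceptable, the conversion the comment suggests is a one-liner. A minimal sketch, assuming it runs right after the `CrossValLearn` call inside the Bayes block:

```matlab
% Hypothetical workaround for the NaN issue noted in the Bayesian Learning
% comment: zero out NaN predictions so downstream math doesn't propagate
% NaNs; the affected samples are simply conceded as misclassified.
bayesClassValue(isnan(bayesClassValue)) = 0;
```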
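
On the `bagClassProb` comment: for regression ensembles, `oobPredict` can return that spread directly as a second output. A sketch of surfacing it, keeping `bagClassProb` as the hypothetical name from the comment:

```matlab
% Sketch: capture the out-of-bag prediction spread alongside the predictions.
% For regression TreeBaggers, the second output of oobPredict is the standard
% deviation of the responses across the out-of-bag trees, which serves as a
% rough confidence indicator, as the comment suggests.
[bagClassValue(:,i), bagClassProb(:,i)] = oobPredict(tBag);
```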
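
For reference, a minimal calling sketch assembled from the `settings` fields this function reads. The field names come from the diff; the values and category names are placeholders, and the `X`/`Y` shapes are inferred from `size(X,1)` and `Y(:,i)` above (observations by features, and observations by categories):

```matlab
% Placeholder settings; every field below is read somewhere in MLSuite.
settings.categories  = {'catA', 'catB'};  % hypothetical category labels
settings.doLearning  = 1;    % master switch for the first-layer classifiers
settings.doTrees     = 1;    % decision trees via CrossValLearn/classregtree
settings.doNN        = 1;    % neural network via nnLearning
settings.hiddenLayer = 10;   % hidden layer size handed to nnLearning
settings.doBayes     = 0;    % off here, given the NaN caveat above
settings.doBagging   = 1;    % TreeBagger regression ensemble
settings.numTrees    = 100;  % number of bagged trees
settings.doPlot      = 0;    % plot out-of-bag error curves while bagging

[classValue, errorRate, errorHeaders] = MLSuite(X, Y, settings);
```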