
--- a
+++ b/combinedDeepLearningActiveContour/minFunc/minFunc.m
@@ -0,0 +1,1145 @@
+function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
+% minFunc(funObj,x0,options,varargin)
+%
+% Unconstrained optimizer using a line search strategy
+%
+% Uses an interface very similar to fminunc
+%   (it doesn't support all of the optimization toolbox options,
+%       but supports many other options).
+%
+% It computes descent directions using one of ('Method'):
+%   - 'sd': Steepest Descent
+%       (no previous information used, not recommended)
+%   - 'csd': Cyclic Steepest Descent
+%       (uses previous step length for a fixed length cycle)
+%   - 'bb': Barzilai and Borwein Gradient
+%       (uses only previous step)
+%   - 'cg': Non-Linear Conjugate Gradient
+%       (uses only previous step and a vector beta)
+%   - 'scg': Scaled Non-Linear Conjugate Gradient
+%       (uses previous step and a vector beta, 
+%           and Hessian-vector products to initialize line search)
+%   - 'pcg': Preconditioned Non-Linear Conjugate Gradient
+%       (uses only previous step and a vector beta, preconditioned version)
+%   - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
+%       (default: uses a predetermined number of previous steps to form a 
+%           low-rank Hessian approximation)
+%   - 'newton0': Hessian-Free Newton
+%       (numerically computes Hessian-Vector products)
+%   - 'pnewton0': Preconditioned Hessian-Free Newton 
+%       (numerically computes Hessian-Vector products, preconditioned
+%       version)
+%   - 'qnewton': Quasi-Newton Hessian approximation
+%       (uses dense Hessian approximation)
+%   - 'mnewton': Newton's method with the Hessian recomputed after every
+%       user-specified number of iterations
+%       (needs user-supplied Hessian matrix)
+%   - 'newton': Newton's method with Hessian calculation every iteration
+%       (needs user-supplied Hessian matrix)
+%   - 'tensor': Tensor
+%       (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
+%
+% Several line search strategies are available for finding a step length satisfying
+%   the termination criteria ('LS'):
+%   - 0: Backtrack w/ Step Size Halving
+%   - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values
+%   - 2: Backtrack w/ Cubic Interpolation from new function + gradient
+%       values (default for 'bb')
+%   - 3: Bracketing w/ Step Size Doubling and Bisection
+%   - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function +
+%       gradient values (default for all except 'bb')
+%   - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation
+%   - 6: Use Matlab Optimization Toolbox's line search
+%           (requires Matlab's linesearch.m to be added to the path)
+%
+%   Above, the first three find a point satisfying the Armijo condition,
+%   while the last four search for a point satisfying the Wolfe
+%   conditions.  If the objective function overflows, it is recommended
+%   to use one of the first 3.
+%   The first three can be used to perform a non-monotone
+%   line search by changing the option 'Fref'.
+%
+% Several strategies for choosing the initial step size are available ('LS_init'):
+%   - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd')
+%       (t = 1)
+%   - 1: Use a step similar to the previous step (default for 'cg' and 'sd')
+%       (t = t_old*min(2,(g_old'd_old)/(g'd)))
+%   - 2: Quadratic Initialization using previous function value and new
+%       function value/gradient (use this if steps tend to be very long)
+%       (t = min(1,2*(f-f_old)/(g'd)))
+%   - 3: The minimum between 1 and twice the previous step length
+%       (t = min(1,2*t))
+%   - 4: The scaled conjugate gradient step length (may accelerate
+%       conjugate gradient methods, but requires a Hessian-vector product)
+%       (t = -g'd/d'Hd)
+%
+% Inputs:
+%   funObj is a function handle; it must return the objective value and
+%       gradient (and, for the Newton-type methods, the Hessian)
+%   x0 is a starting vector
+%   options is a struct containing parameters
+%       (defaults are used for non-existent or blank fields)
+%   all other arguments are passed through to funObj
+%
+% Outputs:
+%   x is the best point found
+%   f is the function value at x
+%   exitflag returns an exit condition
+%   output returns a structure with other information
+%
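+% Example usage (a minimal sketch; the quadratic objective below is purely
+% illustrative, any funObj returning [f,g] will do):
+%     funObj = @(x) deal(0.5*(x'*x), x);   % f = 0.5*||x||^2, gradient g = x
+%     options = struct('Method','lbfgs','Display','iter');
+%     [x,f,exitflag,output] = minFunc(funObj,randn(5,1),options);
+%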
+% Supported Input Options
+%   Display - Level of display [ off | final | (iter) | full | excessive ]
+%   MaxFunEvals - Maximum number of function evaluations allowed (1000)
+%   MaxIter - Maximum number of iterations allowed (500)
+%   TolFun - Termination tolerance on the first-order optimality (1e-5)
+%   TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9)
+%   Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
+%       qnewton | mnewton | newton | tensor ]
+%   c1 - Sufficient Decrease for Armijo condition (1e-4)
+%   c2 - Curvature condition parameter for the Wolfe conditions (.2 for cg methods, .9 otherwise)
+%   LS_init - Line Search Initialization -see above (2 for cg/sd, 4 for scg, 0 otherwise)
+%   LS - Line Search type -see above (2 for bb, 4 otherwise)
+%   Fref - Setting this to a positive integer greater than 1
+%       will use a non-monotone Armijo objective in the line search.
+%       (20 for bb, 10 for csd, 1 for all others)
+%   numDiff - compute derivative numerically
+%       (default: 0) (this option has a different effect for 'newton', see below)
+%   useComplex - if 1, use complex differentials when computing numerical derivatives
+%       to get very accurate values (default: 0)
+%   DerivativeCheck - if 'on', computes derivatives numerically at initial
+%       point and compares to user-supplied derivative (default: 'off')
+%   outputFcn - function to run after each iteration (default: []).  It
+%       should have the following interface (see the example after this
+%       list): outputFcn(x,infoStruct,state,varargin{:})
+%   useMex - where applicable, use mex files to speed things up (default: 1)
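+%
+%   Example outputFcn (a sketch; it uses only the documented x and state
+%   arguments, since the fields of infoStruct are filled in internally by
+%   callOutput):
+%       options.outputFcn = @(x,infoStruct,state,varargin) ...
+%           fprintf('%s: ||x||_1 = %g\n',state,norm(x,1));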
+%
+% Method-specific input options:
+%   newton:
+%       HessianModify - type of Hessian modification for direct solvers to
+%       use if the Hessian is not positive definite (default: 0)
+%           0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
+%           (requires eigenvalues on iterations where matrix is not pd)
+%           1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
+%           (an approximation to method 0, does not require eigenvalues)
+%           2: Modified LDL factorization
+%           (only 1 generalized Cholesky factorization done and no eigenvalues required)
+%           3: Modified Spectral Decomposition
+%           (requires eigenvalues)
+%           4: Modified Symmetric Indefinite Factorization
+%           5: Uses the eigenvector of the smallest eigenvalue as negative
+%           curvature direction
+%       cgSolve - use conjugate gradient instead of direct solver (default: 0)
+%           0: Direct Solver
+%           1: Conjugate Gradient
+%           2: Conjugate Gradient with Diagonal Preconditioner
+%           3: Conjugate Gradient with LBFGS Preconditioner
+%           x: Conjugate Gradient with Symmetric Successive Over-Relaxation
+%           Preconditioner with parameter x
+%               (where x is a real number in the range [0,2])
+%           x: Conjugate Gradient with Incomplete Cholesky Preconditioner
+%           with drop tolerance -x
+%               (where x is a real negative number)
+%       numDiff - compute Hessian numerically
+%                 (default: 0, done with complex differentials if useComplex = 1)
+%       LS_saveHessianComp - when set to 1, only computes the Hessian at the
+%       first and last iteration of the line search (default: 1)
+%   mnewton:
+%       HessianIter - number of iterations to use same Hessian (default: 5)
+%   qnewton:
+%       initialHessType - scale initial Hessian approximation (default: 1)
+%       qnUpdate - type of quasi-Newton update (default: 3):
+%           0: BFGS
+%           1: SR1 (when it is positive-definite, otherwise BFGS)
+%           2: Hoshino
+%           3: Self-Scaling BFGS
+%           4: Oren's Self-Scaling Variable Metric method 
+%           5: McCormick-Huang asymmetric update
+%       Damped - use damped BFGS update (default: 1)
+%   newton0/pnewton0:
+%       HvFunc - user-supplied function that returns Hessian-vector products
+%           (by default, these are computed numerically using autoHv)
+%           HvFunc should have the following interface: HvFunc(v,x,varargin{:})
+%           (an example appears at the end of this list)
+%       useComplex - use a complex perturbation to get high accuracy
+%           Hessian-vector products (default: 0)
+%           (the increased accuracy can make the method much more efficient,
+%               but gradient code must properly support complex inputs)
+%       useNegCurv - a negative curvature direction is used as the descent
+%           direction if one is encountered during the cg iterations
+%           (default: 1)
+%       precFunc (for pnewton0 only) - user-supplied preconditioner
+%           (by default, an L-BFGS preconditioner is used)
+%           precFunc should have the following interface:
+%           precFunc(v,x,varargin{:})
+%   lbfgs:
+%       Corr - number of corrections to store in memory (default: 100)
+%           (higher numbers converge faster but use more memory)
+%       Damped - use damped update (default: 0)
+%   cg/scg/pcg:
+%       cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
+%           0: Fletcher Reeves
+%           1: Polak-Ribiere
+%           2: Hestenes-Stiefel (not supported for pcg)
+%           3: Gilbert-Nocedal
+%       HvFunc (for scg only) - user-supplied function that returns Hessian-vector
+%           products
+%           (by default, these are computed numerically using autoHv)
+%           HvFunc should have the following interface:
+%           HvFunc(v,x,varargin{:})
+%       precFunc (for pcg only) - user-supplied preconditioner
+%           (by default, an L-BFGS preconditioner is used)
+%           precFunc should have the following interface:
+%           precFunc(v,x,varargin{:})
+%   bb:
+%       bbType - type of bb step (default: 1)
+%           0: min_alpha ||delta_x - alpha delta_g||_2
+%           1: min_alpha ||alpha delta_x - delta_g||_2
+%           2: Conic BB
+%           3: Gradient method with retards
+%   csd:
+%       cycle - length of cycle (default: 3)
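+%
+%   Example of a user-supplied Hessian-vector product for 'newton0' (a
+%   sketch for the quadratic f = 0.5*x'*A*x, where the product is simply A*v):
+%       A = gallery('lehmer',10);               % symmetric positive definite
+%       funObj = @(x) deal(0.5*(x'*A*x), A*x);  % returns [f,g]
+%       options = struct('Method','newton0');
+%       options.HvFunc = @(v,x) A*v;            % interface: HvFunc(v,x,varargin{:})
+%       x = minFunc(funObj,randn(10,1),options);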
+%
+% Supported Output Options
+%   iterations - number of iterations taken
+%   funcCount - number of function evaluations
+%   algorithm - algorithm used
+%   firstorderopt - first-order optimality
+%   message - exit message
+%   trace.funcCount - function evaluations after each iteration
+%   trace.fval - function value after each iteration
+%
+% Author: Mark Schmidt (2006)
+% Web: http://www.cs.ubc.ca/~schmidtm
+%
+% Sources (in order of how much the source material contributes):
+%   J. Nocedal and S.J. Wright.  1999.  "Numerical Optimization".  Springer Verlag.
+%   R. Fletcher.  1987.  "Practical Methods of Optimization".  Wiley.
+%   J. Demmel.  1997.  "Applied Numerical Linear Algebra".  SIAM.
+%   R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R.
+%   Pozo, C. Romine, and H. van der Vorst.  1994.  "Templates for the Solution of
+%   Linear Systems: Building Blocks for Iterative Methods".  SIAM.
+%   J. Moré and D. Thuente.  "Line search algorithms with guaranteed
+%   sufficient decrease".  ACM Trans. Math. Softw. vol 20, 286-307, 1994.
+%   M. Raydan.  "The Barzilai and Borwein gradient method for the large
+%   scale unconstrained minimization problem".  SIAM J. Optim., 7, 26-33,
+%   (1997).
+%   "Mathematical Optimization".  The Computational Science Education
+%   Project.  1995.
+%   C. Kelley.  1999.  "Iterative Methods for Optimization".  Frontiers in
+%   Applied Mathematics.  SIAM.
+
+if nargin < 3
+    options = [];
+end
+
+% Get Parameters
+[verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,...
+    corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,...
+    HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,...
+    DerivativeCheck,Damped,HvFunc,bbType,cycle,...
+    HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
+    minFunc_processInputOptions(options);
+
+if isfield(options, 'logfile')
+    logfile = options.logfile;
+else
+    logfile = [];
+end
+
+% Constants
+SD = 0;
+CSD = 1;
+BB = 2;
+CG = 3;
+PCG = 4;
+LBFGS = 5;
+QNEWTON = 6;
+NEWTON0 = 7;
+NEWTON = 8;
+TENSOR = 9;
+
+% Initialize
+p = length(x0);
+d = zeros(p,1);
+x = x0;
+t = 1;
+
+% If necessary, form numerical differentiation functions
+funEvalMultiplier = 1;
+if numDiff && method ~= TENSOR
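+    % Shift the user-supplied arguments right by two and prepend useComplex
+    % and the original funObj, matching the autoGrad/autoHess interfaces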
+    varargin(3:end+2) = varargin(1:end);
+    varargin{1} = useComplex;
+    varargin{2} = funObj;
+    if method ~= NEWTON
+        if debug
+            if useComplex
+                fprintf('Using complex differentials for gradient computation\n');
+            else
+                fprintf('Using finite differences for gradient computation\n');
+            end
+        end
+        funObj = @autoGrad;
+    else
+        if debug
+            if useComplex
+                fprintf('Using complex differentials for Hessian computation\n');
+            else
+                fprintf('Using finite differences for Hessian computation\n');
+            end
+        end
+        funObj = @autoHess;
+    end
+
+    if method == NEWTON0 && useComplex == 1
+        if debug
+            fprintf('Turning off the use of complex differentials\n');
+        end
+        useComplex = 0;
+    end
+
+    if useComplex
+        funEvalMultiplier = p;
+    else
+        funEvalMultiplier = p+1;
+    end
+end
+
+% Evaluate Initial Point
+if method < NEWTON
+    [f,g] = feval(funObj, x, varargin{:});
+else
+    [f,g,H] = feval(funObj, x, varargin{:});
+    computeHessian = 1;
+end
+funEvals = 1;
+
+if strcmp(DerivativeCheck,'on')
+    if numDiff
+        fprintf('Cannot do derivative checking when numDiff is 1\n');
+    end
+    % Check provided gradient/hessian function using numerical derivatives
+    fprintf('Checking Gradient:\n');
+    [f2,g2] = autoGrad(x,useComplex,funObj,varargin{:});
+
+    fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2)));
+    if max(abs(g-g2)) > 1e-4
+        fprintf('User NumDif:\n');
+        [g g2]
+        diff = abs(g-g2)
+        pause;
+    end
+
+    if method >= NEWTON
+        fprintf('Checking Hessian:\n');
+        [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:});
+
+        fprintf('Max difference between user and numerical Hessian: %f\n',max(abs(H(:)-H2(:))));
+        if max(abs(H(:)-H2(:))) > 1e-4
+            H
+            H2
+            diff = abs(H-H2)
+            pause;
+        end
+    end
+end
+
+% Output Log
+if verboseI
+    fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
+end
+
+if ~isempty(logfile)
+    fid = fopen(logfile, 'a');
+    if (fid > 0)
+        fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
+        fclose(fid);
+    end
+end
+
+% Output Function
+if ~isempty(outputFcn)
+    callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:});
+end
+
+% Initialize Trace
+trace.fval = f;
+trace.funcCount = funEvals;
+
+% Check optimality of initial point
+if sum(abs(g)) <= tolFun
+    exitflag=1;
+    msg = 'Optimality Condition below TolFun';
+    if verbose
+        fprintf('%s\n',msg);
+    end
+    if nargout > 3
+        output = struct('iterations',0,'funcCount',1,...
+            'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
+    end
+    return;
+end
+
+% Perform up to a maximum of 'maxIter' descent steps:
+for i = 1:maxIter
+
+    % ****************** COMPUTE DESCENT DIRECTION *****************
+
+    switch method
+        case SD % Steepest Descent
+            d = -g;
+
+        case CSD % Cyclic Steepest Descent
+
+            if mod(i,cycle) == 1 % Use Steepest Descent
+                alpha = 1;
+                LS_init = 2;
+                LS = 4; % Precise Line Search
+            elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step
+                alpha = t;
+                LS_init = 0;
+                LS = 2; % Non-monotonic line search
+            end
+            d = -alpha*g;
+
+        case BB % Steepest Descent with Barzilai and Borwein Step Length
+
+            if i == 1
+                d = -g;
+            else
+                y = g-g_old;
+                s = t*d;
+                if bbType == 0
+                    yy = y'*y;
+                    alpha = (s'*y)/(yy);
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                elseif bbType == 1
+                    sy = s'*y;
+                    alpha = (s'*s)/sy;
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                elseif bbType == 2 % Conic Interpolation ('Modified BB')
+                    sy = s'*y;
+                    ss = s'*s;
+                    alpha = ss/sy;
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                    alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s);
+                    if alphaConic > .001*alpha && alphaConic < 1000*alpha
+                        alpha = alphaConic;
+                    end
+                elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step)
+                    sy = s'*y;
+                    alpha = (s'*s)/sy;
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
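+                    % keep a buffer of the last 5 BB step lengths and
+                    % randomly select one of them (the 'retard')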
+                    v(1+mod(i-2,5)) = alpha;
+                    alpha = v(ceil(rand*length(v)));
+                end
+                d = -alpha*g;
+            end
+            g_old = g;
+            myF_old = f;
+
+
+        case CG % Non-Linear Conjugate Gradient
+
+            if i == 1
+                d = -g; % Initially use steepest descent direction
+            else
+                gtgo = g'*g_old;
+                gotgo = g_old'*g_old;
+
+                if cgUpdate == 0
+                    % Fletcher-Reeves
+                    beta = (g'*g)/(gotgo);
+                elseif cgUpdate == 1
+                    % Polak-Ribiere
+                    beta = (g'*(g-g_old)) /(gotgo);
+                elseif cgUpdate == 2
+                    % Hestenes-Stiefel
+                    beta = (g'*(g-g_old))/((g-g_old)'*d);
+                else
+                    % Gilbert-Nocedal
+                    beta_FR = (g'*g)/(gotgo);
+                    beta_PR = (g'*g-gtgo)/(gotgo);
+                    beta = max(-beta_FR,min(beta_PR,beta_FR));
+                end
+
+                d = -g + beta*d;
+
+                % Restart if not a direction of sufficient descent
+                if g'*d > -tolX
+                    if debug
+                        fprintf('Restarting CG\n');
+                    end
+                    beta = 0;
+                    d = -g;
+                end
+
+                % Old restart rule:
+                %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0
+
+            end
+            g_old = g;
+
+        case PCG % Preconditioned Non-Linear Conjugate Gradient
+
+            % Apply preconditioner to negative gradient
+            if isempty(precFunc)
+                % Use L-BFGS Preconditioner
+                if i == 1
+                    old_dirs = zeros(length(g),0);
+                    old_stps = zeros(length(g),0);
+                    Hdiag = 1;
+                    s = -g;
+                else
+                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+
+                    if useMex
+                        s = lbfgsC(-g,old_dirs,old_stps,Hdiag);
+                    else
+                        s = lbfgs(-g,old_dirs,old_stps,Hdiag);
+                    end
+                end
+            else % User-supplied preconditioner
+                s = precFunc(-g,x,varargin{:});
+            end
+
+            if i == 1
+                d = s;
+            else
+
+                if cgUpdate == 0
+                    % Preconditioned Fletcher-Reeves
+                    beta = (g'*s)/(g_old'*s_old);
+                elseif cgUpdate < 3
+                    % Preconditioned Polak-Ribiere
+                    beta = (g'*(s-s_old))/(g_old'*s_old);
+                else
+                    % Preconditioned Gilbert-Nocedal
+                    beta_FR = (g'*s)/(g_old'*s_old);
+                    beta_PR = (g'*(s-s_old))/(g_old'*s_old);
+                    beta = max(-beta_FR,min(beta_PR,beta_FR));
+                end
+                d = s + beta*d;
+
+                if g'*d > -tolX
+                    if debug
+                        fprintf('Restarting CG\n');
+                    end
+                    beta = 0;
+                    d = s;
+                end
+
+            end
+            g_old = g;
+            s_old = s;
+        case LBFGS % L-BFGS
+
+            % Update the direction and step sizes
+
+            if i == 1
+                d = -g; % Initially use steepest descent direction
+                old_dirs = zeros(length(g),0);
+                old_stps = zeros(length(d),0);
+                Hdiag = 1;
+            else
+                if Damped
+                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                else
+                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                end
+
+                if useMex
+                    d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
+                else
+                    d = lbfgs(-g,old_dirs,old_stps,Hdiag);
+                end
+            end
+            g_old = g;
+
+        case QNEWTON % Use quasi-Newton Hessian approximation
+
+            if i == 1
+                d = -g;
+            else
+                % Compute difference vectors
+                y = g-g_old;
+                s = t*d;
+
+                if i == 2
+                    % Make initial Hessian approximation
+                    if initialHessType == 0
+                        % Identity
+                        if qnUpdate <= 1
+                            R = eye(length(g));
+                        else
+                            H = eye(length(g));
+                        end
+                    else
+                        % Scaled Identity
+                        if debug
+                            fprintf('Scaling Initial Hessian Approximation\n');
+                        end
+                        if qnUpdate <= 1
+                            % Use Cholesky of Hessian approximation
+                            R = sqrt((y'*y)/(y'*s))*eye(length(g));
+                        else
+                            % Use Inverse of Hessian approximation
+                            H = eye(length(g))*(y'*s)/(y'*y);
+                        end
+                    end
+                end
+
+                if qnUpdate == 0 % Use BFGS updates
+                    Bs = R'*(R*s);
+                    if Damped
+                        eta = .02;
+                        if y'*s < eta*s'*Bs
+                            if debug
+                                fprintf('Damped Update\n');
+                            end
+                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
+                            y = theta*y + (1-theta)*Bs;
+                        end
+                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                    else
+                        if y'*s > 1e-10
+                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                        else
+                            if debug
+                                fprintf('Skipping Update\n');
+                            end
+                        end
+                    end
+                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness
+
+                    Bs = R'*(R*s);
+                    ymBs = y-Bs;
+                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
+                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
+                    else
+                        if debug
+                            fprintf('SR1 not positive-definite, doing BFGS Update\n');
+                        end
+                        if Damped
+                            eta = .02;
+                            if y'*s < eta*s'*Bs
+                                if debug
+                                    fprintf('Damped Update\n');
+                                end
+                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
+                                y = theta*y + (1-theta)*Bs;
+                            end
+                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                        else
+                            if y'*s > 1e-10
+                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                            else
+                                if debug
+                                    fprintf('Skipping Update\n');
+                                end
+                            end
+                        end
+                    end
+                elseif qnUpdate == 2 % Use Hoshino update
+                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
+                    phi = 1/(1 + (y'*H*y)/(s'*y));
+                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';
+
+                elseif qnUpdate == 3 % Self-Scaling BFGS update
+                    ys = y'*s;
+                    Hy = H*y;
+                    yHy = y'*Hy;
+                    gamma = ys/yHy;
+                    v = sqrt(yHy)*(s/ys - Hy/yHy);
+                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
+                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update
+
+                    % Oren's method
+                    if (s'*y)/(y'*H*y) > 1
+                        phi = 1; % BFGS
+                        omega = 0;
+                    elseif (s'*(H\s))/(s'*y) < 1
+                        phi = 0; % DFP
+                        omega = 1;
+                    else
+                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
+                        omega = phi;
+                    end
+
+                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
+                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
+                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);
+
+                elseif qnUpdate == 5 % McCormick-Huang asymmetric update
+                    theta = 1;
+                    phi = 0;
+                    psi = 1;
+                    omega = 0;
+                    t1 = s*(theta*s + phi*H'*y)';
+                    t2 = (theta*s + phi*H'*y)'*y;
+                    t3 = H*y*(psi*s + omega*H'*y)';
+                    t4 = (psi*s + omega*H'*y)'*y;
+                    H = H + t1/t2 - t3/t4;
+                end
+
+                if qnUpdate <= 1
+                    d = -R\(R'\g);
+                else
+                    d = -H*g;
+                end
+
+            end
+            g_old = g;
+
+        case NEWTON0 % Hessian-Free Newton
+
+            cgMaxIter = min(p,maxFunEvals-funEvals);
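+            % Forcing tolerance for the inexact Newton solve: stop CG once
+            % the residual falls below min(0.5,sqrt(||g||))*||g||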
+            cgForce = min(0.5,sqrt(norm(g)))*norm(g);
+
+            % Set-up preconditioner
+            precondFunc = [];
+            precondArgs = [];
+            if cgSolve == 1
+                if isempty(precFunc) % Apply L-BFGS preconditioner
+                    if i == 1
+                        old_dirs = zeros(length(g),0);
+                        old_stps = zeros(length(g),0);
+                        Hdiag = 1;
+                    else
+                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                        if useMex
+                            precondFunc = @lbfgsC;
+                        else
+                            precondFunc = @lbfgs;
+                        end
+                        precondArgs = {old_dirs,old_stps,Hdiag};
+                    end
+                    g_old = g;
+                else
+                    % Apply user-defined preconditioner
+                    precondFunc = precFunc;
+                    precondArgs = {x,varargin{:}};
+                end
+            end
+
+            % Solve Newton system using cg and hessian-vector products
+            if isempty(HvFunc)
+                % No user-supplied Hessian-vector function,
+                % use automatic differentiation
+                HvFun = @autoHv;
+                HvArgs = {x,g,useComplex,funObj,varargin{:}};
+            else
+                % Use user-supplied Hessian-vector function
+                HvFun = HvFunc;
+                HvArgs = {x,varargin{:}};
+            end
+            
+            if useNegCurv
+                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
+            else
+                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
+            end
+
+            funEvals = funEvals+cgIter;
+            if debug
+                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
+
+            end
+
+            if useNegCurv
+                if ~isempty(negCurv)
+                    %if debug
+                    fprintf('Using negative curvature direction\n');
+                    %end
+                    d = negCurv/norm(negCurv);
+                    d = d/sum(abs(g));
+                end
+            end
+
+        case NEWTON % Newton search direction
+
+            if cgSolve == 0
+                if HessianModify == 0
+                    % Attempt to perform a Cholesky factorization of the Hessian
+                    [R,posDef] = chol(H);
+
+                    % If the Cholesky factorization was successful, then the Hessian is
+                    % positive definite, solve the system
+                    if posDef == 0
+                        d = -R\(R'\g);
+
+                    else
+                        % otherwise, adjust the Hessian to be positive definite based on the
+                        % minimum eigenvalue, and solve with QR
+                        % (expensive, we don't want to do this very much)
+                        if debug
+                            fprintf('Adjusting Hessian\n');
+                        end
+                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
+                        d = -H\g;
+                    end
+                elseif HessianModify == 1
+                    % Modified Incomplete Cholesky
+                    R = mcholinc(H,debug);
+                    d = -R\(R'\g);
+                elseif HessianModify == 2
+                    % Modified Generalized Cholesky
+                    if useMex
+                        [L D perm] = mcholC(H);
+                    else
+                        [L D perm] = mchol(H);
+                    end
+                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));
+
+                elseif HessianModify == 3
+                    % Modified Spectral Decomposition
+                    [V,D] = eig((H+H')/2);
+                    D = diag(D);
+                    D = max(abs(D),max(max(abs(D)),1)*1e-12);
+                    d = -V*((V'*g)./D);
+                elseif HessianModify == 4
+                    % Modified Symmetric Indefinite Factorization
+                    [L,D,perm] = ldl(H,'vector');
+                    [blockPos junk] = find(triu(D,1));
+                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
+                        if D(diagInd,diagInd) < 1e-12
+                            D(diagInd,diagInd) = 1e-12;
+                        end
+                    end
+                    for blockInd = blockPos'
+                        block = D(blockInd:blockInd+1,blockInd:blockInd+1);
+                        block_a = block(1);
+                        block_b = block(2);
+                        block_d = block(4);
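+                        % lambda is the smallest eigenvalue of the 2x2 block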
+                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2;
+                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(1e-12-lambda); % shift so the smallest eigenvalue becomes 1e-12
+                    end
+                    d(perm) = -L' \ (D \ (L \ g(perm)));
+                else
+                    % Take Newton step if Hessian is pd,
+                    % otherwise take a step with negative curvature
+                    [R,posDef] = chol(H);
+                    if posDef == 0
+                        d = -R\(R'\g);
+                    else
+                        if debug
+                            fprintf('Taking Direction of Negative Curvature\n');
+                        end
+                        [V,D] = eig(H);
+                        u = V(:,1);
+                        d = -sign(u'*g)*u;
+                    end
+                end
+
+            else
+                % Solve with Conjugate Gradient
+                cgMaxIter = p;
+                cgForce = min(0.5,sqrt(norm(g)))*norm(g);
+
+                % Select Preconditioner
+                if cgSolve == 1
+                    % No preconditioner
+                    precondFunc = [];
+                    precondArgs = [];
+                elseif cgSolve == 2
+                    % Diagonal preconditioner
+                    precDiag = diag(H);
+                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
+                    precondFunc = @precondDiag;
+                    precondArgs = {precDiag.^-1};
+                elseif cgSolve == 3
+                    % L-BFGS preconditioner
+                    if i == 1
+                        old_dirs = zeros(length(g),0);
+                        old_stps = zeros(length(g),0);
+                        Hdiag = 1;
+                    else
+                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                    end
+                    g_old = g;
+                    if useMex
+                        precondFunc = @lbfgsC;
+                    else
+                        precondFunc = @lbfgs;
+                    end
+                    precondArgs = {old_dirs,old_stps,Hdiag};
+                elseif cgSolve > 0
+                    % Symmetric Successive Over-Relaxation Preconditioner
+                    omega = cgSolve;
+                    D = diag(H);
+                    D(D < 1e-12) = 1e-12 - min(D);
+                    precDiag = (omega/(2-omega))*D.^-1;
+                    precTriu = diag(D/omega) + triu(H,1);
+                    precondFunc = @precondTriuDiag;
+                    precondArgs = {precTriu,precDiag.^-1};
+                else
+                    % Incomplete Cholesky Preconditioner
+                    opts.droptol = -cgSolve;
+                    opts.rdiag = 1;
+                    R = cholinc(sparse(H),opts);
+                    if min(diag(R)) < 1e-12
+                        R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts);
+                    end
+                    precondFunc = @precondTriu;
+                    precondArgs = {R};
+                end
+
+                % Run cg with the appropriate preconditioner
+                if isempty(HvFunc)
+                    % No user-supplied Hessian-vector function
+                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
+                else
+                    % Use user-supplied Hessian-vector function
+                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
+                end
+                if debug
+                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
+                    %funEvals = funEvals + cgIter;
+                end
+            end
+
+        case TENSOR % Tensor Method
+
+            if numDiff
+                % Compute 3rd-order Tensor Numerically
+                [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:});
+            else
+                % Use user-supplied 3rd-derivative Tensor
+                [junk1 junk2 junk3 T] = feval(funObj, x, varargin{:});
+            end
+            options_sub.Method = 'newton';
+            options_sub.Display = 'none';
+            options_sub.TolX = tolX;
+            options_sub.TolFun = tolFun;
+            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);
+
+            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX
+                if debug
+                    fprintf('Using 2nd-Order Step\n');
+                end
+                [V,D] = eig((H+H')/2);
+                D = diag(D);
+                D = max(abs(D),max(max(abs(D)),1)*1e-12);
+                d = -V*((V'*g)./D);
+            else
+                if debug
+                    fprintf('Using 3rd-Order Step\n');
+                end
+            end
+    end
+
+    if ~isLegal(d)
+        fprintf('Step direction is illegal!\n');
+        pause;
+        return
+    end
+
+    % ****************** COMPUTE STEP LENGTH ************************
+
+    % Directional Derivative
+    gtd = g'*d;
+
+    % Check that progress can be made along direction
+    if gtd > -tolX
+        exitflag=2;
+        msg = 'Directional Derivative below TolX';
+        break;
+    end
+
+    % Select Initial Guess
+    if i == 1
+        if method < NEWTON0
+            t = min(1,1/sum(abs(g)));
+        else
+            t = 1;
+        end
+    else
+        if LS_init == 0
+            % Newton step
+            t = 1;
+        elseif LS_init == 1
+            % Close to previous step length
+            t = t*min(2,(gtd_old)/(gtd));
+        elseif LS_init == 2
+            % Quadratic Initialization based on {f,g} and previous f
+            t = min(1,2*(f-f_old)/(gtd));
+        elseif LS_init == 3
+            % Double previous step length
+            t = min(1,t*2);
+        elseif LS_init == 4
+            % Scaled step length if possible
+            if isempty(HvFunc)
+                % No user-supplied Hessian-vector function,
+                % use automatic differentiation
+                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
+            else
+                % Use user-supplied Hessian-vector function
+                dHd = d'*HvFunc(d,x,varargin{:});
+            end
+
+            funEvals = funEvals + 1;
+            if dHd > 0
+                t = -gtd/(dHd);
+            else
+                t = min(1,2*(f-f_old)/(gtd));
+            end
+        end
+
+        if t <= 0
+            t = 1;
+        end
+    end
+    f_old = f;
+    gtd_old = gtd;
+
+    % Compute reference fr if using non-monotone objective
+    if Fref == 1
+        fr = f;
+    else
+        if i == 1
+            old_fvals = repmat(-inf,[Fref 1]);
+        end
+
+        if i <= Fref
+            old_fvals(i) = f;
+        else
+            old_fvals = [old_fvals(2:end);f];
+        end
+        fr = max(old_fvals);
+    end
+
+    computeHessian = 0;
+    if method >= NEWTON
+        if HessianIter == 1
+            computeHessian = 1;
+        elseif i > 1 && mod(i-1,HessianIter) == 0
+            computeHessian = 1;
+        end
+    end
+
+    % Line Search
+    f_old = f;
+    if LS < 3 % Use Armijo Backtracking
+        % Perform Backtracking line search
+        if computeHessian
+            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
+        else
+            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:});
+        end
+        funEvals = funEvals + LSfunEvals;
+
+    elseif LS < 6
+        % Find Point satisfying Wolfe
+
+        if computeHessian
+            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
+        else
+            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:});
+        end
+        funEvals = funEvals + LSfunEvals;
+        x = x + t*d;
+
+    else
+        % Use Matlab optim toolbox line search
+        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=...
+            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
+            tolX,[],[],[],varargin{:});
+        funEvals = funEvals + LSiter;
+        if isempty(t)
+            exitflag = -2;
+            msg = 'Matlab LineSearch failed';
+            break;
+        end
+
+        if method >= NEWTON
+            [f_new,g_new,H] = funObj(x + t*d,varargin{:});
+            funEvals = funEvals + 1;
+        end
+        x = x + t*d;
+        f = f_new;
+        g = g_new;
+    end
+
+    % Output iteration information
+    if verboseI
+        fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
+    end
+
+    if ~isempty(logfile)
+        fid = fopen(logfile, 'a');
+        if (fid > 0)
+            fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
+            fclose(fid);
+        end
+    end
+
+    
+    % Output Function
+    if ~isempty(outputFcn)
+        callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
+    end
+
+    % Update Trace
+    trace.fval(end+1,1) = f;
+    trace.funcCount(end+1,1) = funEvals;
+
+    % Check Optimality Condition
+    if sum(abs(g)) <= tolFun
+        exitflag=1;
+        msg = 'Optimality Condition below TolFun';
+        break;
+    end
+
+    % ******************* Check for lack of progress *******************
+
+    if sum(abs(t*d)) <= tolX
+        exitflag=2;
+        msg = 'Step Size below TolX';
+        break;
+    end
+
+
+    if abs(f-f_old) < tolX
+        exitflag=2;
+        msg = 'Function Value changing by less than TolX';
+        break;
+    end
+
+    % ******** Check for going over iteration/evaluation limit *******************
+
+    if funEvals*funEvalMultiplier > maxFunEvals
+        exitflag = 0;
+        msg = 'Exceeded Maximum Number of Function Evaluations';
+        break;
+    end
+
+    if i == maxIter
+        exitflag = 0;
+        msg='Exceeded Maximum Number of Iterations';
+        break;
+    end
+
+end
+
+if verbose
+    fprintf('%s\n',msg);
+end
+if nargout > 3
+    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
+        'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
+end
+
+% Output Function
+if ~isempty(outputFcn)
+    callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
+end
+
+end
+