--- /dev/null
+++ b/combinedDeepLearningActiveContour/minFunc/minFunc.m
@@ -0,0 +1,1145 @@
+function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
+% minFunc(funObj,x0,options,varargin)
+%
+% Unconstrained optimizer using a line search strategy
+%
+% Uses an interface very similar to fminunc
+% (it doesn't support all of the optimization toolbox options,
+% but supports many other options).
+%
+% It computes descent directions using one of ('Method'):
+%   - 'sd': Steepest Descent
+%       (no previous information used, not recommended)
+%   - 'csd': Cyclic Steepest Descent
+%       (uses previous step length for a fixed length cycle)
+%   - 'bb': Barzilai and Borwein Gradient
+%       (uses only previous step)
+%   - 'cg': Non-Linear Conjugate Gradient
+%       (uses only previous step and a vector beta)
+%   - 'scg': Scaled Non-Linear Conjugate Gradient
+%       (uses previous step and a vector beta,
+%       and Hessian-vector products to initialize line search)
+%   - 'pcg': Preconditioned Non-Linear Conjugate Gradient
+%       (uses only previous step and a vector beta, preconditioned version)
+%   - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
+%       (default: uses a predetermined number of previous steps to form a
+%       low-rank Hessian approximation)
+%   - 'newton0': Hessian-Free Newton
+%       (numerically computes Hessian-Vector products)
+%   - 'pnewton0': Preconditioned Hessian-Free Newton
+%       (numerically computes Hessian-Vector products, preconditioned
+%       version)
+%   - 'qnewton': Quasi-Newton Hessian approximation
+%       (uses dense Hessian approximation)
+%   - 'mnewton': Newton's method with Hessian calculation after every
+%       user-specified number of iterations
+%       (needs user-supplied Hessian matrix)
+%   - 'newton': Newton's method with Hessian calculation every iteration
+%       (needs user-supplied Hessian matrix)
+%   - 'tensor': Tensor
+%       (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
+%
+% Several line search strategies are available for finding a step length satisfying
+% the termination criteria ('LS'):
+%   - 0: Backtrack w/ Step Size Halving
+%   - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values
+%   - 2: Backtrack w/ Cubic Interpolation from new function + gradient
+%       values (default for 'bb' and 'sd')
+%   - 3: Bracketing w/ Step Size Doubling and Bisection
+%   - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function +
+%       gradient values (default for all except 'bb' and 'sd')
+%   - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation
+%   - 6: Use Matlab Optimization Toolbox's line search
+%       (requires Matlab's linesearch.m to be added to the path)
+%
+% Above, the first three find a point satisfying the Armijo condition,
+% while the last four search for a point satisfying the Wolfe conditions.
+% If the objective function overflows, it is recommended to use one of
+% the first three.
+% The first three can be used to perform a non-monotone
+% line search by changing the option 'Fref'.
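+%
+% For example, a non-monotone Armijo backtracking search that accepts a step
+% improving on the worst of the last 10 function values (a sketch built only
+% from the 'LS' and 'Fref' options documented here):
+%   options.LS = 2;     % backtracking w/ cubic interpolation
+%   options.Fref = 10;  % compare against the last 10 function values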
+%
+% Several strategies for choosing the initial step size are available ('LS_init'):
+%   - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd')
+%       (t = 1)
+%   - 1: Use a step similar to the previous step (default for 'cg' and 'sd')
+%       (t = t_old*min(2,g'd/g_old'd_old))
+%   - 2: Quadratic Initialization using previous function value and new
+%       function value/gradient (use this if steps tend to be very long)
+%       (t = min(1,2*(f-f_old)/g))
+%   - 3: The minimum between 1 and twice the previous step length
+%       (t = min(1,2*t))
+%   - 4: The scaled conjugate gradient step length (may accelerate
+%       conjugate gradient methods, but requires a Hessian-vector product)
+%       (t = g'd/d'Hd)
+%
+% Inputs:
+%   funObj is a function handle
+%   x0 is the starting vector
+%   options is a struct containing parameters
+%       (defaults are used for non-existent or blank fields)
+%   all other arguments are passed to funObj
+%
+% Outputs:
+%   x is the best point found
+%   f is the function value at that point
+%   exitflag returns an exit condition
+%   output returns a structure with other information
+%
+% Supported Input Options
+%   Display - Level of display [ off | final | (iter) | full | excessive ]
+%   MaxFunEvals - Maximum number of function evaluations allowed (1000)
+%   MaxIter - Maximum number of iterations allowed (500)
+%   TolFun - Termination tolerance on the first-order optimality (1e-5)
+%   TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9)
+%   Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
+%       qnewton | mnewton | newton | tensor ]
+%   c1 - Sufficient Decrease for Armijo condition (1e-4)
+%   c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise)
+%   LS_init - Line Search Initialization - see above (2 for cg/sd, 4 for scg, 0 otherwise)
+%   LS - Line Search type - see above (2 for bb, 4 otherwise)
+%   Fref - Setting this to a positive integer greater than 1
+%       will use a non-monotone Armijo objective in the line search.
+%       (20 for bb, 10 for csd, 1 for all others)
+%   numDiff - compute derivatives numerically
+%       (default: 0) (this option has a different effect for 'newton', see below)
+%   useComplex - if 1, use complex differentials when computing numerical derivatives
+%       to get very accurate values (default: 0)
+%   DerivativeCheck - if 'on', computes derivatives numerically at the initial
+%       point and compares them to the user-supplied derivative (default: 'off')
+%   outputFcn - function to run after each iteration (default: []). It
+%       should have the following interface:
+%       outputFcn(x,infoStruct,state,varargin{:})
+%   useMex - where applicable, use mex files to speed things up (default: 1)
+%
+% Method-specific input options:
+%   newton:
+%       HessianModify - type of Hessian modification for direct solvers to
+%           use if the Hessian is not positive definite (default: 0)
+%           0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
+%           (requires eigenvalues on iterations where matrix is not pd)
+%           1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
+%           (an approximation to method 0, does not require eigenvalues)
+%           2: Modified LDL factorization
+%           (only 1 generalized Cholesky factorization done and no eigenvalues required)
+%           3: Modified Spectral Decomposition
+%           (requires eigenvalues)
+%           4: Modified Symmetric Indefinite Factorization
+%           5: Uses the eigenvector of the smallest eigenvalue as negative
+%           curvature direction
+%       cgSolve - use conjugate gradient instead of direct solver (default: 0)
+%           0: Direct Solver
+%           1: Conjugate Gradient
+%           2: Conjugate Gradient with Diagonal Preconditioner
+%           3: Conjugate Gradient with LBFGS Preconditioner
+%           x: Conjugate Gradient with Symmetric Successive Over-Relaxation
+%           Preconditioner with parameter x
+%           (where x is a real number in the range [0,2])
+%           x: Conjugate Gradient with Incomplete Cholesky Preconditioner
+%           with drop tolerance -x
+%           (where x is a real negative number)
+%       numDiff - compute Hessian numerically
+%           (default: 0, done with complex differentials if useComplex = 1)
+%       LS_saveHessianComp - when 1, only computes the Hessian at the
+%           first and last iteration of the line search (default: 1)
+%   mnewton:
+%       HessianIter - number of iterations to use the same Hessian (default: 5)
+%   qnewton:
+%       initialHessType - scale the initial Hessian approximation (default: 1)
+%       qnUpdate - type of quasi-Newton update (default: 3):
+%           0: BFGS
+%           1: SR1 (when it is positive-definite, otherwise BFGS)
+%           2: Hoshino
+%           3: Self-Scaling BFGS
+%           4: Oren's Self-Scaling Variable Metric method
+%           5: McCormick-Huang asymmetric update
+%       Damped - use damped BFGS update (default: 1)
+%   newton0/pnewton0:
+%       HvFunc - user-supplied function that returns Hessian-vector products
+%           (by default, these are computed numerically using autoHv)
+%           HvFunc should have the following interface: HvFunc(v,x,varargin{:})
+%       useComplex - use a complex perturbation to get high-accuracy
+%           Hessian-vector products (default: 0)
+%           (the increased accuracy can make the method much more efficient,
+%           but gradient code must properly support complex inputs)
+%       useNegCurv - a negative curvature direction is used as the descent
+%           direction if one is encountered during the cg iterations
+%           (default: 1)
+%       precFunc (for pnewton0 only) - user-supplied preconditioner
+%           (by default, an L-BFGS preconditioner is used)
+%           precFunc should have the following interface:
+%           precFunc(v,x,varargin{:})
+%   lbfgs:
+%       Corr - number of corrections to store in memory (default: 100)
+%           (higher numbers converge faster but use more memory)
+%       Damped - use damped update (default: 0)
+%   cg/scg/pcg:
+%       cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
+%           0: Fletcher Reeves
+%           1: Polak-Ribiere
+%           2: Hestenes-Stiefel (not supported for pcg)
+%           3: Gilbert-Nocedal
+%       HvFunc (for scg only) - user-supplied function that returns
+%           Hessian-vector products
+%           (by default, these are computed numerically using autoHv)
+%           HvFunc should have the following interface:
+%           HvFunc(v,x,varargin{:})
+%       precFunc (for pcg only) - user-supplied preconditioner
+%           (by default, an L-BFGS preconditioner is used)
+%           precFunc should have the following interface:
+%           precFunc(v,x,varargin{:})
+%   bb:
+%       bbType - type of bb step (default: 1)
+%           0: min_alpha ||delta_x - alpha delta_g||_2
+%           1: min_alpha ||alpha delta_x - delta_g||_2
+%           2: Conic BB
+%           3: Gradient method with retards
+%   csd:
+%       cycle - length of cycle (default: 3)
+%
+% Supported Output Options
+%   iterations - number of iterations taken
+%   funcCount - number of function evaluations
+%   algorithm - algorithm used
+%   firstorderopt - first-order optimality
+%   message - exit message
+%   trace.funcCount - function evaluations after each iteration
+%   trace.fval - function value after each iteration
+%
+% Author: Mark Schmidt (2006)
+% Web: http://www.cs.ubc.ca/~schmidtm
+%
+% Sources (in order of how much the source material contributes):
+%   J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag.
+%   R. Fletcher. 1987. "Practical Methods of Optimization". Wiley.
+%   J. Demmel. 1997. "Applied Numerical Linear Algebra". SIAM.
+%   R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R.
+%   Pozo, C. Romine, and H. Van der Vorst. 1994. "Templates for the Solution of
+%   Linear Systems: Building Blocks for Iterative Methods". SIAM.
+%   J. More and D. Thuente. 1994. "Line search algorithms with guaranteed
+%   sufficient decrease". ACM Trans. Math. Softw., 20, 286-307.
+%   M. Raydan. 1997. "The Barzilai and Borwein gradient method for the large
+%   scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33.
+%   "Mathematical Optimization". The Computational Science Education
+%   Project. 1995.
+%   C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in
+%   Applied Mathematics. SIAM.
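+%
+% Example usage (a minimal sketch; the quadratic objective below is purely
+% illustrative and not part of minFunc):
+%   A = [3 1; 1 2]; b = [1; 1];
+%   funObj = @(x) deal(0.5*x'*A*x - b'*x, A*x - b);  % returns [f,g]
+%   opts = struct('Method','lbfgs','Display','iter');
+%   [x,f,exitflag,output] = minFunc(funObj,zeros(2,1),opts);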
+
+if nargin < 3
+    options = [];
+end
+
+% Get Parameters
+[verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,...
+    corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,...
+    HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,...
+    DerivativeCheck,Damped,HvFunc,bbType,cycle,...
+    HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
+    minFunc_processInputOptions(options);
+
+if isfield(options, 'logfile')
+    logfile = options.logfile;
+else
+    logfile = [];
+end
+
+% Constants
+SD = 0;
+CSD = 1;
+BB = 2;
+CG = 3;
+PCG = 4;
+LBFGS = 5;
+QNEWTON = 6;
+NEWTON0 = 7;
+NEWTON = 8;
+TENSOR = 9;
+
+% Initialize
+p = length(x0);
+d = zeros(p,1);
+x = x0;
+t = 1;
+
+% If necessary, form numerical differentiation functions
+funEvalMultiplier = 1;
+if numDiff && method ~= TENSOR
+    varargin(3:end+2) = varargin(1:end);
+    varargin{1} = useComplex;
+    varargin{2} = funObj;
+    if method ~= NEWTON
+        if debug
+            if useComplex
+                fprintf('Using complex differentials for gradient computation\n');
+            else
+                fprintf('Using finite differences for gradient computation\n');
+            end
+        end
+        funObj = @autoGrad;
+    else
+        if debug
+            if useComplex
+                fprintf('Using complex differentials for Hessian computation\n');
+            else
+                fprintf('Using finite differences for Hessian computation\n');
+            end
+        end
+        funObj = @autoHess;
+    end
+
+    if method == NEWTON0 && useComplex == 1
+        if debug
+            fprintf('Turning off the use of complex differentials\n');
+        end
+        useComplex = 0;
+    end
+
+    if useComplex
+        funEvalMultiplier = p;
+    else
+        funEvalMultiplier = p+1;
+    end
+end
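+
+% (Note: with useComplex = 1, autoGrad uses a complex-step approximation,
+% in sketch g_i = imag(f(x + 1i*h*e_i))/h for a tiny step h, which avoids
+% the subtractive cancellation of ordinary finite differences.)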
+
+% Evaluate Initial Point
+if method < NEWTON
+    [f,g] = feval(funObj, x, varargin{:});
+else
+    [f,g,H] = feval(funObj, x, varargin{:});
+    computeHessian = 1;
+end
+funEvals = 1;
+
+if strcmp(DerivativeCheck,'on')
+    if numDiff
+        fprintf('Cannot do derivative checking when numDiff is 1\n');
+    end
+    % Check provided gradient/hessian function using numerical derivatives
+    fprintf('Checking Gradient:\n');
+    [f2,g2] = autoGrad(x,useComplex,funObj,varargin{:});
+
+    fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2)));
+    if max(abs(g-g2)) > 1e-4
+        fprintf('User NumDif:\n');
+        [g g2]
+        diff = abs(g-g2)
+        pause;
+    end
+
+    if method >= NEWTON
+        fprintf('Checking Hessian:\n');
+        [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:});
+
+        fprintf('Max difference between user and numerical hessian: %f\n',max(abs(H(:)-H2(:))));
+        if max(abs(H(:)-H2(:))) > 1e-4
+            H
+            H2
+            diff = abs(H-H2)
+            pause;
+        end
+    end
+end
+
+% Output Log
+if verboseI
+    fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
+end
+
+if ~isempty(logfile)
+    fid = fopen(logfile, 'a');
+    if (fid > 0)
+        fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
+        fclose(fid);
+    end
+end
+
+% Output Function
+if ~isempty(outputFcn)
+    callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:});
+end
+
+% Initialize Trace
+trace.fval = f;
+trace.funcCount = funEvals;
+
+% Check optimality of initial point
+if sum(abs(g)) <= tolFun
+    exitflag=1;
+    msg = 'Optimality Condition below TolFun';
+    if verbose
+        fprintf('%s\n',msg);
+    end
+    if nargout > 3
+        output = struct('iterations',0,'funcCount',1,...
+            'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
+    end
+    return;
+end
+
+% Perform up to a maximum of 'maxIter' descent steps:
+for i = 1:maxIter
+
+    % ****************** COMPUTE DESCENT DIRECTION *****************
+
+    switch method
+        case SD % Steepest Descent
+            d = -g;
+
+        case CSD % Cyclic Steepest Descent
+
+            if mod(i,cycle) == 1 % Use Steepest Descent
+                alpha = 1;
+                LS_init = 2;
+                LS = 4; % Precise Line Search
+            elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step
+                alpha = t;
+                LS_init = 0;
+                LS = 2; % Non-monotone line search
+            end
+            d = -alpha*g;
+
+        case BB % Steepest Descent with Barzilai and Borwein Step Length
+
+            if i == 1
+                d = -g;
+            else
+                y = g-g_old;
+                s = t*d;
+                if bbType == 0
+                    yy = y'*y;
+                    alpha = (s'*y)/(yy);
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                elseif bbType == 1
+                    sy = s'*y;
+                    alpha = (s'*s)/sy;
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                elseif bbType == 2 % Conic Interpolation ('Modified BB')
+                    sy = s'*y;
+                    ss = s'*s;
+                    alpha = ss/sy;
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                    alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s);
+                    if alphaConic > .001*alpha && alphaConic < 1000*alpha
+                        alpha = alphaConic;
+                    end
+                elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step)
+                    sy = s'*y;
+                    alpha = (s'*s)/sy;
+                    if alpha <= 1e-10 || alpha > 1e10
+                        alpha = 1;
+                    end
+                    v(1+mod(i-2,5)) = alpha;
+                    alpha = v(ceil(rand*length(v)));
+                end
+                d = -alpha*g;
+            end
+            g_old = g;
+            myF_old = f;
+
+
+        case CG % Non-Linear Conjugate Gradient
+
+            if i == 1
+                d = -g; % Initially use steepest descent direction
+            else
+                gtgo = g'*g_old;
+                gotgo = g_old'*g_old;
+
+                if cgUpdate == 0
+                    % Fletcher-Reeves
+                    beta = (g'*g)/(gotgo);
+                elseif cgUpdate == 1
+                    % Polak-Ribiere
+                    beta = (g'*(g-g_old)) /(gotgo);
+                elseif cgUpdate == 2
+                    % Hestenes-Stiefel
+                    beta = (g'*(g-g_old))/((g-g_old)'*d);
+                else
+                    % Gilbert-Nocedal
+                    % (Polak-Ribiere value capped by the Fletcher-Reeves value)
+                    beta_FR = (g'*g)/(gotgo);
+                    beta_PR = (g'*g-gtgo)/(gotgo);
+                    beta = max(-beta_FR,min(beta_PR,beta_FR));
+                end
+
+                d = -g + beta*d;
+
+                % Restart if not a direction of sufficient descent
+                if g'*d > -tolX
+                    if debug
+                        fprintf('Restarting CG\n');
+                    end
+                    beta = 0;
+                    d = -g;
+                end
+
+                % Old restart rule:
+                %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0
+
+            end
+            g_old = g;
+
+        case PCG % Preconditioned Non-Linear Conjugate Gradient
+
+            % Apply preconditioner to negative gradient
+            if isempty(precFunc)
+                % Use L-BFGS Preconditioner
+                if i == 1
+                    old_dirs = zeros(length(g),0);
+                    old_stps = zeros(length(g),0);
+                    Hdiag = 1;
+                    s = -g;
+                else
+                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+
+                    if useMex
+                        s = lbfgsC(-g,old_dirs,old_stps,Hdiag);
+                    else
+                        s = lbfgs(-g,old_dirs,old_stps,Hdiag);
+                    end
+                end
+            else % User-supplied preconditioner
+                s = precFunc(-g,x,varargin{:});
+            end
+
+            if i == 1
+                d = s;
+            else
+
+                if cgUpdate == 0
+                    % Preconditioned Fletcher-Reeves
+                    beta = (g'*s)/(g_old'*s_old);
+                elseif cgUpdate < 3
+                    % Preconditioned Polak-Ribiere
+                    beta = (g'*(s-s_old))/(g_old'*s_old);
+                else
+                    % Preconditioned Gilbert-Nocedal
+                    beta_FR = (g'*s)/(g_old'*s_old);
+                    beta_PR = (g'*(s-s_old))/(g_old'*s_old);
+                    beta = max(-beta_FR,min(beta_PR,beta_FR));
+                end
+                d = s + beta*d;
+
+                if g'*d > -tolX
+                    if debug
+                        fprintf('Restarting CG\n');
+                    end
+                    beta = 0;
+                    d = s;
+                end
+
+            end
+            g_old = g;
+            s_old = s;
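+
+        % In the L-BFGS case below, lbfgs()/lbfgsC() apply the inverse-Hessian
+        % approximation to -g via the standard two-loop recursion (a sketch,
+        % with s_j/y_j the stored pairs and rho_j = 1/(y_j'*s_j)):
+        %   q = -g;
+        %   for j = newest:-1:oldest, a_j = rho_j*s_j'*q; q = q - a_j*y_j; end
+        %   r = Hdiag*q;
+        %   for j = oldest:newest, b = rho_j*y_j'*r; r = r + s_j*(a_j - b); end
+        % and r is returned as the search direction.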
+        case LBFGS % L-BFGS
+
+            % Update the direction and step sizes
+
+            if i == 1
+                d = -g; % Initially use steepest descent direction
+                old_dirs = zeros(length(g),0);
+                old_stps = zeros(length(d),0);
+                Hdiag = 1;
+            else
+                if Damped
+                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                else
+                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                end
+
+                if useMex
+                    d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
+                else
+                    d = lbfgs(-g,old_dirs,old_stps,Hdiag);
+                end
+            end
+            g_old = g;
+
+        case QNEWTON % Use quasi-Newton Hessian approximation
+
+            if i == 1
+                d = -g;
+            else
+                % Compute difference vectors
+                y = g-g_old;
+                s = t*d;
+
+                if i == 2
+                    % Make initial Hessian approximation
+                    if initialHessType == 0
+                        % Identity
+                        if qnUpdate <= 1
+                            R = eye(length(g));
+                        else
+                            H = eye(length(g));
+                        end
+                    else
+                        % Scaled Identity
+                        if debug
+                            fprintf('Scaling Initial Hessian Approximation\n');
+                        end
+                        if qnUpdate <= 1
+                            % Use Cholesky of Hessian approximation
+                            R = sqrt((y'*y)/(y'*s))*eye(length(g));
+                        else
+                            % Use Inverse of Hessian approximation
+                            H = eye(length(g))*(y'*s)/(y'*y);
+                        end
+                    end
+                end
+
+                if qnUpdate == 0 % Use BFGS updates
+                    Bs = R'*(R*s);
+                    if Damped
+                        eta = .02;
+                        if y'*s < eta*s'*Bs
+                            if debug
+                                fprintf('Damped Update\n');
+                            end
+                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
+                            y = theta*y + (1-theta)*Bs;
+                        end
+                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                    else
+                        if y'*s > 1e-10
+                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                        else
+                            if debug
+                                fprintf('Skipping Update\n');
+                            end
+                        end
+                    end
+                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness
+
+                    Bs = R'*(R*s);
+                    ymBs = y-Bs;
+                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
+                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
+                    else
+                        if debug
+                            fprintf('SR1 not positive-definite, doing BFGS Update\n');
+                        end
+                        if Damped
+                            eta = .02;
+                            if y'*s < eta*s'*Bs
+                                if debug
+                                    fprintf('Damped Update\n');
+                                end
+                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
+                                y = theta*y + (1-theta)*Bs;
+                            end
+                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                        else
+                            if y'*s > 1e-10
+                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
+                            else
+                                if debug
+                                    fprintf('Skipping Update\n');
+                                end
+                            end
+                        end
+                    end
+                elseif qnUpdate == 2 % Use Hoshino update
+                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
+                    phi = 1/(1 + (y'*H*y)/(s'*y));
+                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';
+
+                elseif qnUpdate == 3 % Self-Scaling BFGS update
+                    ys = y'*s;
+                    Hy = H*y;
+                    yHy = y'*Hy;
+                    gamma = ys/yHy;
+                    v = sqrt(yHy)*(s/ys - Hy/yHy);
+                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
+                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update
+
+                    % Oren's method
+                    if (s'*y)/(y'*H*y) > 1
+                        phi = 1; % BFGS
+                        omega = 0;
+                    elseif (s'*(H\s))/(s'*y) < 1
+                        phi = 0; % DFP
+                        omega = 1;
+                    else
+                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
+                        omega = phi;
+                    end
+
+                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
+                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
+                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);
+
+                elseif qnUpdate == 5 % McCormick-Huang asymmetric update
+                    theta = 1;
+                    phi = 0;
+                    psi = 1;
+                    omega = 0;
+                    t1 = s*(theta*s + phi*H'*y)';
+                    t2 = (theta*s + phi*H'*y)'*y;
+                    t3 = H*y*(psi*s + omega*H'*y)';
+                    t4 = (psi*s + omega*H'*y)'*y;
+                    H = H + t1/t2 - t3/t4;
+                end
+
+                if qnUpdate <= 1
+                    d = -R\(R'\g);
+                else
+                    d = -H*g;
+                end
+
+            end
+            g_old = g;
+
+        case NEWTON0 % Hessian-Free Newton
+
+            cgMaxIter = min(p,maxFunEvals-funEvals);
+            cgForce = min(0.5,sqrt(norm(g)))*norm(g);
+
+            % Set-up preconditioner
+            precondFunc = [];
+            precondArgs = [];
+            if cgSolve == 1
+                if isempty(precFunc) % Apply L-BFGS preconditioner
+                    if i == 1
+                        old_dirs = zeros(length(g),0);
+                        old_stps = zeros(length(g),0);
+                        Hdiag = 1;
+                    else
+                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                        if useMex
+                            precondFunc = @lbfgsC;
+                        else
+                            precondFunc = @lbfgs;
+                        end
+                        precondArgs = {old_dirs,old_stps,Hdiag};
+                    end
+                    g_old = g;
+                else
+                    % Apply user-defined preconditioner
+                    precondFunc = precFunc;
+                    precondArgs = {x,varargin{:}};
+                end
+            end
+
+            % Solve Newton system using cg and hessian-vector products
+            if isempty(HvFunc)
+                % No user-supplied Hessian-vector function,
+                % use automatic differentiation
+                HvFun = @autoHv;
+                HvArgs = {x,g,useComplex,funObj,varargin{:}};
+            else
+                % Use user-supplied Hessian-vector function
+                HvFun = HvFunc;
+                HvArgs = {x,varargin{:}};
+            end
+
+            if useNegCurv
+                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
+            else
+                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
+            end
+
+            funEvals = funEvals+cgIter;
+            if debug
+                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
+            end
+
+            if useNegCurv
+                if ~isempty(negCurv)
+                    %if debug
+                    fprintf('Using negative curvature direction\n');
+                    %end
+                    d = negCurv/norm(negCurv);
+                    d = d/sum(abs(g));
+                end
+            end
+
+        case NEWTON % Newton search direction
+
+            if cgSolve == 0
+                if HessianModify == 0
+                    % Attempt to perform a Cholesky factorization of the Hessian
+                    [R,posDef] = chol(H);
+
+                    % If the Cholesky factorization was successful, then the Hessian is
+                    % positive definite, solve the system
+                    if posDef == 0
+                        d = -R\(R'\g);
+
+                    else
+                        % otherwise, adjust the Hessian to be positive definite based on the
+                        % minimum eigenvalue, and solve with QR
+                        % (expensive, we don't want to do this very much)
+                        if debug
+                            fprintf('Adjusting Hessian\n');
+                        end
+                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
+                        d = -H\g;
+                    end
+                elseif HessianModify == 1
+                    % Modified Incomplete Cholesky
+                    R = mcholinc(H,debug);
+                    d = -R\(R'\g);
+                elseif HessianModify == 2
+                    % Modified Generalized Cholesky
+                    if useMex
+                        [L D perm] = mcholC(H);
+                    else
+                        [L D perm] = mchol(H);
+                    end
+                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));
+
+                elseif HessianModify == 3
+                    % Modified Spectral Decomposition
+                    [V,D] = eig((H+H')/2);
+                    D = diag(D);
+                    D = max(abs(D),max(max(abs(D)),1)*1e-12);
+                    d = -V*((V'*g)./D);
+                elseif HessianModify == 4
+                    % Modified Symmetric Indefinite Factorization
+                    [L,D,perm] = ldl(H,'vector');
+                    [blockPos junk] = find(triu(D,1));
+                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
+                        if D(diagInd,diagInd) < 1e-12
+                            D(diagInd,diagInd) = 1e-12;
+                        end
+                    end
+                    for blockInd = blockPos'
+                        block = D(blockInd:blockInd+1,blockInd:blockInd+1);
+                        block_a = block(1);
+                        block_b = block(2);
+                        block_d = block(4);
+                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2;
+                        % Shift the 2x2 block so its smallest eigenvalue (lambda) becomes at least 1e-12
+                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*max(0,1e-12-lambda);
+                    end
+                    d(perm) = -L' \ (D \ (L \ g(perm)));
+                else
+                    % Take Newton step if Hessian is pd,
+                    % otherwise take a step with negative curvature
+                    [R,posDef] = chol(H);
+                    if posDef == 0
+                        d = -R\(R'\g);
+                    else
+                        if debug
+                            fprintf('Taking Direction of Negative Curvature\n');
+                        end
+                        [V,D] = eig(H);
+                        u = V(:,1);
+                        d = -sign(u'*g)*u;
+                    end
+                end
+
+            else
+                % Solve with Conjugate Gradient
+                cgMaxIter = p;
+                cgForce = min(0.5,sqrt(norm(g)))*norm(g);
+
+                % Select Preconditioner
+                if cgSolve == 1
+                    % No preconditioner
+                    precondFunc = [];
+                    precondArgs = [];
+                elseif cgSolve == 2
+                    % Diagonal preconditioner
+                    precDiag = diag(H);
+                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
+                    precondFunc = @precondDiag;
+                    precondArgs = {precDiag.^-1};
+                elseif cgSolve == 3
+                    % L-BFGS preconditioner
+                    if i == 1
+                        old_dirs = zeros(length(g),0);
+                        old_stps = zeros(length(g),0);
+                        Hdiag = 1;
+                    else
+                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
+                    end
+                    g_old = g;
+                    if useMex
+                        precondFunc = @lbfgsC;
+                    else
+                        precondFunc = @lbfgs;
+                    end
+                    precondArgs = {old_dirs,old_stps,Hdiag};
+                elseif cgSolve > 0
+                    % Symmetric Successive Over-Relaxation Preconditioner
+                    omega = cgSolve;
+                    D = diag(H);
+                    D(D < 1e-12) = 1e-12 - min(D);
+                    precDiag = (omega/(2-omega))*D.^-1;
+                    precTriu = diag(D/omega) + triu(H,1);
+                    precondFunc = @precondTriuDiag;
+                    precondArgs = {precTriu,precDiag.^-1};
+                else
+                    % Incomplete Cholesky Preconditioner
+                    opts.droptol = -cgSolve;
+                    opts.rdiag = 1;
+                    R = cholinc(sparse(H),opts);
+                    if min(diag(R)) < 1e-12
+                        R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts);
+                    end
+                    precondFunc = @precondTriu;
+                    precondArgs = {R};
+                end
+
+                % Run cg with the appropriate preconditioner
+                if isempty(HvFunc)
+                    % No user-supplied Hessian-vector function
+                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
+                else
+                    % Use user-supplied Hessian-vector function
+                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
+                end
+                if debug
+                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
+                    %funEvals = funEvals + cgIter;
+                end
+            end
+
+        case TENSOR % Tensor Method
+
+            if numDiff
+                % Compute 3rd-order Tensor Numerically
+                [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:});
+            else
+                % Use user-supplied 3rd-derivative Tensor
+                [junk1 junk2 junk3 T] = feval(funObj, x, varargin{:});
+            end
+            options_sub.Method = 'newton';
+            options_sub.Display = 'none';
+            options_sub.TolX = tolX;
+            options_sub.TolFun = tolFun;
+            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);
+
+            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX
+                if debug
+                    fprintf('Using 2nd-Order Step\n');
+                end
+                [V,D] = eig((H+H')/2);
+                D = diag(D);
+                D = max(abs(D),max(max(abs(D)),1)*1e-12);
+                d = -V*((V'*g)./D);
+            else
+                if debug
+                    fprintf('Using 3rd-Order Step\n');
+                end
+            end
+    end
+
+    if ~isLegal(d)
+        fprintf('Step direction is illegal!\n');
+        pause;
+        return
+    end
+
+    % ****************** COMPUTE STEP LENGTH ************************
+
+    % Directional Derivative
+    gtd = g'*d;
+
+    % Check that progress can be made along direction
+    if gtd > -tolX
+        exitflag=2;
+        msg = 'Directional Derivative below TolX';
+        break;
+    end
+
+    % Select Initial Guess
+    if i == 1
+        if method < NEWTON0
+            t = min(1,1/sum(abs(g)));
+        else
+            t = 1;
+        end
+    else
+        if LS_init == 0
+            % Newton step
+            t = 1;
+        elseif LS_init == 1
+            % Close to previous step length
+            t = t*min(2,(gtd_old)/(gtd));
+        elseif LS_init == 2
+            % Quadratic Initialization based on {f,g} and previous f
+            t = min(1,2*(f-f_old)/(gtd));
+        elseif LS_init == 3
+            % Double previous step length
+            t = min(1,t*2);
+        elseif LS_init == 4
+            % Scaled step length if possible
+            if isempty(HvFunc)
+                % No user-supplied Hessian-vector function,
+                % use automatic differentiation
+                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
+            else
+                % Use user-supplied Hessian-vector function
+                dHd = d'*HvFunc(d,x,varargin{:});
+            end
+
+            funEvals = funEvals + 1;
+            if dHd > 0
+                t = -gtd/(dHd);
+            else
+                t = min(1,2*(f-f_old)/(gtd));
+            end
+        end
+
+        if t <= 0
+            t = 1;
+        end
+    end
+    f_old = f;
+    gtd_old = gtd;
+
+    % Compute reference fr if using non-monotone objective
+    if Fref == 1
+        fr = f;
+    else
+        if i == 1
+            old_fvals = repmat(-inf,[Fref 1]);
+        end
+
+        if i <= Fref
+            old_fvals(i) = f;
+        else
+            old_fvals = [old_fvals(2:end);f];
+        end
+        fr = max(old_fvals);
+    end
+
+    computeHessian = 0;
+    if method >= NEWTON
+        if HessianIter == 1
+            computeHessian = 1;
+        elseif i > 1 && mod(i-1,HessianIter) == 0
+            computeHessian = 1;
+        end
+    end
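+
+    % The backtracking searches below (LS < 3) look for a step satisfying the
+    % (possibly non-monotone) Armijo condition f(x+t*d) <= fr + c1*t*g'*d,
+    % while the bracketing searches (LS 3-5) additionally impose a curvature
+    % condition on g(x+t*d)'*d controlled by c2.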
+
+    % Line Search
+    f_old = f;
+    if LS < 3 % Use Armijo Backtracking
+        % Perform Backtracking line search
+        if computeHessian
+            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
+        else
+            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:});
+        end
+        funEvals = funEvals + LSfunEvals;
+
+    elseif LS < 6
+        % Find Point satisfying Wolfe
+
+        if computeHessian
+            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
+        else
+            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:});
+        end
+        funEvals = funEvals + LSfunEvals;
+        x = x + t*d;
+
+    else
+        % Use Matlab optim toolbox line search
+        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=...
+            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
+            tolX,[],[],[],varargin{:});
+        funEvals = funEvals + LSiter;
+        if isempty(t)
+            exitflag = -2;
+            msg = 'Matlab LineSearch failed';
+            break;
+        end
+
+        if method >= NEWTON
+            [f_new,g_new,H] = funObj(x + t*d,varargin{:});
+            funEvals = funEvals + 1;
+        end
+        x = x + t*d;
+        f = f_new;
+        g = g_new;
+    end
+
+    % Output iteration information
+    if verboseI
+        fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
+    end
+
+    if ~isempty(logfile)
+        fid = fopen(logfile, 'a');
+        if (fid > 0)
+            fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
+            fclose(fid);
+        end
+    end
+
+    % Output Function
+    if ~isempty(outputFcn)
+        callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
+    end
+
+    % Update Trace
+    trace.fval(end+1,1) = f;
+    trace.funcCount(end+1,1) = funEvals;
+
+    % Check Optimality Condition
+    if sum(abs(g)) <= tolFun
+        exitflag=1;
+        msg = 'Optimality Condition below TolFun';
+        break;
+    end
+
+    % ******************* Check for lack of progress *******************
+
+    if sum(abs(t*d)) <= tolX
+        exitflag=2;
+        msg = 'Step Size below TolX';
+        break;
+    end
+
+    if abs(f-f_old) < tolX
+        exitflag=2;
+        msg = 'Function Value changing by less than TolX';
+        break;
+    end
+
+    % ******** Check for going over iteration/evaluation limit *******************
+
+    if funEvals*funEvalMultiplier > maxFunEvals
+        exitflag = 0;
+        msg = 'Exceeded Maximum Number of Function Evaluations';
+        break;
+    end
+
+    if i == maxIter
+        exitflag = 0;
+        msg = 'Exceeded Maximum Number of Iterations';
+        break;
+    end
+
+end
+
+if verbose
+    fprintf('%s\n',msg);
+end
+if nargout > 3
+    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
+        'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
+end
+
+% Output Function
+if ~isempty(outputFcn)
+    callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
+end
+
+end
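+
+% Example: plotting the convergence trace recorded in 'output' (a sketch
+% using the trace fields documented above):
+%   [x,f,exitflag,output] = minFunc(funObj,x0,options);
+%   plot(output.trace.funcCount, output.trace.fval, '-o');
+%   xlabel('Function evaluations'); ylabel('Objective value');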