function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
% minFunc(funObj,x0,options,varargin)
%
% Unconstrained optimizer using a line search strategy
%
% Uses an interface very similar to fminunc
% (it doesn't support all of the optimization toolbox options,
% but supports many other options).
%
% It computes descent directions using one of ('Method'):
% - 'sd': Steepest Descent
% (no previous information used, not recommended)
% - 'csd': Cyclic Steepest Descent
% (uses previous step length for a fixed length cycle)
% - 'bb': Barzilai and Borwein Gradient
% (uses only previous step)
% - 'cg': Non-Linear Conjugate Gradient
% (uses only previous step and a vector beta)
% - 'scg': Scaled Non-Linear Conjugate Gradient
% (uses previous step and a vector beta,
% and Hessian-vector products to initialize line search)
% - 'pcg': Preconditioned Non-Linear Conjugate Gradient
% (uses only previous step and a vector beta, preconditioned version)
% - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
% (default: uses a predetermined number of previous steps to form a
% low-rank Hessian approximation)
% - 'newton0': Hessian-Free Newton
% (numerically computes Hessian-Vector products)
% - 'pnewton0': Preconditioned Hessian-Free Newton
% (numerically computes Hessian-Vector products, preconditioned
% version)
% - 'qnewton': Quasi-Newton Hessian approximation
% (uses dense Hessian approximation)
% - 'mnewton': Newton's method with Hessian calculation after every
% user-specified number of iterations
% (needs user-supplied Hessian matrix)
% - 'newton': Newton's method with Hessian calculation every iteration
% (needs user-supplied Hessian matrix)
% - 'tensor': Tensor
% (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
%
% Several line search strategies are available for finding a step length satisfying
% the termination criteria ('LS'):
% - 0: Backtrack w/ Step Size Halving
% - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values
% - 2: Backtrack w/ Cubic Interpolation from new function + gradient
% values (default for 'bb' and 'sd')
% - 3: Bracketing w/ Step Size Doubling and Bisection
% - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function +
% gradient values (default for all except 'bb' and 'sd')
% - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation
% - 6: Use Matlab Optimization Toolbox's line search
% (requires Matlab's linesearch.m to be added to the path)
%
% Above, the first three find a point satisfying the Armijo condition,
% while the last four search for a point satisfying the Wolfe
% conditions. If the objective function overflows, it is recommended
% to use one of the first three.
% The first three can be used to perform a non-monotone
% linesearch by changing the option 'Fref'.
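%
% In terms of the options below, a step length t along direction d satisfies
% the Armijo (sufficient decrease) condition when
%   f(x + t*d) <= f(x) + c1*t*g'*d,
% and the Wolfe line searches additionally enforce a curvature condition
% controlled by c2 (see the 'c1' and 'c2' options).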
%
% Several strategies for choosing the initial step size are available ('LS_init'):
% - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd')
% (t = 1)
% - 1: Use a step similar to the previous step (default for 'cg' and 'sd')
% (t = t_old*min(2,g_old'd_old/g'd))
% - 2: Quadratic Initialization using previous function value and new
% function value/gradient (use this if steps tend to be very long)
% (t = min(1,2*(f-f_old)/(g'd)))
% - 3: The minimum between 1 and twice the previous step length
% (t = min(1,2*t))
% - 4: The scaled conjugate gradient step length (may accelerate
% conjugate gradient methods, but requires a Hessian-vector product)
% (t = g'd/d'Hd)
%
% Inputs:
% funObj is a function handle (see the interface sketch below)
% x0 is a starting vector;
% options is a struct containing parameters
% (defaults are used for non-existent or blank fields)
% all other arguments are passed to funObj
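%     As a sketch of the expected interface (the quadratic objective, A, and b
%     below are illustrative only): unless numDiff is set, funObj must return
%     the objective value and its gradient, e.g.
%       funObj = @(x) deal(0.5*x'*A*x - b'*x, A*x - b);
%     The Hessian-based methods ('mnewton', 'newton', 'tensor') also expect the
%     Hessian as a third output, and 'tensor' the tensor of third derivatives
%     as a fourth output.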
%
% Outputs:
% x is the best point found (the approximate minimizer)
% f is the function value at x
% exitflag returns an exit condition
% output returns a structure with other information
%
% Supported Input Options
% Display - Level of display [ off | final | (iter) | full | excessive ]
% MaxFunEvals - Maximum number of function evaluations allowed (1000)
% MaxIter - Maximum number of iterations allowed (500)
% TolFun - Termination tolerance on the first-order optimality (1e-5)
% TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9)
% Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
% qnewton | mnewton | newton | tensor ]
% c1 - Sufficient Decrease for Armijo condition (1e-4)
% c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise)
% LS_init - Line Search Initialization - see above (2 for cg/sd, 4 for scg, 0 otherwise)
% LS - Line Search type - see above (2 for bb, 4 otherwise)
% Fref - Setting this to an integer greater than 1 makes the line search
% use a non-monotone Armijo condition, comparing against the
% maximum of the previous Fref function values
% (20 for bb, 10 for csd, 1 for all others)
% numDiff - compute derivative numerically
% (default: 0) (this option has a different effect for 'newton', see below)
% useComplex - if 1, use complex differentials when computing numerical derivatives
% to get very accurate values (default: 0)
% DerivativeCheck - if 'on', computes derivatives numerically at initial
% point and compares to user-supplied derivative (default: 'off')
% outputFcn - function to run after each iteration (default: []). It
% should have the following interface (a sketch is given below):
% outputFcn(x,infoStruct,state,varargin{:})
% useMex - where applicable, use mex files to speed things up (default: 1)
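%   As a sketch only (the contents of infoStruct are filled in by minFunc and
%   are not documented here), an output function matching the interface above
%   could be supplied as an anonymous function:
%     options.outputFcn = @(x,infoStruct,state,varargin) ...
%         fprintf('outputFcn called in state %s\n',state);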
%
% Method-specific input options:
% newton:
% HessianModify - type of Hessian modification for direct solvers to
% use if the Hessian is not positive definite (default: 0)
% 0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
% (requires eigenvalues on iterations where matrix is not pd)
% 1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
% (an approximation to method 0, does not require eigenvalues)
% 2: Modified LDL factorization
% (only 1 generalized Cholesky factorization done and no eigenvalues required)
% 3: Modified Spectral Decomposition
% (requires eigenvalues)
% 4: Modified Symmetric Indefinite Factorization
% 5: Uses the eigenvector of the smallest eigenvalue as negative
% curvature direction
% cgSolve - use conjugate gradient instead of direct solver (default: 0)
% 0: Direct Solver
% 1: Conjugate Gradient
% 2: Conjugate Gradient with Diagonal Preconditioner
% 3: Conjugate Gradient with LBFGS Preconditioner
% x: Conjugate Gradient with Symmetric Successive Over Relaxation
% Preconditioner with parameter x
% (where x is a real number in the range [0,2])
% x: Conjugate Gradient with Incomplete Cholesky Preconditioner
% with drop tolerance -x
% (where x is a real negative number)
% numDiff - compute Hessian numerically
% (default: 0, done with complex differentials if useComplex = 1)
% LS_saveHessianComp - when on, only computes the Hessian at the
% first and last iteration of the line search (default: 1)
% mnewton:
% HessianIter - number of iterations to use same Hessian (default: 5)
% qnewton:
% initialHessType - scale initial Hessian approximation (default: 1)
% qnUpdate - type of quasi-Newton update (default: 3):
% 0: BFGS
% 1: SR1 (when it is positive-definite, otherwise BFGS)
% 2: Hoshino
% 3: Self-Scaling BFGS
% 4: Oren's Self-Scaling Variable Metric method
% 5: McCormick-Huang asymmetric update
% Damped - use damped BFGS update (default: 1)
% newton0/pnewton0:
% HvFunc - user-supplied function that returns Hessian-vector products
% (by default, these are computed numerically using autoHv)
% HvFunc should have the following interface: HvFunc(v,x,varargin{:})
% useComplex - use a complex perturbation to get high accuracy
% Hessian-vector products (default: 0)
% (the increased accuracy can make the method much more efficient,
% but gradient code must properly support complex inputs)
% useNegCurv - a negative curvature direction is used as the descent
% direction if one is encountered during the cg iterations
% (default: 1)
% precFunc (for pnewton0 only) - user-supplied preconditioner
% (by default, an L-BFGS preconditioner is used)
% precFunc should have the following interface:
% precFunc(v,x,varargin{:})
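%       For example (a sketch only; A is a hypothetical fixed Hessian matrix and
%       M a hypothetical symmetric positive-definite preconditioning matrix):
%         options.HvFunc   = @(v,x,varargin) A*v;
%         options.precFunc = @(v,x,varargin) M\v;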
% lbfgs:
% Corr - number of corrections to store in memory (default: 100)
% (higher numbers converge faster but use more memory)
% Damped - use damped update (default: 0)
% cg/scg/pcg:
% cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
% 0: Fletcher Reeves
% 1: Polak-Ribiere
% 2: Hestenes-Stiefel (not supported for pcg)
% 3: Gilbert-Nocedal
% HvFunc (for scg only) - user-supplied function that returns Hessian-vector
% products
% (by default, these are computed numerically using autoHv)
% HvFunc should have the following interface:
% HvFunc(v,x,varargin{:})
% precFunc (for pcg only) - user-supplied preconditioner
% (by default, an L-BFGS preconditioner is used)
% precFunc should have the following interface:
% precFunc(v,x,varargin{:})
% bb:
% bbType - type of bb step (default: 1)
% 0: min_alpha ||delta_x - alpha delta_g||_2
% 1: min_alpha ||alpha delta_x - delta_g||_2
% 2: Conic BB
% 3: Gradient method with retards
% csd:
% cycle - length of cycle (default: 3)
%
% Supported Output Options
% iterations - number of iterations taken
% funcCount - number of function evaluations
% algorithm - algorithm used
% firstorderopt - first-order optimality
% message - exit message
% trace.funccount - function evaluations after each iteration
% trace.fval - function value after each iteration
%
% Author: Mark Schmidt (2006)
% Web: http://www.cs.ubc.ca/~schmidtm
%
% Sources (in order of how much the source material contributes):
% J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag.
% R. Fletcher. 1987. "Practical Methods of Optimization". Wiley.
% J. Demmel. 1997. "Applied Numerical Linear Algebra". SIAM.
% R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R.
% Pozo, C. Romine, and H. van der Vorst. 1994. "Templates for the Solution of
% Linear Systems: Building Blocks for Iterative Methods". SIAM.
% J. Moré and D. Thuente. "Line search algorithms with guaranteed
% sufficient decrease". ACM Trans. Math. Softw. vol 20, 286-307, 1994.
% M. Raydan. "The Barzilai and Borwein gradient method for the large
% scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33,
% (1997).
% "Mathematical Optimization". The Computational Science Education
% Project. 1995.
% C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in
% Applied Mathematics. SIAM.
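%
% Example usage (a minimal sketch; the random quadratic objective below is
% purely illustrative):
%   A = randn(10); A = A'*A + eye(10); b = randn(10,1);
%   funObj = @(x) deal(0.5*x'*A*x - b'*x, A*x - b);
%   options = struct('Method','lbfgs','Display','iter','MaxIter',100);
%   [x,f,exitflag,output] = minFunc(funObj,zeros(10,1),options);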
if nargin < 3
options = [];
end
% Get Parameters
[verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,...
corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,...
HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,...
DerivativeCheck,Damped,HvFunc,bbType,cycle,...
HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
minFunc_processInputOptions(options);
if isfield(options, 'logfile')
logfile = options.logfile;
else
logfile = [];
end
% Constants
SD = 0;
CSD = 1;
BB = 2;
CG = 3;
PCG = 4;
LBFGS = 5;
QNEWTON = 6;
NEWTON0 = 7;
NEWTON = 8;
TENSOR = 9;
% Initialize
p = length(x0);
d = zeros(p,1);
x = x0;
t = 1;
% If necessary, form numerical differentiation functions
funEvalMultiplier = 1;
if numDiff && method ~= TENSOR
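% Prepend useComplex and the original funObj to the argument list, so that the
% autoGrad/autoHess wrappers (installed as funObj below) receive them after x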
varargin(3:end+2) = varargin(1:end);
varargin{1} = useComplex;
varargin{2} = funObj;
if method ~= NEWTON
if debug
if useComplex
fprintf('Using complex differentials for gradient computation\n');
else
fprintf('Using finite differences for gradient computation\n');
end
end
funObj = @autoGrad;
else
if debug
if useComplex
fprintf('Using complex differentials for Hessian computation\n');
else
fprintf('Using finite differences for Hessian computation\n');
end
end
funObj = @autoHess;
end
if method == NEWTON0 && useComplex == 1
if debug
fprintf('Turning off the use of complex differentials\n');
end
useComplex = 0;
end
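% Each numerical gradient costs roughly p extra objective evaluations with
% complex differentials (p+1 with finite differences), so scale the reported
% function-evaluation counts accordingly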
if useComplex
funEvalMultiplier = p;
else
funEvalMultiplier = p+1;
end
end
% Evaluate Initial Point
if method < NEWTON
[f,g] = feval(funObj, x, varargin{:});
else
[f,g,H] = feval(funObj, x, varargin{:});
computeHessian = 1;
end
funEvals = 1;
if strcmp(DerivativeCheck,'on')
if numDiff
fprintf('Can not do derivative checking when numDiff is 1\n');
end
% Check provided gradient/hessian function using numerical derivatives
fprintf('Checking Gradient:\n');
[f2,g2] = autoGrad(x,useComplex,funObj,varargin{:});
fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2)));
if max(abs(g-g2)) > 1e-4
fprintf('User NumDif:\n');
[g g2]
diff = abs(g-g2)
pause;
end
if method >= NEWTON
fprintf('Check Hessian:\n');
[f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:});
fprintf('Max difference between user and numerical hessian: %f\n',max(abs(H(:)-H2(:))));
if max(abs(H(:)-H2(:))) > 1e-4
H
H2
diff = abs(H-H2)
pause;
end
end
end
% Output Log
if verboseI
fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
end
if logfile
fid = fopen(logfile, 'a');
if (fid > 0)
fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
fclose(fid);
end
end
% Output Function
if ~isempty(outputFcn)
callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:});
end
% Initialize Trace
trace.fval = f;
trace.funcCount = funEvals;
% Check optimality of initial point
if sum(abs(g)) <= tolFun
exitflag=1;
msg = 'Optimality Condition below TolFun';
if verbose
fprintf('%s\n',msg);
end
if nargout > 3
output = struct('iterations',0,'funcCount',1,...
'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
end
return;
end
% Perform up to a maximum of 'maxIter' descent steps:
for i = 1:maxIter
% ****************** COMPUTE DESCENT DIRECTION *****************
switch method
case SD % Steepest Descent
d = -g;
case CSD % Cyclic Steepest Descent
if mod(i,cycle) == 1 % Use Steepest Descent
alpha = 1;
LS_init = 2;
LS = 4; % Precise Line Search
elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step
alpha = t;
LS_init = 0;
LS = 2; % Non-monotonic line search
end
d = -alpha*g;
case BB % Steepest Descent with Barzilai and Borwein Step Length
if i == 1
d = -g;
else
y = g-g_old;
s = t*d;
if bbType == 0
yy = y'*y;
alpha = (s'*y)/(yy);
if alpha <= 1e-10 || alpha > 1e10
alpha = 1;
end
elseif bbType == 1
sy = s'*y;
alpha = (s'*s)/sy;
if alpha <= 1e-10 || alpha > 1e10
alpha = 1;
end
elseif bbType == 2 % Conic Interpolation ('Modified BB')
sy = s'*y;
ss = s'*s;
alpha = ss/sy;
if alpha <= 1e-10 || alpha > 1e10
alpha = 1;
end
alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s);
if alphaConic > .001*alpha && alphaConic < 1000*alpha
alpha = alphaConic;
end
elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step)
sy = s'*y;
alpha = (s'*s)/sy;
if alpha <= 1e-10 || alpha > 1e10
alpha = 1;
end
v(1+mod(i-2,5)) = alpha;
alpha = v(ceil(rand*length(v)));
end
d = -alpha*g;
end
g_old = g;
myF_old = f;
case CG % Non-Linear Conjugate Gradient
if i == 1
d = -g; % Initially use steepest descent direction
else
gtgo = g'*g_old;
gotgo = g_old'*g_old;
if cgUpdate == 0
% Fletcher-Reeves
beta = (g'*g)/(gotgo);
elseif cgUpdate == 1
% Polak-Ribiere
beta = (g'*(g-g_old)) /(gotgo);
elseif cgUpdate == 2
% Hestenes-Stiefel
beta = (g'*(g-g_old))/((g-g_old)'*d);
else
% Gilbert-Nocedal
beta_FR = (g'*g)/(gotgo);
beta_PR = (g'*g-gtgo)/(gotgo);
beta = max(-beta_FR,min(beta_PR,beta_FR));
end
d = -g + beta*d;
% Restart if not a direction of sufficient descent
if g'*d > -tolX
if debug
fprintf('Restarting CG\n');
end
beta = 0;
d = -g;
end
% Old restart rule:
%if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0
end
g_old = g;
case PCG % Preconditioned Non-Linear Conjugate Gradient
% Apply preconditioner to negative gradient
if isempty(precFunc)
% Use L-BFGS Preconditioner
if i == 1
old_dirs = zeros(length(g),0);
old_stps = zeros(length(g),0);
Hdiag = 1;
s = -g;
else
[old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
if useMex
s = lbfgsC(-g,old_dirs,old_stps,Hdiag);
else
s = lbfgs(-g,old_dirs,old_stps,Hdiag);
end
end
else % User-supplied preconditioner
s = precFunc(-g,x,varargin{:});
end
if i == 1
d = s;
else
if cgUpdate == 0
% Preconditioned Fletcher-Reeves
beta = (g'*s)/(g_old'*s_old);
elseif cgUpdate < 3
% Preconditioned Polak-Ribiere
beta = (g'*(s-s_old))/(g_old'*s_old);
else
% Preconditioned Gilbert-Nocedal
beta_FR = (g'*s)/(g_old'*s_old);
beta_PR = (g'*(s-s_old))/(g_old'*s_old);
beta = max(-beta_FR,min(beta_PR,beta_FR));
end
d = s + beta*d;
if g'*d > -tolX
if debug
fprintf('Restarting CG\n');
end
beta = 0;
d = s;
end
end
g_old = g;
s_old = s;
case LBFGS % L-BFGS
% Update the direction and step sizes
if i == 1
d = -g; % Initially use steepest descent direction
old_dirs = zeros(length(g),0);
old_stps = zeros(length(d),0);
Hdiag = 1;
else
if Damped
[old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
else
[old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
end
if useMex
d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
else
d = lbfgs(-g,old_dirs,old_stps,Hdiag);
end
end
g_old = g;
case QNEWTON % Use quasi-Newton Hessian approximation
if i == 1
d = -g;
else
% Compute difference vectors
y = g-g_old;
s = t*d;
if i == 2
% Make initial Hessian approximation
if initialHessType == 0
% Identity
if qnUpdate <= 1
R = eye(length(g));
else
H = eye(length(g));
end
else
% Scaled Identity
if debug
fprintf('Scaling Initial Hessian Approximation\n');
end
if qnUpdate <= 1
% Use Cholesky of Hessian approximation
R = sqrt((y'*y)/(y'*s))*eye(length(g));
else
% Use Inverse of Hessian approximation
H = eye(length(g))*(y'*s)/(y'*y);
end
end
end
if qnUpdate == 0 % Use BFGS updates
Bs = R'*(R*s);
if Damped
eta = .02;
if y'*s < eta*s'*Bs
if debug
fprintf('Damped Update\n');
end
theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
y = theta*y + (1-theta)*Bs;
end
R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
else
if y'*s > 1e-10
R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
else
if debug
fprintf('Skipping Update\n');
end
end
end
elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness
Bs = R'*(R*s);
ymBs = y-Bs;
if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
else
if debug
fprintf('SR1 not positive-definite, doing BFGS Update\n');
end
if Damped
eta = .02;
if y'*s < eta*s'*Bs
if debug
fprintf('Damped Update\n');
end
theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
y = theta*y + (1-theta)*Bs;
end
R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
else
if y'*s > 1e-10
R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
else
if debug
fprintf('Skipping Update\n');
end
end
end
end
elseif qnUpdate == 2 % Use Hoshino update
v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
phi = 1/(1 + (y'*H*y)/(s'*y));
H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';
elseif qnUpdate == 3 % Self-Scaling BFGS update
ys = y'*s;
Hy = H*y;
yHy = y'*Hy;
gamma = ys/yHy;
v = sqrt(yHy)*(s/ys - Hy/yHy);
H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update
% Oren's method
if (s'*y)/(y'*H*y) > 1
phi = 1; % BFGS
omega = 0;
elseif (s'*(H\s))/(s'*y) < 1
phi = 0; % DFP
omega = 1;
else
phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
omega = phi;
end
gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);
elseif qnUpdate == 5 % McCormick-Huang asymmetric update
theta = 1;
phi = 0;
psi = 1;
omega = 0;
t1 = s*(theta*s + phi*H'*y)';
t2 = (theta*s + phi*H'*y)'*y;
t3 = H*y*(psi*s + omega*H'*y)';
t4 = (psi*s + omega*H'*y)'*y;
H = H + t1/t2 - t3/t4;
end
if qnUpdate <= 1
d = -R\(R'\g);
else
d = -H*g;
end
end
g_old = g;
case NEWTON0 % Hessian-Free Newton
cgMaxIter = min(p,maxFunEvals-funEvals);
cgForce = min(0.5,sqrt(norm(g)))*norm(g);
% Set-up preconditioner
precondFunc = [];
precondArgs = [];
if cgSolve == 1
if isempty(precFunc) % Apply L-BFGS preconditioner
if i == 1
old_dirs = zeros(length(g),0);
old_stps = zeros(length(g),0);
Hdiag = 1;
else
[old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
if useMex
precondFunc = @lbfgsC;
else
precondFunc = @lbfgs;
end
precondArgs = {old_dirs,old_stps,Hdiag};
end
g_old = g;
else
% Apply user-defined preconditioner
precondFunc = precFunc;
precondArgs = {x,varargin{:}};
end
end
% Solve Newton system using cg and hessian-vector products
if isempty(HvFunc)
% No user-supplied Hessian-vector function,
% use automatic differentiation
HvFun = @autoHv;
HvArgs = {x,g,useComplex,funObj,varargin{:}};
else
% Use user-supplied Hessian-vector function
HvFun = HvFunc;
HvArgs = {x,varargin{:}};
end
if useNegCurv
[d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
else
[d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
end
funEvals = funEvals+cgIter;
if debug
fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
end
if useNegCurv
if ~isempty(negCurv)
%if debug
fprintf('Using negative curvature direction\n');
%end
d = negCurv/norm(negCurv);
d = d/sum(abs(g));
end
end
case NEWTON % Newton search direction
if cgSolve == 0
if HessianModify == 0
% Attempt to perform a Cholesky factorization of the Hessian
[R,posDef] = chol(H);
% If the Cholesky factorization was successful, then the Hessian is
% positive definite, solve the system
if posDef == 0
d = -R\(R'\g);
else
% otherwise, adjust the Hessian to be positive definite based on the
% minimum eigenvalue, and solve with QR
% (expensive, we don't want to do this very much)
if debug
fprintf('Adjusting Hessian\n');
end
H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
d = -H\g;
end
elseif HessianModify == 1
% Modified Incomplete Cholesky
R = mcholinc(H,debug);
d = -R\(R'\g);
elseif HessianModify == 2
% Modified Generalized Cholesky
if useMex
[L D perm] = mcholC(H);
else
[L D perm] = mchol(H);
end
d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));
elseif HessianModify == 3
% Modified Spectral Decomposition
[V,D] = eig((H+H')/2);
D = diag(D);
D = max(abs(D),max(max(abs(D)),1)*1e-12);
d = -V*((V'*g)./D);
elseif HessianModify == 4
% Modified Symmetric Indefinite Factorization
[L,D,perm] = ldl(H,'vector');
[blockPos junk] = find(triu(D,1));
for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
if D(diagInd,diagInd) < 1e-12
D(diagInd,diagInd) = 1e-12;
end
end
for blockInd = blockPos'
block = D(blockInd:blockInd+1,blockInd:blockInd+1);
block_a = block(1);
block_b = block(2);
block_d = block(4);
lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2;
D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*max(0,1e-12-lambda);
end
d(perm) = -L' \ (D \ (L \ g(perm)));
else
% Take Newton step if Hessian is pd,
% otherwise take a step with negative curvature
[R,posDef] = chol(H);
if posDef == 0
d = -R\(R'\g);
else
if debug
fprintf('Taking Direction of Negative Curvature\n');
end
[V,D] = eig(H);
u = V(:,1);
d = -sign(u'*g)*u;
end
end
else
% Solve with Conjugate Gradient
cgMaxIter = p;
cgForce = min(0.5,sqrt(norm(g)))*norm(g);
% Select Preconditioner
if cgSolve == 1
% No preconditioner
precondFunc = [];
precondArgs = [];
elseif cgSolve == 2
% Diagonal preconditioner
precDiag = diag(H);
precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
precondFunc = @precondDiag;
precondArgs = {precDiag.^-1};
elseif cgSolve == 3
% L-BFGS preconditioner
if i == 1
old_dirs = zeros(length(g),0);
old_stps = zeros(length(g),0);
Hdiag = 1;
else
[old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
end
g_old = g;
if useMex
precondFunc = @lbfgsC;
else
precondFunc = @lbfgs;
end
precondArgs = {old_dirs,old_stps,Hdiag};
elseif cgSolve > 0
% Symmetric Successive Over-Relaxation Preconditioner
omega = cgSolve;
D = diag(H);
D(D < 1e-12) = 1e-12 - min(D);
precDiag = (omega/(2-omega))*D.^-1;
precTriu = diag(D/omega) + triu(H,1);
precondFunc = @precondTriuDiag;
precondArgs = {precTriu,precDiag.^-1};
else
% Incomplete Cholesky Preconditioner
opts.droptol = -cgSolve;
opts.rdiag = 1;
R = cholinc(sparse(H),opts);
if min(diag(R)) < 1e-12
R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts);
end
precondFunc = @precondTriu;
precondArgs = {R};
end
% Run cg with the appropriate preconditioner
if isempty(HvFunc)
% No user-supplied Hessian-vector function
[d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
else
% Use user-supplied Hessian-vector function
[d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
end
if debug
fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
%funEvals = funEvals + cgIter;
end
end
case TENSOR % Tensor Method
if numDiff
% Compute 3rd-order Tensor Numerically
[junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:});
else
% Use user-supplied 3rd-derivative Tensor
[junk1 junk2 junk3 T] = feval(funObj, x, varargin{:});
end
options_sub.Method = 'newton';
options_sub.Display = 'none';
options_sub.TolX = tolX;
options_sub.TolFun = tolFun;
d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);
if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX
if debug
fprintf('Using 2nd-Order Step\n');
end
[V,D] = eig((H+H')/2);
D = diag(D);
D = max(abs(D),max(max(abs(D)),1)*1e-12);
d = -V*((V'*g)./D);
else
if debug
fprintf('Using 3rd-Order Step\n');
end
end
end
if ~isLegal(d)
fprintf('Step direction is illegal!\n');
pause;
return
end
% ****************** COMPUTE STEP LENGTH ************************
% Directional Derivative
gtd = g'*d;
% Check that progress can be made along direction
if gtd > -tolX
exitflag=2;
msg = 'Directional Derivative below TolX';
break;
end
% Select Initial Guess
if i == 1
if method < NEWTON0
t = min(1,1/sum(abs(g)));
else
t = 1;
end
else
if LS_init == 0
% Newton step
t = 1;
elseif LS_init == 1
% Close to previous step length
t = t*min(2,(gtd_old)/(gtd));
elseif LS_init == 2
% Quadratic Initialization based on {f,g} and previous f
t = min(1,2*(f-f_old)/(gtd));
elseif LS_init == 3
% Double previous step length
t = min(1,t*2);
elseif LS_init == 4
% Scaled step length if possible
if isempty(HvFunc)
% No user-supplied Hessian-vector function,
% use automatic differentiation
dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
else
% Use user-supplied Hessian-vector function
dHd = d'*HvFunc(d,x,varargin{:});
end
funEvals = funEvals + 1;
if dHd > 0
t = -gtd/(dHd);
else
t = min(1,2*(f-f_old)/(gtd));
end
end
if t <= 0
t = 1;
end
end
f_old = f;
gtd_old = gtd;
% Compute reference fr if using non-monotone objective
if Fref == 1
fr = f;
else
if i == 1
old_fvals = repmat(-inf,[Fref 1]);
end
if i <= Fref
old_fvals(i) = f;
else
old_fvals = [old_fvals(2:end);f];
end
fr = max(old_fvals);
end
computeHessian = 0;
if method >= NEWTON
if HessianIter == 1
computeHessian = 1;
elseif i > 1 && mod(i-1,HessianIter) == 0
computeHessian = 1;
end
end
% Line Search
f_old = f;
if LS < 3 % Use Armijo Backtracking
% Perform Backtracking line search
if computeHessian
[t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
else
[t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:});
end
funEvals = funEvals + LSfunEvals;
elseif LS < 6
% Find Point satisfying Wolfe
if computeHessian
[t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
else
[t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:});
end
funEvals = funEvals + LSfunEvals;
x = x + t*d;
else
% Use Matlab optim toolbox line search
[t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=...
lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
tolX,[],[],[],varargin{:});
funEvals = funEvals + LSiter;
if isempty(t)
exitflag = -2;
msg = 'Matlab LineSearch failed';
break;
end
if method >= NEWTON
[f_new,g_new,H] = funObj(x + t*d,varargin{:});
funEvals = funEvals + 1;
end
x = x + t*d;
f = f_new;
g = g_new;
end
% Output iteration information
if verboseI
fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
end
if logfile
fid = fopen(logfile, 'a');
if (fid > 0)
fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
fclose(fid);
end
end
% Output Function
if ~isempty(outputFcn)
callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
end
% Update Trace
trace.fval(end+1,1) = f;
trace.funcCount(end+1,1) = funEvals;
% Check Optimality Condition
if sum(abs(g)) <= tolFun
exitflag=1;
msg = 'Optimality Condition below TolFun';
break;
end
% ******************* Check for lack of progress *******************
if sum(abs(t*d)) <= tolX
exitflag=2;
msg = 'Step Size below TolX';
break;
end
if abs(f-f_old) < tolX
exitflag=2;
msg = 'Function Value changing by less than TolX';
break;
end
% ******** Check for going over iteration/evaluation limit *******************
if funEvals*funEvalMultiplier > maxFunEvals
exitflag = 0;
msg = 'Exceeded Maximum Number of Function Evaluations';
break;
end
if i == maxIter
exitflag = 0;
msg='Exceeded Maximum Number of Iterations';
break;
end
end
if verbose
fprintf('%s\n',msg);
end
if nargout > 3
output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
end
% Output Function
if ~isempty(outputFcn)
callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
end
end