function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
% minFunc(funObj,x0,options,varargin)
%
% Unconstrained optimizer using a line search strategy
%
% Uses an interface very similar to fminunc
%   (it doesn't support all of the optimization toolbox options,
%       but supports many other options).
%
% It computes descent directions using one of ('Method'):
%   - 'sd': Steepest Descent
%       (no previous information used, not recommended)
%   - 'csd': Cyclic Steepest Descent
%       (uses previous step length for a fixed length cycle)
%   - 'bb': Barzilai and Borwein Gradient
%       (uses only previous step)
%   - 'cg': Non-Linear Conjugate Gradient
%       (uses only previous step and a vector beta)
%   - 'scg': Scaled Non-Linear Conjugate Gradient
%       (uses previous step and a vector beta,
%           and Hessian-vector products to initialize line search)
%   - 'pcg': Preconditioned Non-Linear Conjugate Gradient
%       (uses only previous step and a vector beta, preconditioned version)
%   - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
%       (default: uses a predetermined number of previous steps to form a
%           low-rank Hessian approximation)
%   - 'newton0': Hessian-Free Newton
%       (numerically computes Hessian-Vector products)
%   - 'pnewton0': Preconditioned Hessian-Free Newton
%       (numerically computes Hessian-Vector products, preconditioned
%       version)
%   - 'qnewton': Quasi-Newton Hessian approximation
%       (uses dense Hessian approximation)
%   - 'mnewton': Newton's method with Hessian calculation after every
%   user-specified number of iterations
%       (needs user-supplied Hessian matrix)
%   - 'newton': Newton's method with Hessian calculation every iteration
%       (needs user-supplied Hessian matrix)
%   - 'tensor': Tensor
%       (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
%
% Several line search strategies are available for finding a step length satisfying
%   the termination criteria ('LS'):
%   - 0: Backtrack w/ Step Size Halving
%   - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values
%   - 2: Backtrack w/ Cubic Interpolation from new function + gradient
%   values (default for 'bb' and 'sd')
%   - 3: Bracketing w/ Step Size Doubling and Bisection
%   - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function +
%   gradient values (default for all except 'bb' and 'sd')
%   - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation
%   - 6: Use Matlab Optimization Toolbox's line search
%           (requires Matlab's linesearch.m to be added to the path)
%
%   Above, the first three find a point satisfying the Armijo conditions,
%   while the last four find a point satisfying the Wolfe conditions.
%   If the objective function overflows, it is recommended to use one of
%   the first three.
%   The first three can be used to perform a non-monotone
%   linesearch by changing the option 'Fref'.
%
% Several strategies for choosing the initial step size are available ('LS_init'):
%   - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd')
%       (t = 1)
%   - 1: Use a step similar to the previous step (default for 'cg' and 'sd')
%       (t = t_old*min(2,(g_old'd_old)/(g'd)))
%   - 2: Quadratic Initialization using previous function value and new
%   function value/gradient (use this if steps tend to be very long)
%       (t = min(1,2*(f-f_old)/(g'd)))
%   - 3: The minimum between 1 and twice the previous step length
%       (t = min(1,2*t))
%   - 4: The scaled conjugate gradient step length (may accelerate
%   conjugate gradient methods, but requires a Hessian-vector product)
%       (t = -g'd/(d'Hd))
%
% Inputs:
%   funObj is a function handle
%   x0 is a starting vector
%   options is a struct containing parameters
%       (defaults are used for non-existent or blank fields)
%   all other arguments are passed to funObj
%
% Outputs:
%   x is the minimizer found
%   f is the function value at the minimum found
%   exitflag returns an exit condition
%   output returns a structure with other information
%
% Supported Input Options
%   Display - Level of display [ off | final | (iter) | full | excessive ]
%   MaxFunEvals - Maximum number of function evaluations allowed (1000)
%   MaxIter - Maximum number of iterations allowed (500)
%   TolFun - Termination tolerance on the first-order optimality (1e-5)
%   TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9)
%   Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
%       qnewton | mnewton | newton | tensor ]
%   c1 - Sufficient Decrease for Armijo condition (1e-4)
%   c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise)
%   LS_init - Line Search Initialization - see above (2 for cg/sd, 4 for scg, 0 otherwise)
%   LS - Line Search type - see above (2 for bb, 4 otherwise)
%   Fref - Setting this to a positive integer greater than 1
%       will use a non-monotone Armijo objective in the line search.
%       (20 for bb, 10 for csd, 1 for all others)
%   numDiff - compute derivative numerically
%       (default: 0) (this option has a different effect for 'newton', see below)
%   useComplex - if 1, use complex differentials when computing numerical derivatives
%       to get very accurate values (default: 0)
%   DerivativeCheck - if 'on', computes derivatives numerically at initial
%       point and compares to user-supplied derivative (default: 'off')
%   outputFcn - function to run after each iteration (default: []).  It
%       should have the following interface (see the example below this list):
%       outputFcn(x,infoStruct,state,varargin{:})
%   useMex - where applicable, use mex files to speed things up (default: 1)
%
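%   Example outputFcn (a hypothetical logger; only the interface above is
%   assumed):
%       function myOutput(x,infoStruct,state,varargin)
%           fprintf('%s: norm(x) = %g\n',state,norm(x));
%       end
%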
% Method-specific input options:
%   newton:
%       HessianModify - type of Hessian modification for direct solvers to
%       use if the Hessian is not positive definite (default: 0)
%           0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
%           (requires eigenvalues on iterations where matrix is not pd)
%           1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
%           (an approximation to method 0, does not require eigenvalues)
%           2: Modified LDL factorization
%           (only 1 generalized Cholesky factorization done and no eigenvalues required)
%           3: Modified Spectral Decomposition
%           (requires eigenvalues)
%           4: Modified Symmetric Indefinite Factorization
%           5: Uses the eigenvector of the smallest eigenvalue as negative
%           curvature direction
%       cgSolve - use conjugate gradient instead of direct solver (default: 0)
%           0: Direct Solver
%           1: Conjugate Gradient
%           2: Conjugate Gradient with Diagonal Preconditioner
%           3: Conjugate Gradient with LBFGS Preconditioner
%           x: Conjugate Gradient with Symmetric Successive Over Relaxation
%           Preconditioner with parameter x
%               (where x is a real number in the range [0,2])
%           x: Conjugate Gradient with Incomplete Cholesky Preconditioner
%           with drop tolerance -x
%               (where x is a real negative number)
%       numDiff - compute Hessian numerically
%                 (default: 0, done with complex differentials if useComplex = 1)
%       LS_saveHessianComp - when on, only computes the Hessian at the
%       first and last iteration of the line search (default: 1)
%   mnewton:
%       HessianIter - number of iterations to use same Hessian (default: 5)
%   qnewton:
%       initialHessType - scale initial Hessian approximation (default: 1)
%       qnUpdate - type of quasi-Newton update (default: 3):
%           0: BFGS
%           1: SR1 (when it is positive-definite, otherwise BFGS)
%           2: Hoshino
%           3: Self-Scaling BFGS
%           4: Oren's Self-Scaling Variable Metric method
%           5: McCormick-Huang asymmetric update
%       Damped - use damped BFGS update (default: 1)
%   newton0/pnewton0:
%       HvFunc - user-supplied function that returns Hessian-vector products
%           (by default, these are computed numerically using autoHv)
%           HvFunc should have the following interface: HvFunc(v,x,varargin{:})
%       useComplex - use a complex perturbation to get high accuracy
%           Hessian-vector products (default: 0)
%           (the increased accuracy can make the method much more efficient,
%               but gradient code must properly support complex inputs)
%       useNegCurv - a negative curvature direction is used as the descent
%           direction if one is encountered during the cg iterations
%           (default: 1)
%       precFunc (for pnewton0 only) - user-supplied preconditioner
%           (by default, an L-BFGS preconditioner is used)
%           precFunc should have the following interface:
%           precFunc(v,x,varargin{:})
%   lbfgs:
%       Corr - number of corrections to store in memory (default: 100)
%           (higher numbers converge faster but use more memory)
%       Damped - use damped update (default: 0)
%   pcg:
%       cgUpdate - type of update (default: 2)
%   cg/scg/pcg:
%       cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
%           0: Fletcher Reeves
%           1: Polak-Ribiere
%           2: Hestenes-Stiefel (not supported for pcg)
%           3: Gilbert-Nocedal
%       HvFunc (for scg only) - user-supplied function that returns Hessian-vector
%           products
%           (by default, these are computed numerically using autoHv)
%           HvFunc should have the following interface:
%           HvFunc(v,x,varargin{:})
%       precFunc (for pcg only) - user-supplied preconditioner
%           (by default, an L-BFGS preconditioner is used)
%           precFunc should have the following interface:
%           precFunc(v,x,varargin{:})
%   bb:
%       bbType - type of bb step (default: 1)
%           0: min_alpha ||delta_x - alpha delta_g||_2
%           1: min_alpha ||alpha delta_x - delta_g||_2
%           2: Conic BB
%           3: Gradient method with retards
%   csd:
%       cycle - length of cycle (default: 3)
%
% Supported Output Options
%   iterations - number of iterations taken
%   funcCount - number of function evaluations
%   algorithm - algorithm used
%   firstorderopt - first-order optimality
%   message - exit message
%   trace.funccount - function evaluations after each iteration
%   trace.fval - function value after each iteration
%
% Author: Mark Schmidt (2006)
% Web: http://www.cs.ubc.ca/~schmidtm
%
% Sources (in order of how much the source material contributes):
%   J. Nocedal and S.J. Wright.  1999.  "Numerical Optimization".  Springer Verlag.
%   R. Fletcher.  1987.  "Practical Methods of Optimization".  Wiley.
%   J. Demmel.  1997.  "Applied Numerical Linear Algebra".  SIAM.
%   R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R.
%   Pozo, C. Romine, and H. van der Vorst.  1994.  "Templates for the Solution of
%   Linear Systems: Building Blocks for Iterative Methods".  SIAM.
%   J. Moré and D. Thuente.  "Line search algorithms with guaranteed
%   sufficient decrease".  ACM Trans. Math. Softw. vol 20, 286-307, 1994.
%   M. Raydan.  "The Barzilai and Borwein gradient method for the large
%   scale unconstrained minimization problem".  SIAM J. Optim., 7, 26-33,
%   (1997).
%   "Mathematical Optimization".  The Computational Science Education
%   Project.  1995.
%   C. Kelley.  1999.  "Iterative Methods for Optimization".  Frontiers in
%   Applied Mathematics.  SIAM.

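% Example usage (a minimal sketch; 'myObj' is a hypothetical handle that
% returns the objective value and its gradient):
%   options = [];
%   options.Method = 'lbfgs';
%   options.Display = 'iter';
%   options.MaxIter = 200;
%   [x,f,exitflag,output] = minFunc(@myObj,x0,options);
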
if nargin < 3
    options = [];
end

% Get Parameters
[verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,...
    corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,...
    HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,...
    DerivativeCheck,Damped,HvFunc,bbType,cycle,...
    HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
    minFunc_processInputOptions(options);

if isfield(options, 'logfile')
    logfile = options.logfile;
else
    logfile = [];
end

% Constants
SD = 0;
CSD = 1;
BB = 2;
CG = 3;
PCG = 4;
LBFGS = 5;
QNEWTON = 6;
NEWTON0 = 7;
NEWTON = 8;
TENSOR = 9;

% Initialize
p = length(x0);
d = zeros(p,1);
x = x0;
t = 1;

% If necessary, form numerical differentiation functions
funEvalMultiplier = 1;
if numDiff && method ~= TENSOR
    varargin(3:end+2) = varargin(1:end);
    varargin{1} = useComplex;
    varargin{2} = funObj;
    if method ~= NEWTON
        if debug
            if useComplex
                fprintf('Using complex differentials for gradient computation\n');
            else
                fprintf('Using finite differences for gradient computation\n');
            end
        end
        funObj = @autoGrad;
    else
        if debug
            if useComplex
                fprintf('Using complex differentials for Hessian computation\n');
            else
                fprintf('Using finite differences for Hessian computation\n');
            end
        end
        funObj = @autoHess;
    end

    if method == NEWTON0 && useComplex == 1
        if debug
            fprintf('Turning off the use of complex differentials\n');
        end
        useComplex = 0;
    end

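    % Cost accounting for numerical differentiation: a complex-step gradient
    % takes p objective evaluations, while forward differences take p+1
    % (the extra evaluation is f(x) itself).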
    if useComplex
        funEvalMultiplier = p;
    else
        funEvalMultiplier = p+1;
    end
end

% Evaluate Initial Point
if method < NEWTON
    [f,g] = feval(funObj, x, varargin{:});
else
    [f,g,H] = feval(funObj, x, varargin{:});
    computeHessian = 1;
end
funEvals = 1;

if strcmp(DerivativeCheck,'on')
    if numDiff
        fprintf('Cannot do derivative checking when numDiff is 1\n');
    end
    % Check provided gradient/hessian function using numerical derivatives
    fprintf('Checking Gradient:\n');
    [f2,g2] = autoGrad(x,useComplex,funObj,varargin{:});

    fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2)));
    if max(abs(g-g2)) > 1e-4
        fprintf('User NumDif:\n');
        [g g2]
        diff = abs(g-g2)
        pause;
    end

    if method >= NEWTON
        fprintf('Check Hessian:\n');
        [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:});

        fprintf('Max difference between user and numerical hessian: %f\n',max(abs(H(:)-H2(:))));
        if max(abs(H(:)-H2(:))) > 1e-4
            H
            H2
            diff = abs(H-H2)
            pause;
        end
    end
end

% Output Log
if verboseI
    fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
end

if logfile
    fid = fopen(logfile, 'a');
    if (fid > 0)
        fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
        fclose(fid);
    end
end

% Output Function
if ~isempty(outputFcn)
    callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:});
end

% Initialize Trace
trace.fval = f;
trace.funcCount = funEvals;

% Check optimality of initial point
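% (first-order optimality is measured throughout as the L1 norm of the gradient)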
if sum(abs(g)) <= tolFun
    exitflag=1;
    msg = 'Optimality Condition below TolFun';
    if verbose
        fprintf('%s\n',msg);
    end
    if nargout > 3
        output = struct('iterations',0,'funcCount',1,...
            'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
    end
    return;
end

% Perform up to a maximum of 'maxIter' descent steps:
for i = 1:maxIter

    % ****************** COMPUTE DESCENT DIRECTION *****************

    switch method
        case SD % Steepest Descent
            d = -g;

        case CSD % Cyclic Steepest Descent

            if mod(i,cycle) == 1 % Use Steepest Descent
                alpha = 1;
                LS_init = 2;
                LS = 4; % Precise Line Search
            elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step
                alpha = t;
                LS_init = 0;
                LS = 2; % Non-monotonic line search
            end
            d = -alpha*g;

        case BB % Steepest Descent with Barzilai and Borwein Step Length

            if i == 1
                d = -g;
            else
                y = g-g_old;
                s = t*d;
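                % Barzilai-Borwein step lengths: bbType 0 uses
                % alpha = (s'*y)/(y'*y), bbType 1 uses alpha = (s'*s)/(s'*y);
                % both are safeguarded to the interval [1e-10,1e10].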
                if bbType == 0
                    yy = y'*y;
                    alpha = (s'*y)/(yy);
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                elseif bbType == 1
                    sy = s'*y;
                    alpha = (s'*s)/sy;
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                elseif bbType == 2 % Conic Interpolation ('Modified BB')
                    sy = s'*y;
                    ss = s'*s;
                    alpha = ss/sy;
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                    alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s);
                    if alphaConic > .001*alpha && alphaConic < 1000*alpha
                        alpha = alphaConic;
                    end
                elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step)
                    sy = s'*y;
                    alpha = (s'*s)/sy;
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                    v(1+mod(i-2,5)) = alpha;
                    alpha = v(ceil(rand*length(v)));
                end
                d = -alpha*g;
            end
            g_old = g;
            myF_old = f;


        case CG % Non-Linear Conjugate Gradient

            if i == 1
                d = -g; % Initially use steepest descent direction
            else
                gtgo = g'*g_old;
                gotgo = g_old'*g_old;

                if cgUpdate == 0
                    % Fletcher-Reeves
                    beta = (g'*g)/(gotgo);
                elseif cgUpdate == 1
                    % Polak-Ribiere
                    beta = (g'*(g-g_old)) /(gotgo);
                elseif cgUpdate == 2
                    % Hestenes-Stiefel
                    beta = (g'*(g-g_old))/((g-g_old)'*d);
                else
                    % Gilbert-Nocedal: restrict the Polak-Ribiere beta to
                    % the interval [-beta_FR,beta_FR]
                    beta_FR = (g'*g)/(gotgo);
                    beta_PR = (g'*g-gtgo)/(gotgo);
                    beta = max(-beta_FR,min(beta_PR,beta_FR));
                end

                d = -g + beta*d;

                % Restart if not a direction of sufficient descent
                if g'*d > -tolX
                    if debug
                        fprintf('Restarting CG\n');
                    end
                    beta = 0;
                    d = -g;
                end

                % Old restart rule:
                %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0

            end
            g_old = g;

        case PCG % Preconditioned Non-Linear Conjugate Gradient

            % Apply preconditioner to negative gradient
            if isempty(precFunc)
                % Use L-BFGS Preconditioner
                if i == 1
                    old_dirs = zeros(length(g),0);
                    old_stps = zeros(length(g),0);
                    Hdiag = 1;
                    s = -g;
                else
                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);

                    if useMex
                        s = lbfgsC(-g,old_dirs,old_stps,Hdiag);
                    else
                        s = lbfgs(-g,old_dirs,old_stps,Hdiag);
                    end
                end
            else % User-supplied preconditioner
                s = precFunc(-g,x,varargin{:});
            end

            if i == 1
                d = s;
            else

                if cgUpdate == 0
                    % Preconditioned Fletcher-Reeves
                    beta = (g'*s)/(g_old'*s_old);
                elseif cgUpdate < 3
                    % Preconditioned Polak-Ribiere
                    beta = (g'*(s-s_old))/(g_old'*s_old);
                else
                    % Preconditioned Gilbert-Nocedal
                    beta_FR = (g'*s)/(g_old'*s_old);
                    beta_PR = (g'*(s-s_old))/(g_old'*s_old);
                    beta = max(-beta_FR,min(beta_PR,beta_FR));
                end
                d = s + beta*d;

                if g'*d > -tolX
                    if debug
                        fprintf('Restarting CG\n');
                    end
                    beta = 0;
                    d = s;
                end

            end
            g_old = g;
            s_old = s;
        case LBFGS % L-BFGS

            % Update the direction and step sizes

            if i == 1
                d = -g; % Initially use steepest descent direction
                old_dirs = zeros(length(g),0);
                old_stps = zeros(length(d),0);
                Hdiag = 1;
            else
                if Damped
                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                else
                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                end

                if useMex
                    d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
                else
                    d = lbfgs(-g,old_dirs,old_stps,Hdiag);
                end
            end
            g_old = g;

        case QNEWTON % Use quasi-Newton Hessian approximation

            if i == 1
                d = -g;
            else
                % Compute difference vectors
                y = g-g_old;
                s = t*d;

                if i == 2
                    % Make initial Hessian approximation
                    if initialHessType == 0
                        % Identity
                        if qnUpdate <= 1
                            R = eye(length(g));
                        else
                            H = eye(length(g));
                        end
                    else
                        % Scaled Identity
                        if debug
                            fprintf('Scaling Initial Hessian Approximation\n');
                        end
                        if qnUpdate <= 1
                            % Use Cholesky of Hessian approximation
                            R = sqrt((y'*y)/(y'*s))*eye(length(g));
                        else
                            % Use Inverse of Hessian approximation
                            H = eye(length(g))*(y'*s)/(y'*y);
                        end
                    end
                end

                if qnUpdate == 0 % Use BFGS updates
                    Bs = R'*(R*s);
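                    % Damped BFGS update (Powell damping): when y'*s is small
                    % relative to s'*Bs, blend y toward Bs so the update
                    % preserves positive definiteness.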
                    if Damped
                        eta = .02;
                        if y'*s < eta*s'*Bs
                            if debug
                                fprintf('Damped Update\n');
                            end
                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                            y = theta*y + (1-theta)*Bs;
                        end
                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                    else
                        if y'*s > 1e-10
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if debug
                                fprintf('Skipping Update\n');
                            end
                        end
                    end
                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness

                    Bs = R'*(R*s);
                    ymBs = y-Bs;
                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
                    else
                        if debug
                            fprintf('SR1 not positive-definite, doing BFGS Update\n');
                        end
                        if Damped
                            eta = .02;
                            if y'*s < eta*s'*Bs
                                if debug
                                    fprintf('Damped Update\n');
                                end
                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                                y = theta*y + (1-theta)*Bs;
                            end
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if y'*s > 1e-10
                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                            else
                                if debug
                                    fprintf('Skipping Update\n');
                                end
                            end
                        end
                    end
                elseif qnUpdate == 2 % Use Hoshino update
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    phi = 1/(1 + (y'*H*y)/(s'*y));
                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';

                elseif qnUpdate == 3 % Self-Scaling BFGS update
                    ys = y'*s;
                    Hy = H*y;
                    yHy = y'*Hy;
                    gamma = ys/yHy;
                    v = sqrt(yHy)*(s/ys - Hy/yHy);
                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update

                    % Oren's method
                    if (s'*y)/(y'*H*y) > 1
                        phi = 1; % BFGS
                        omega = 0;
                    elseif (s'*(H\s))/(s'*y) < 1
                        phi = 0; % DFP
                        omega = 1;
                    else
                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
                        omega = phi;
                    end

                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);

                elseif qnUpdate == 5 % McCormick-Huang asymmetric update
                    theta = 1;
                    phi = 0;
                    psi = 1;
                    omega = 0;
                    t1 = s*(theta*s + phi*H'*y)';
                    t2 = (theta*s + phi*H'*y)'*y;
                    t3 = H*y*(psi*s + omega*H'*y)';
                    t4 = (psi*s + omega*H'*y)'*y;
                    H = H + t1/t2 - t3/t4;
                end

                if qnUpdate <= 1
                    d = -R\(R'\g);
                else
                    d = -H*g;
                end

            end
            g_old = g;

        case NEWTON0 % Hessian-Free Newton

            cgMaxIter = min(p,maxFunEvals-funEvals);
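            % Inexact-Newton forcing term: the CG solve only needs a residual
            % below min(0.5,sqrt(||g||))*||g||, tightening as the gradient
            % shrinks (an Eisenstat-Walker style choice).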
            cgForce = min(0.5,sqrt(norm(g)))*norm(g);

            % Set-up preconditioner
            precondFunc = [];
            precondArgs = [];
            if cgSolve == 1
                if isempty(precFunc) % Apply L-BFGS preconditioner
                    if i == 1
                        old_dirs = zeros(length(g),0);
                        old_stps = zeros(length(g),0);
                        Hdiag = 1;
                    else
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                        if useMex
                            precondFunc = @lbfgsC;
                        else
                            precondFunc = @lbfgs;
                        end
                        precondArgs = {old_dirs,old_stps,Hdiag};
                    end
                    g_old = g;
                else
                    % Apply user-defined preconditioner
                    precondFunc = precFunc;
                    precondArgs = {x,varargin{:}};
                end
            end

            % Solve Newton system using cg and hessian-vector products
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                HvFun = @autoHv;
                HvArgs = {x,g,useComplex,funObj,varargin{:}};
            else
                % Use user-supplied Hessian-vector function
                HvFun = HvFunc;
                HvArgs = {x,varargin{:}};
            end

            if useNegCurv
                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            else
                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            end

            funEvals = funEvals+cgIter;
            if debug
                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
            end

            if useNegCurv
                if ~isempty(negCurv)
                    %if debug
                    fprintf('Using negative curvature direction\n');
                    %end
                    d = negCurv/norm(negCurv);
                    d = d/sum(abs(g));
                end
            end

        case NEWTON % Newton search direction

            if cgSolve == 0
                if HessianModify == 0
                    % Attempt to perform a Cholesky factorization of the Hessian
                    [R,posDef] = chol(H);

                    % If the Cholesky factorization was successful, then the Hessian is
                    % positive definite, solve the system
                    if posDef == 0
                        d = -R\(R'\g);

                    else
                        % otherwise, adjust the Hessian to be positive definite based on the
                        % minimum eigenvalue, and solve with QR
                        % (expensive, we don't want to do this very much)
                        if debug
                            fprintf('Adjusting Hessian\n');
                        end
                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
                        d = -H\g;
                    end
                elseif HessianModify == 1
                    % Modified Incomplete Cholesky
                    R = mcholinc(H,debug);
                    d = -R\(R'\g);
                elseif HessianModify == 2
                    % Modified Generalized Cholesky
                    if useMex
                        [L D perm] = mcholC(H);
                    else
                        [L D perm] = mchol(H);
                    end
                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));

                elseif HessianModify == 3
                    % Modified Spectral Decomposition
                    [V,D] = eig((H+H')/2);
                    D = diag(D);
                    D = max(abs(D),max(max(abs(D)),1)*1e-12);
                    d = -V*((V'*g)./D);
                elseif HessianModify == 4
                    % Modified Symmetric Indefinite Factorization
                    [L,D,perm] = ldl(H,'vector');
                    [blockPos junk] = find(triu(D,1));
                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
                        if D(diagInd,diagInd) < 1e-12
                            D(diagInd,diagInd) = 1e-12;
                        end
                    end
                    for blockInd = blockPos'
                        block = D(blockInd:blockInd+1,blockInd:blockInd+1);
                        block_a = block(1);
                        block_b = block(2);
                        block_d = block(4);
                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2;
                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12);
                    end
                    d(perm) = -L' \ (D \ (L \ g(perm)));
                else
                    % Take Newton step if Hessian is pd,
                    % otherwise take a step with negative curvature
                    [R,posDef] = chol(H);
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        if debug
                            fprintf('Taking Direction of Negative Curvature\n');
                        end
                        [V,D] = eig(H);
                        u = V(:,1);
                        d = -sign(u'*g)*u;
                    end
                end

            else
                % Solve with Conjugate Gradient
                cgMaxIter = p;
                cgForce = min(0.5,sqrt(norm(g)))*norm(g);

                % Select Preconditioner
                if cgSolve == 1
                    % No preconditioner
                    precondFunc = [];
                    precondArgs = [];
                elseif cgSolve == 2
                    % Diagonal preconditioner
                    precDiag = diag(H);
                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
                    precondFunc = @precondDiag;
                    precondArgs = {precDiag.^-1};
                elseif cgSolve == 3
                    % L-BFGS preconditioner
                    if i == 1
                        old_dirs = zeros(length(g),0);
                        old_stps = zeros(length(g),0);
                        Hdiag = 1;
                    else
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                    end
                    g_old = g;
                    if useMex
                        precondFunc = @lbfgsC;
                    else
                        precondFunc = @lbfgs;
                    end
                    precondArgs = {old_dirs,old_stps,Hdiag};
                elseif cgSolve > 0
                    % Symmetric Successive Overrelaxation Preconditioner
                    omega = cgSolve;
                    D = diag(H);
                    D(D < 1e-12) = 1e-12 - min(D);
                    precDiag = (omega/(2-omega))*D.^-1;
                    precTriu = diag(D/omega) + triu(H,1);
                    precondFunc = @precondTriuDiag;
                    precondArgs = {precTriu,precDiag.^-1};
                else
                    % Incomplete Cholesky Preconditioner
                    opts.droptol = -cgSolve;
                    opts.rdiag = 1;
                    R = cholinc(sparse(H),opts);
                    if min(diag(R)) < 1e-12
                        R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts);
                    end
                    precondFunc = @precondTriu;
                    precondArgs = {R};
                end

                % Run cg with the appropriate preconditioner
                if isempty(HvFunc)
                    % No user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
                else
                    % Use user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
                end
                if debug
                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
                    %funEvals = funEvals + cgIter;
                end
            end

        case TENSOR % Tensor Method

            if numDiff
                % Compute 3rd-order Tensor Numerically
                [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:});
            else
                % Use user-supplied 3rd-derivative Tensor
                [junk1 junk2 junk3 T] = feval(funObj, x, varargin{:});
            end
918
            options_sub.Display = 'none';
919
            options_sub.TolX = tolX;
920
            options_sub.TolFun = tolFun;
921
            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);
922
923
            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX
924
                if debug
925
                    fprintf('Using 2nd-Order Step\n');
926
                end
927
                [V,D] = eig((H+H')/2);
928
                D = diag(D);
929
                D = max(abs(D),max(max(abs(D)),1)*1e-12);
930
                d = -V*((V'*g)./D);
931
            else
932
                if debug
933
                    fprintf('Using 3rd-Order Step\n');
934
                end
935
            end
936
    end
937
938
    if ~isLegal(d)
939
        fprintf('Step direction is illegal!\n');
940
        pause;
941
        return
942
    end
943
944
    % ****************** COMPUTE STEP LENGTH ************************
945
946
    % Directional Derivative
947
    gtd = g'*d;
948
949
    % Check that progress can be made along direction
950
    if gtd > -tolX
951
        exitflag=2;
952
        msg = 'Directional Derivative below TolX';
953
        break;
954
    end
955
956
    % Select Initial Guess
957
    if i == 1
958
        if method < NEWTON0
959
            t = min(1,1/sum(abs(g)));
960
        else
961
            t = 1;
962
        end
963
    else
964
        if LS_init == 0
965
            % Newton step
966
            t = 1;
967
        elseif LS_init == 1
968
            % Close to previous step length
969
            t = t*min(2,(gtd_old)/(gtd));
970
        elseif LS_init == 2
971
            % Quadratic Initialization based on {f,g} and previous f
972
            t = min(1,2*(f-f_old)/(gtd));
973
        elseif LS_init == 3
974
            % Double previous step length
975
            t = min(1,t*2);
976
        elseif LS_init == 4
977
            % Scaled step length if possible
978
            if isempty(HvFunc)
979
                % No user-supplied Hessian-vector function,
980
                % use automatic differentiation
981
                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
982
            else
983
                % Use user-supplid Hessian-vector function
984
                dHd = d'*HvFunc(d,x,varargin{:});
985
            end
986
987
            funEvals = funEvals + 1;
988
            if dHd > 0
989
                t = -gtd/(dHd);
990
            else
991
                t = min(1,2*(f-f_old)/(gtd));
992
            end
993
        end
994
995
        if t <= 0
996
            t = 1;
997
        end
998
    end
999
    f_old = f;
1000
    gtd_old = gtd;
1001
1002
    % Compute reference fr if using non-monotone objective
1003
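    % fr is the largest of the last Fref function values; the Armijo test
    % then measures decrease relative to fr instead of f, giving a
    % non-monotone line search when Fref > 1.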
    if Fref == 1
        fr = f;
    else
        if i == 1
            old_fvals = repmat(-inf,[Fref 1]);
        end

        if i <= Fref
            old_fvals(i) = f;
        else
            old_fvals = [old_fvals(2:end);f];
        end
        fr = max(old_fvals);
    end

    computeHessian = 0;
    if method >= NEWTON
        if HessianIter == 1
            computeHessian = 1;
        elseif i > 1 && mod(i-1,HessianIter) == 0
            computeHessian = 1;
        end
    end

    % Line Search
    f_old = f;
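    % LS < 3 selects Armijo backtracking variants, LS 3-5 bracket a point
    % satisfying the Wolfe conditions, and LS 6 defers to the MATLAB
    % Optimization Toolbox line search.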
    if LS < 3 % Use Armijo Backtracking
        % Perform Backtracking line search
        if computeHessian
            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;

    elseif LS < 6
        % Find Point satisfying Wolfe

        if computeHessian
            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;
        x = x + t*d;

    else
        % Use Matlab optim toolbox line search
        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=...
            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
            tolX,[],[],[],varargin{:});
        funEvals = funEvals + LSiter;
        if isempty(t)
            exitflag = -2;
            msg = 'Matlab LineSearch failed';
            break;
        end

        if method >= NEWTON
            [f_new,g_new,H] = funObj(x + t*d,varargin{:});
            funEvals = funEvals + 1;
        end
        x = x + t*d;
        f = f_new;
        g = g_new;
    end

    % Output iteration information
    if verboseI
        fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
    end

    if logfile
        fid = fopen(logfile, 'a');
        if (fid > 0)
            fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
            fclose(fid);
        end
    end

    % Output Function
    if ~isempty(outputFcn)
        callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
    end

    % Update Trace
    trace.fval(end+1,1) = f;
    trace.funcCount(end+1,1) = funEvals;

    % Check Optimality Condition
    if sum(abs(g)) <= tolFun
        exitflag=1;
        msg = 'Optimality Condition below TolFun';
        break;
    end

    % ******************* Check for lack of progress *******************

    if sum(abs(t*d)) <= tolX
        exitflag=2;
        msg = 'Step Size below TolX';
        break;
    end

    if abs(f-f_old) < tolX
        exitflag=2;
        msg = 'Function Value changing by less than TolX';
        break;
    end

    % ******** Check for going over iteration/evaluation limit *******************

    if funEvals*funEvalMultiplier > maxFunEvals
        exitflag = 0;
        msg = 'Exceeded Maximum Number of Function Evaluations';
        break;
    end

    if i == maxIter
        exitflag = 0;
        msg='Exceeded Maximum Number of Iterations';
        break;
    end

end

if verbose
    fprintf('%s\n',msg);
end
if nargout > 3
    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
        'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
end

% Output Function
if ~isempty(outputFcn)
    callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
end

end