Diff of /DPC_MTFL_ZWZ.m [000000] .. [d8e26d]

Switch to unified view

a b/DPC_MTFL_ZWZ.m
1
function [ Sol, ind_zf, tsol ] = DPC_MTFL_ZWZ( Xs, ys, Lambda, opts )
2
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3
%% Changed from the function DPC_MTFL, the difference is that the input Xs and ys are cells instead of matrix and vector
4
%% Implementation of the sequential DPC rule proposed in
5
%  Safe Screening for Multi-Task Learning with Multiple Data Matrices
6
%  Jie Wang, Peter Wonka, and Jieping Ye. 
7
%
8
%% input: 
9
%         X: 
10
%            stacked data matrix, each column corresponds to a feature 
11
%            each row corresponds to a data instance
12
%
13
%         y: 
14
%            the response vector
15
%       
16
%         Lambda: 
17
%            the parameters sequence
18
%
19
%         opts: 
20
%            settings for the solver
21
%% output:
22
%         Sol: 
23
%              the solution; the ith column corresponds to the solution
24
%              with the ith parameter in Lambda
25
%
26
%         ind_zf: 
27
%              the index of the discarded features; the ith column
28
%              refers to the solution of the ith value in Lambda
29
%
30
%         t_sol: 
31
%              the ith entry of ts_sol records the running time of the
32
%              solver after screening
33
%% For any problem, please contact Jie Wang (jiewangustc@gmail.com)
34
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35
X = [];y = [];
36
for idx_t = 1:length(ys)
37
X = [X;Xs{idx_t}];
38
Xs{idx_t} = [];
39
y = [y;ys{idx_t}];
40
ys{idx_t}=[];
41
end
42
[N,d] = size(X);
43
npar = length(Lambda); % number of parameters
44
45
% ---------------------------- pass parameters -------------------------- %
46
opts.init = 0; % set .init for warm start
47
task_ind = opts.ind;
48
stask = diff(task_ind); % size of each task
49
T = length(stask); % number of tasks
50
51
% --------------------- initialize the output --------------------------- %
52
Sol = zeros(d,T,npar);
53
ind_zf = false(d,npar);
54
%rej_ratio = zeros(1,npar);
55
56
% ------- construct sparse matrix to vectorize the computation ---------- %
57
ft_ind = zeros(1,N);
58
for i = 1:T
59
    ft_ind(1,task_ind(i)+1:task_ind(i+1))=i; % fg_ind(j) is the index of the group 
60
                                     % which contains the jth feature
61
end
62
tS = sparse(ft_ind,1:N,ones(1,N),T,N,N); % ith row refers to the ith task, if the ith row
63
                   % refers to the i_s to i_e features, then
64
                   % tS(i,i_s:i_e) = 1.
65
66
% ------ compute the norm of each feature of each task ||x_l^t||_2 ---------- %  
67
Xtnorm = sqrt(tS*(X.*X));
68
H = -2*Xtnorm;
69
[xtnmx,xtnmxind] = max(Xtnorm);
70
71
% --------------------------- compute lambda_max ------------------------ %
72
Xtty = tS*(X.*repmat(y,1,d)); % Xtty is of T*d and Xtty(i,j) = <x_j^i,y_i>
73
74
[lambda_max2, indmx] = max(sum(Xtty.*Xtty,1));
75
lambda_max = sqrt(lambda_max2);
76
opts.lambda_max = lambda_max;
77
78
if opts.rFlag == 1
79
    opts.rFlag = 0; % the parameter value passing to the solver is its 
80
                       % absolute value rather than a ratio
81
    Lambda = Lambda*lambda_max;
82
end
83
84
% ----------------- sort the parameters in descend order ---------------- %
85
[Lambdav,Lambda_ind] = sort(Lambda,'descend');
86
rLambdav = 1./Lambdav;
87
88
% ----------------- solve MTFL sequentially with DPC ------------ %
89
lambdap = lambda_max;
90
tsol = zeros(1,npar);
91
for i = 1:npar
92
    fprintf('in DPC step: %d\n',i);
93
    lambdac = Lambdav(1,i);
94
    if lambdac>=lambda_max
95
        %Sol(:,Lambda_ind(i)) = 0; % because Sol is initalized to be 0
96
        ind_zf(:,Lambda_ind(i)) = true;     
97
        %rej_ratio(Lambda_ind(i)) = 1;
98
    else
99
        if lambdap==lambda_max
100
            theta = y/lambda_max;
101
            indmx = indmx(1);
102
            nv = (tS'*Xtty(:,indmx)).*X(:,indmx);
103
        else
104
            theta = (y - sum(X.*(Sol(:,:,Lambda_ind(i-1))*tS)',2))*rlambdap;
105
            nv = y*rlambdap-theta;
106
        end
107
        
108
        rlambdac = rLambdav(1,i);
109
        
110
        nv = nv/norm(nv);
111
        rv = y*rlambdac-theta;
112
        Prv = rv - (nv'*rv)*nv;
113
        o = theta + 0.5*Prv;
114
        r = 0.5*norm(Prv);
115
116
        % ------- screening by DPC, remove the ith feature if T(i)=1 ----- %
117
        Xtto = tS*(X.*repmat(o,1,d));
118
        q = -2*(Xtto.*Xtnorm);
119
        qpmx = -newton_qp(H,q,r,2*xtnmx,xtnmxind);
120
        Tmt = 1 > (sum(Xtto.*Xtto,1))' + qpmx + 1e-8;      
121
        ind_zf(:,Lambda_ind(i)) = Tmt;
122
        nTmt = ~Tmt;
123
        Xr = X(:,nTmt);
124
        
125
        if lambdap == lambda_max
126
            opts.x0 = zeros(size(Xr,2),T);
127
        else
128
            opts.x0 = Sol(nTmt,:,Lambda_ind(i-1));
129
        end
130
        
131
        % --- solve the group Lasso problem on the reduced data matrix -- %
132
        starts = tic;
133
        [x1, ~, ~]= mtLeastR(Xr, y, lambdac, opts);   
134
        tsol(Lambda_ind(i)) = toc(starts);
135
        
136
        Sol(nTmt,:,Lambda_ind(i)) = x1;
137
        
138
        lambdap = lambdac;
139
        rlambdap = rlambdac;
140
    end
141
end
142
143
end
144