|
a |
|
b/DPC_MTFL_ZWZ.m |
|
|
1 |
function [ Sol, ind_zf, tsol ] = DPC_MTFL_ZWZ( Xs, ys, Lambda, opts ) |
|
|
2 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
|
3 |
%% Changed from the function DPC_MTFL, the difference is that the input Xs and ys are cells instead of matrix and vector |
|
|
4 |
%% Implementation of the sequential DPC rule proposed in |
|
|
5 |
% Safe Screening for Multi-Task Learning with Multiple Data Matrices |
|
|
6 |
% Jie Wang, Peter Wonka, and Jieping Ye. |
|
|
7 |
% |
|
|
8 |
%% input: |
|
|
9 |
% X: |
|
|
10 |
% stacked data matrix, each column corresponds to a feature |
|
|
11 |
% each row corresponds to a data instance |
|
|
12 |
% |
|
|
13 |
% y: |
|
|
14 |
% the response vector |
|
|
15 |
% |
|
|
16 |
% Lambda: |
|
|
17 |
% the parameters sequence |
|
|
18 |
% |
|
|
19 |
% opts: |
|
|
20 |
% settings for the solver |
|
|
21 |
%% output: |
|
|
22 |
% Sol: |
|
|
23 |
% the solution; the ith column corresponds to the solution |
|
|
24 |
% with the ith parameter in Lambda |
|
|
25 |
% |
|
|
26 |
% ind_zf: |
|
|
27 |
% the index of the discarded features; the ith column |
|
|
28 |
% refers to the solution of the ith value in Lambda |
|
|
29 |
% |
|
|
30 |
% t_sol: |
|
|
31 |
% the ith entry of ts_sol records the running time of the |
|
|
32 |
% solver after screening |
|
|
33 |
%% For any problem, please contact Jie Wang (jiewangustc@gmail.com) |
|
|
34 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
|
35 |
X = [];y = []; |
|
|
36 |
for idx_t = 1:length(ys) |
|
|
37 |
X = [X;Xs{idx_t}]; |
|
|
38 |
Xs{idx_t} = []; |
|
|
39 |
y = [y;ys{idx_t}]; |
|
|
40 |
ys{idx_t}=[]; |
|
|
41 |
end |
|
|
42 |
[N,d] = size(X); |
|
|
43 |
npar = length(Lambda); % number of parameters |
|
|
44 |
|
|
|
45 |
% ---------------------------- pass parameters -------------------------- % |
|
|
46 |
opts.init = 0; % set .init for warm start |
|
|
47 |
task_ind = opts.ind; |
|
|
48 |
stask = diff(task_ind); % size of each task |
|
|
49 |
T = length(stask); % number of tasks |
|
|
50 |
|
|
|
51 |
% --------------------- initialize the output --------------------------- % |
|
|
52 |
Sol = zeros(d,T,npar); |
|
|
53 |
ind_zf = false(d,npar); |
|
|
54 |
%rej_ratio = zeros(1,npar); |
|
|
55 |
|
|
|
56 |
% ------- construct sparse matrix to vectorize the computation ---------- % |
|
|
57 |
ft_ind = zeros(1,N); |
|
|
58 |
for i = 1:T |
|
|
59 |
ft_ind(1,task_ind(i)+1:task_ind(i+1))=i; % fg_ind(j) is the index of the group |
|
|
60 |
% which contains the jth feature |
|
|
61 |
end |
|
|
62 |
tS = sparse(ft_ind,1:N,ones(1,N),T,N,N); % ith row refers to the ith task, if the ith row |
|
|
63 |
% refers to the i_s to i_e features, then |
|
|
64 |
% tS(i,i_s:i_e) = 1. |
|
|
65 |
|
|
|
66 |
% ------ compute the norm of each feature of each task ||x_l^t||_2 ---------- % |
|
|
67 |
Xtnorm = sqrt(tS*(X.*X)); |
|
|
68 |
H = -2*Xtnorm; |
|
|
69 |
[xtnmx,xtnmxind] = max(Xtnorm); |
|
|
70 |
|
|
|
71 |
% --------------------------- compute lambda_max ------------------------ % |
|
|
72 |
Xtty = tS*(X.*repmat(y,1,d)); % Xtty is of T*d and Xtty(i,j) = <x_j^i,y_i> |
|
|
73 |
|
|
|
74 |
[lambda_max2, indmx] = max(sum(Xtty.*Xtty,1)); |
|
|
75 |
lambda_max = sqrt(lambda_max2); |
|
|
76 |
opts.lambda_max = lambda_max; |
|
|
77 |
|
|
|
78 |
if opts.rFlag == 1 |
|
|
79 |
opts.rFlag = 0; % the parameter value passing to the solver is its |
|
|
80 |
% absolute value rather than a ratio |
|
|
81 |
Lambda = Lambda*lambda_max; |
|
|
82 |
end |
|
|
83 |
|
|
|
84 |
% ----------------- sort the parameters in descend order ---------------- % |
|
|
85 |
[Lambdav,Lambda_ind] = sort(Lambda,'descend'); |
|
|
86 |
rLambdav = 1./Lambdav; |
|
|
87 |
|
|
|
88 |
% ----------------- solve MTFL sequentially with DPC ------------ % |
|
|
89 |
lambdap = lambda_max; |
|
|
90 |
tsol = zeros(1,npar); |
|
|
91 |
for i = 1:npar |
|
|
92 |
fprintf('in DPC step: %d\n',i); |
|
|
93 |
lambdac = Lambdav(1,i); |
|
|
94 |
if lambdac>=lambda_max |
|
|
95 |
%Sol(:,Lambda_ind(i)) = 0; % because Sol is initalized to be 0 |
|
|
96 |
ind_zf(:,Lambda_ind(i)) = true; |
|
|
97 |
%rej_ratio(Lambda_ind(i)) = 1; |
|
|
98 |
else |
|
|
99 |
if lambdap==lambda_max |
|
|
100 |
theta = y/lambda_max; |
|
|
101 |
indmx = indmx(1); |
|
|
102 |
nv = (tS'*Xtty(:,indmx)).*X(:,indmx); |
|
|
103 |
else |
|
|
104 |
theta = (y - sum(X.*(Sol(:,:,Lambda_ind(i-1))*tS)',2))*rlambdap; |
|
|
105 |
nv = y*rlambdap-theta; |
|
|
106 |
end |
|
|
107 |
|
|
|
108 |
rlambdac = rLambdav(1,i); |
|
|
109 |
|
|
|
110 |
nv = nv/norm(nv); |
|
|
111 |
rv = y*rlambdac-theta; |
|
|
112 |
Prv = rv - (nv'*rv)*nv; |
|
|
113 |
o = theta + 0.5*Prv; |
|
|
114 |
r = 0.5*norm(Prv); |
|
|
115 |
|
|
|
116 |
% ------- screening by DPC, remove the ith feature if T(i)=1 ----- % |
|
|
117 |
Xtto = tS*(X.*repmat(o,1,d)); |
|
|
118 |
q = -2*(Xtto.*Xtnorm); |
|
|
119 |
qpmx = -newton_qp(H,q,r,2*xtnmx,xtnmxind); |
|
|
120 |
Tmt = 1 > (sum(Xtto.*Xtto,1))' + qpmx + 1e-8; |
|
|
121 |
ind_zf(:,Lambda_ind(i)) = Tmt; |
|
|
122 |
nTmt = ~Tmt; |
|
|
123 |
Xr = X(:,nTmt); |
|
|
124 |
|
|
|
125 |
if lambdap == lambda_max |
|
|
126 |
opts.x0 = zeros(size(Xr,2),T); |
|
|
127 |
else |
|
|
128 |
opts.x0 = Sol(nTmt,:,Lambda_ind(i-1)); |
|
|
129 |
end |
|
|
130 |
|
|
|
131 |
% --- solve the group Lasso problem on the reduced data matrix -- % |
|
|
132 |
starts = tic; |
|
|
133 |
[x1, ~, ~]= mtLeastR(Xr, y, lambdac, opts); |
|
|
134 |
tsol(Lambda_ind(i)) = toc(starts); |
|
|
135 |
|
|
|
136 |
Sol(nTmt,:,Lambda_ind(i)) = x1; |
|
|
137 |
|
|
|
138 |
lambdap = lambdac; |
|
|
139 |
rlambdap = rlambdac; |
|
|
140 |
end |
|
|
141 |
end |
|
|
142 |
|
|
|
143 |
end |
|
|
144 |
|