a b/MLFre/gen_sync2.m
1
function [ X, y, beta, ind ] = gen_sync2( p, N, d, nns, ratio )
2
%% generate synthetic data with zero pair-wise correlation
3
4
nl = p./nns; % the lenght of nl is d+1; n[end] = 1 as we only has 
5
                      % one root node; nl[1] is the number of nodes at d
6
                      % layer; nl[2] is the number of nodes at d-1 layer
7
8
% construct sparse matrix to vectorize the computation
9
gind = zeros(d,p);
10
for l = 1:d
11
    gind(l,nns(l):nns(l):p) = 1;
12
    gind(l,:) = nl(l)+1-cumsum(gind(l,:),'reverse');
13
end
14
Gind = cell(1,d);
15
if nl(1)==p
16
    j=2;
17
else
18
    j=1;
19
end
20
for l=j:d
21
    Gind{l}=sparse(gind(l,:),1:p,ones(1,p),nl(l),p);
22
end
23
                      
24
% generate the data matrix
25
mu = zeros(1,p);
26
if p<=20000
27
    Mrow = repmat((1:p)',1,p);
28
    Mcolumn = repmat(1:p,p,1);
29
    SIGMA = (0.5*ones(p,p)).^abs(Mrow-Mcolumn);
30
else
31
    power = 50;
32
    nz = p+2*(power*p-(power+1)*power/2);
33
    Mrow = zeros(1,nz);
34
    Mcolumn = zeros(1,nz);
35
    s = zeros(1,nz);
36
    %count = 0;
37
    Mrow(1:p) = 1:p;
38
    Mcolumn(1:p) = 1:p;
39
    s(1:p) = 1;
40
    count = nnz(s);
41
    for i = 1:power
42
        Mrow(count+1:count+p-i) = 1:(p-i);
43
        Mcolumn(count+1:count+p-i) = (i+1):p;
44
        s(count+1:count+p-i) = 0.5^i;
45
        count = nnz(s);
46
        Mrow(count+1:count+p-i) = (i+1):p;
47
        Mcolumn(count+1:count+p-i) = 1:(p-i);
48
        s(count+1:count+p-i) = 0.5^i;
49
        count = nnz(s);
50
    end
51
    SIGMA=sparse(Mrow,Mcolumn,s,p,p,nz);
52
end
53
X = mvnrnd(mu,SIGMA,N);
54
55
% generate the response
56
T = true(p,1);
57
for l = 1:d
58
    if l==1&&nl(1)==p
59
        Tf = unifrnd(0,1,[p,1])<=ratio(l);
60
    else
61
        Tl = unifrnd(0,1,[nl(l),1])<=ratio(l);
62
        Tf = logical((Gind{l})'*Tl);
63
    end
64
    T = T&Tf;
65
end
66
67
68
beta = zeros(p,1);
69
beta(T) = normrnd(0,1,[nnz(T),1]);
70
71
y = X*beta + 0.01*normrnd(0,1,[N,1]);  
72
73
% construct ind
74
75
if nl(1) == p % if the nodes at the first layer only has one feature
76
    cumnl = [0,1,cumsum(nl(2:end))+1];
77
    ind = zeros(3,cumnl(end));
78
    ind(:,1)=[-1, -1, 1]';
79
    ind(:,end)=[1,p,0]';
80
    for i = 2:d
81
        ind(1,cumnl(i)+1:cumnl(i+1)) = 1:nns(i):p;
82
        ind(2,cumnl(i)+1:cumnl(i+1)) = ind(1,cumnl(i)+1:cumnl(i+1))+nns(i)-1;
83
        ind(3,cumnl(i)+1:cumnl(i+1)) = sqrt(nns(i));
84
    end
85
else
86
    cumnl = [0,cumsum(nl)];
87
    ind = zeros(3,cumnl(end));
88
    ind(:,end)=[1,p,0]';
89
    for i = 1:d
90
        ind(1,cumnl(i)+1:cumnl(i+1)) = 1:nns(i):p;
91
        ind(2,cumnl(i)+1:cumnl(i+1)) = ind(1,cumnl(i)+1:cumnl(i+1)) + nns(i);
92
        ind(3,cumnl(i)+1:cumnl(i+1)) = sqrt(nns(i));
93
    end
94
end
95
96
end
97