--- a +++ b/MLFre/gen_sync2.m @@ -0,0 +1,97 @@ +function [ X, y, beta, ind ] = gen_sync2( p, N, d, nns, ratio ) +%% generate synthetic data with zero pair-wise correlation + +nl = p./nns; % the lenght of nl is d+1; n[end] = 1 as we only has + % one root node; nl[1] is the number of nodes at d + % layer; nl[2] is the number of nodes at d-1 layer + +% construct sparse matrix to vectorize the computation +gind = zeros(d,p); +for l = 1:d + gind(l,nns(l):nns(l):p) = 1; + gind(l,:) = nl(l)+1-cumsum(gind(l,:),'reverse'); +end +Gind = cell(1,d); +if nl(1)==p + j=2; +else + j=1; +end +for l=j:d + Gind{l}=sparse(gind(l,:),1:p,ones(1,p),nl(l),p); +end + +% generate the data matrix +mu = zeros(1,p); +if p<=20000 + Mrow = repmat((1:p)',1,p); + Mcolumn = repmat(1:p,p,1); + SIGMA = (0.5*ones(p,p)).^abs(Mrow-Mcolumn); +else + power = 50; + nz = p+2*(power*p-(power+1)*power/2); + Mrow = zeros(1,nz); + Mcolumn = zeros(1,nz); + s = zeros(1,nz); + %count = 0; + Mrow(1:p) = 1:p; + Mcolumn(1:p) = 1:p; + s(1:p) = 1; + count = nnz(s); + for i = 1:power + Mrow(count+1:count+p-i) = 1:(p-i); + Mcolumn(count+1:count+p-i) = (i+1):p; + s(count+1:count+p-i) = 0.5^i; + count = nnz(s); + Mrow(count+1:count+p-i) = (i+1):p; + Mcolumn(count+1:count+p-i) = 1:(p-i); + s(count+1:count+p-i) = 0.5^i; + count = nnz(s); + end + SIGMA=sparse(Mrow,Mcolumn,s,p,p,nz); +end +X = mvnrnd(mu,SIGMA,N); + +% generate the response +T = true(p,1); +for l = 1:d + if l==1&&nl(1)==p + Tf = unifrnd(0,1,[p,1])<=ratio(l); + else + Tl = unifrnd(0,1,[nl(l),1])<=ratio(l); + Tf = logical((Gind{l})'*Tl); + end + T = T&Tf; +end + + +beta = zeros(p,1); +beta(T) = normrnd(0,1,[nnz(T),1]); + +y = X*beta + 0.01*normrnd(0,1,[N,1]); + +% construct ind + +if nl(1) == p % if the nodes at the first layer only has one feature + cumnl = [0,1,cumsum(nl(2:end))+1]; + ind = zeros(3,cumnl(end)); + ind(:,1)=[-1, -1, 1]'; + ind(:,end)=[1,p,0]'; + for i = 2:d + ind(1,cumnl(i)+1:cumnl(i+1)) = 1:nns(i):p; + ind(2,cumnl(i)+1:cumnl(i+1)) = ind(1,cumnl(i)+1:cumnl(i+1))+nns(i)-1; + ind(3,cumnl(i)+1:cumnl(i+1)) = sqrt(nns(i)); + end +else + cumnl = [0,cumsum(nl)]; + ind = zeros(3,cumnl(end)); + ind(:,end)=[1,p,0]'; + for i = 1:d + ind(1,cumnl(i)+1:cumnl(i+1)) = 1:nns(i):p; + ind(2,cumnl(i)+1:cumnl(i+1)) = ind(1,cumnl(i)+1:cumnl(i+1)) + nns(i); + ind(3,cumnl(i)+1:cumnl(i+1)) = sqrt(nns(i)); + end +end + +end +