Switch to unified view

a b/preprocessOfApneaECG/BioSigKit/Algorithms/projective.m
1
function [px] = projective (x, t, d, m, r) 
2
% PROJECTIVE  
3
%
4
% CALL: [px] = projective (x, t, d, m, r);
5
%
6
% INPUTS:
7
%   x: the vector containing the scalar observations
8
%   t: delay parameter used in embedding
9
%   d: embedding dimension to consider
10
%   m: dimensiona of null space
11
%   r: number of nearest neighbors to consider
12
%
13
% OUTPUT:
14
%   px: cleaned scalar observations
15
%
16
% copiright (c) and written by David Chelidze 4/20/2011
17
% [px] = projective (foetal_ecg(:,2), 1, 50, 45, 1500);
18
 
19
if nargin < 5
20
    error('need five input variables')
21
end
22
if nargin > 5
23
    error('too many input variables, needs only five')
24
end
25
 
26
% check data and make a raw vector of scalar data
27
if min(size(x)) ~= 1
28
   error('first input should be a vector of scalar data points.')
29
else
30
    x=x(:)'; % just the way we need it
31
end
32
px = x;
33
 
34
n = length(x) - d*t; % maximum number of reconstructed points possible
35
36
indx = 1:n; % index of points in the embedding
37
y = zeros(d+1,n);
38
dy = y; % initialize corrective array
39
40
% reconstruct the phase space
41
for i = 1:d+1
42
    y(i,:) = x(indx+(i-1)*t);
43
end
44
45
% define the weighting matrix
46
W = eye(d+1,d+1); W(1,1) = 10^3; W(d+1,d+1) = 10^3;
47
48
fprintf('Partitioning data\n')
49
[kd_tree, q] = kd_partition(y, 256); % partition into kd-tree
50
51
fprintf('Filtering data\n')
52
for k = 1:n % find the nearest neighbors and their properties
53
    [nns, ~, ~] = kd_search(y(:,k), r, kd_tree, y(:,q));
54
    nns = nns{1};
55
    for i = 1:d
56
        nns(i,:) = nns(i,:) - mean(nns(i,:)); % remove the mean
57
    end
58
    D = nns*nns.'/r; % find correlation matrix
59
    G = W*D*W; % transform it
60
    %[E,~] = eig(G); % assuming the MATLAB gives them in increasing order
61
    %E = E(:,end-1:end); 
62
    %E = E(1:m);
63
    % if eig does not sort correctly use the following instead:
64
    [E,g] = eig(G);
65
    [~,srt] = sort(diag(g));
66
    E = E(:,srt(1:m));
67
    nns(:,1) = (nns(:,1) - mean(nns,2));
68
    dy(:,k) = W\(E*E.'*W*nns(:,1)); % corrective vector
69
    dy(:,k) = mean(nns,2) + dy(:,k);
70
    px(i) = mean(dy(:,k));
71
end
72
73
% clean the time series
74
indx = [0:t:d*t, n+(0:t:d*t)];
75
for q = [1:d+1, d:-1:1]
76
    for l = (1+indx(q)):indx(q+1)
77
        cx = 0;
78
        for k = 1:q % go through all of possible corrections
79
        cx = cx + dy(k,l-(k-1)*t); % all possible corrections to x(l)
80
        end
81
        px(l) = x(l) -  cx/q; % cleaned values
82
    end
83
end
84
85
%--------------------------------------------------------------------------            
86
function [kd_tree, r] = kd_partition(y, b, c)
87
% KD_PARTITION  Create a kd-tree and partitioned database for efficiently 
88
%               finding the nearest neighbors to a point in a 
89
%               d-dimensional space.
90
%
91
% USE: [kd_tree, r] = kd_partition(y, b, c);
92
%
93
% INPUT:
94
%   y: original multivariate data (points arranged columnwise, size(y,1)=d)
95
%   b: maximum number of distinct points for each bin (default is 100)
96
%   c: minimum range of point cluster within a final leaf (default is 0)
97
%
98
% OUTPUT:
99
%   kd_tree structure with the following variables:
100
%       splitdim: dimension used in splitting the node
101
%       splitval: corresponding cutting point
102
%       first & last: indices of points in the node
103
%       left & right: node #s of consequent branches to the current node
104
%   r: sorted index of points in the original y corresponding to the leafs
105
%
106
% to find k-nearest neighbors use kd_search.m
107
%
108
% copyrighted (c) and written by David Chelidze, January 28, 2009.
109
 
110
% check the inputs
111
if nargin == 0
112
    error('Need to input at least the data to partition')
113
elseif nargin > 3
114
    error('Too many inputs')
115
end
116
 
117
 
118
% initializes default variables if needed
119
if nargin < 2
120
    b = 100;
121
end
122
if nargin < 3
123
    c = 0;
124
end
125
 
126
[d, n] = size(y); % get the dimension and the number of points in y
127
 
128
r = 1:n; % initialize original index of points in y
129
 
130
% initializes variables for the number of nodes and the last node
131
node = 1;
132
last = 1;
133
 
134
% initializes the first node's cut dimension and value in the kd_tree
135
kd_tree.splitdim = 0;
136
kd_tree.splitval = 0;
137
 
138
% initializes the bounds on the index of all points
139
kd_tree.first = 1;
140
kd_tree.last = n;
141
 
142
% initializes location of consequent branches in the kd_tree
143
kd_tree.left = 0;
144
kd_tree.right = 0;
145
146
while node <= last % do until the tree is complete
147
    
148
    % specify the index of all the points that are partitioned in this node
149
    segment = kd_tree.first(node):kd_tree.last(node);
150
    
151
    % determines range of data in each dimension and sorts it
152
    [rng, index] = sort(range(y(:,segment),2));
153
    
154
    % now determine if this segment needs splitting (cutting)
155
    if rng(d) > c && length(segment)>= b % then split
156
        yt = y(:,segment); 
157
        rt = r(segment);
158
        [sorted, sorted_index] = sort(yt(index(d),:));
159
        % estimate where the cut should go
160
        lng = size(yt,2);
161
        cut = (sorted(ceil(lng/2)) + sorted(floor(lng/2+1)))/2;
162
        L = (sorted <= cut); % points to the left of cut
163
        if sum(L) == lng % right node is empty
164
            L = (sorted < cut); % decrease points on the left
165
            cut = (cut + max(sorted(L)))/2; % adjust the cut
166
        end
167
        
168
        % adjust the order of the data in this node
169
        y(:,segment) = yt(:,sorted_index); 
170
        r(segment) = rt(sorted_index);
171
 
172
        % assign appropriate split dimension and split value
173
        kd_tree.splitdim(node) = index(d);
174
        kd_tree.splitval(node) = cut;
175
        
176
        % assign the location of consequent bins and 
177
        kd_tree.left(node) = last + 1;
178
        kd_tree.right(node) = last + 2;
179
        
180
        % specify which is the last node at this moment
181
        last = last + 2;
182
        
183
        % initialize next nodes cut dimension and value in the kd_tree
184
        % assuming they are terminal at this point
185
        kd_tree.splitdim = [kd_tree.splitdim 0 0];
186
        kd_tree.splitval = [kd_tree.splitval 0 0];
187
 
188
        % initialize the bounds on the index of the next nodes
189
        kd_tree.first = [kd_tree.first segment(1) segment(1)+sum(L)];
190
        kd_tree.last = [kd_tree.last segment(1)+sum(L)-1 segment(lng)];
191
 
192
        % initialize location of consequent branches in the kd_tree
193
        % assuming they are terminal at this point
194
        kd_tree.left = [kd_tree.left 0 0];
195
        kd_tree.right = [kd_tree.right 0 0];
196
        
197
    end % the splitting process
198
 
199
    % increment the node
200
    node = node + 1;
201
 
202
end % the partitioning
203
204
%--------------------------------------------------------------------------            
205
function [pqr, pqd, pqi] = kd_search(y,r,tree,yp)
206
% KD_SEARCH     search kd_tree for r nearest neighbors of point yq.
207
%               Need to partition original data using kd_partition.m
208
%
209
% USE: [pqr, pqd, pqi] = kd_search(y,r,tree,yp);
210
%
211
% INPUT:
212
%       y: array of query points in columnwise form
213
%       r: requested number of nearest neighbors to the query point yq
214
%       tree: kd_tree constructed using kd_partition.m
215
%       yp: partitioned (ordered) set of data that needs to be searched
216
%          (using my kd_partirion you want to input ym(:,indx), where ym 
217
%           is the data used for partitioning and indx sorted index of ym)
218
%
219
% OUTPUT:
220
%       pqr: cell array of the r nearest neighbors of y in yp 
221
%       pqd: cell array of the corresponding distances
222
%       pqi: cell array of the indices of r nearest neighbors of y in yp 
223
%
224
% copyright (c) and written by David Chelidze, February 02, 2009.
225
 
226
% check inputs
227
if nargin < 4
228
    error('Need four input variables to work')
229
end
230
 
231
% declare global variables for subfunctions
232
global yq qri qr qrd finish b_lower b_upper
233
 
234
[~, n] = size(y);
235
pqr = cell(n,1);
236
pqd = cell(n,1);
237
pqi = cell(n,1);
238
 
239
for k = 1:n
240
    yq = y(:,k);
241
    qrd = Inf*ones(1,r); % initialize array for r distances
242
    qr = zeros(size(yq,1),r); % initialize r nearest neighbor points
243
    qri = zeros(1,r); % initialize index of r nearest neighbors
244
    finish = 0; % becomes 1 after search is complete
245
 
246
    % set up the box bounds, which start at +/- infinity (whole kd space)
247
    b_upper = Inf*ones(size(yq));
248
    b_lower = -b_upper;
249
 
250
    kdsrch(1,r,tree,yp); % start the search from the first node
251
    pqr{k} = qr;
252
    pqd{k} = sqrt(qrd);
253
    pqi{k} = qri;
254
end
255
 
256
%--------------------------------------------------------------------------            
257
function kdsrch(node,r,tree,yp)
258
% KDSRCH    actual kd search
259
% this drills down the tree to the end node and updates the 
260
% nearest neighbors list with new points
261
%
262
% INPUT: starting node number, and kd_partition data
263
%
264
% copyright (c) and written by David Chelidze, February 02, 2009.
265
 
266
global yq qri qr qrd finish b_lower b_upper
267
 
268
% first find the terminal node containing yq
269
if tree.left(node) ~= 0 % not a terminal node, search deeper
270
 
271
    % first determin which child node to search
272
    if yq(tree.splitdim(node)) <= tree.splitval(node)
273
        % need to search left child node
274
        tmp = b_upper(tree.splitdim(node));
275
        b_upper(tree.splitdim(node)) = tree.splitval(node);
276
        kdsrch(tree.left(node),r,tree,yp);
277
        b_upper(tree.splitdim(node)) = tmp;
278
    else % need to search the right child node
279
        tmp = b_lower(tree.splitdim(node));
280
        b_lower(tree.splitdim(node)) = tree.splitval(node);
281
        kdsrch(tree.right(node),r,tree,yp);
282
        b_lower(tree.splitdim(node)) = tmp;
283
    end % when the terminal node (leaf) containing yq is reached
284
    if finish % done searching
285
        return
286
    end
287
 
288
    % check if other nodes need to be searched
289
    if yq(tree.splitdim(node)) <= tree.splitval(node)
290
        tmp = b_lower(tree.splitdim(node));
291
        b_lower(tree.splitdim(node)) = tree.splitval(node);
292
        if overlap(yq, b_lower, b_upper, qrd(r)) 
293
            % need to search the right child node
294
            kdsrch(tree.right(node),r,tree,yp);
295
        end
296
        b_lower(tree.splitdim(node)) = tmp;
297
    else
298
        tmp = b_upper(tree.splitdim(node));
299
        b_upper(tree.splitdim(node)) = tree.splitval(node);
300
        if overlap(yq, b_lower, b_upper, qrd(r)) 
301
            % need to search the left child node
302
            kdsrch(tree.left(node),r,tree,yp);
303
        end
304
        b_upper(tree.splitdim(node)) = tmp;
305
    end % when all the other nodes are searched
306
    if finish % done searching
307
        return
308
    end
309
 
310
else % this is a terminal node: update the nearest neighbors
311
 
312
    yt = yp(:,tree.first(node):tree.last(node)); % all points in node
313
    dstnc = zeros(1,size(yt,2));
314
    for k = 1:size(yt,1)
315
        dstnc = dstnc + (yt(k,:)-yq(k)).^2; 
316
    end
317
    %dstnc = sum((yt-yq*ones(1,size(yt,2))).^2,1); % distances squared
318
    qrd = [qrd dstnc]; % current list of distances squared
319
    qr = [qr yt]; % current list of nearest neighbors
320
    qri = [qri tree.first(node):tree.last(node)];
321
    [qrd, indx] = sort(qrd); % sorted distances squared and their index
322
    qr = qr(:,indx); % sorted list of nearest neighbors
323
    qri = qri(indx); % sorted list of indexes
324
    if size(qr,2) > r % truncate to the first r points
325
        qrd = qrd(1:r);
326
        qr = qr(:,1:r);
327
        qri = qri(1:r);
328
    end
329
    % be done if all points are with this box on the first run
330
    if within(yq, b_lower, b_upper, qrd(r));
331
        finish = 1;
332
    end % otherwise (during backtracking) WITHIN will always return 0.
333
    
334
end % if
335
 
336
%--------------------------------------------------------------------------            
337
function flag = within(yq, b_lower, b_upper, ball)
338
% WITHIN    check if additional nodes need to be searched (i.e. if the ball
339
%  centered at yq and containing all current nearest neighbors overlaps the
340
%  boundary of the leaf box containing yq)
341
%
342
% INPUT:
343
%   yq: query point
344
%   b_lower, b_upper: lower and upper bounds on the leaf box
345
%   ball: square of the radius of the ball centered at yq and containing
346
%         all current r nearest neighbors
347
% OUTPUT:
348
%   flag: 1 if ball does not intersect the box, 0 if it does
349
%
350
% Modified by David Chelidze on 02/03/2009.
351
 
352
if ball <= min([abs(yq-b_lower)', abs(yq-b_upper)'])^2
353
    % ball containing all the current nn is inside the leaf box (finish)
354
    flag = 1;
355
else % ball overlaps other leaf boxes (continue recursive search)
356
    flag = 0; 
357
end
358
 
359
%--------------------------------------------------------------------------            
360
function flag = overlap(yq, b_lower, b_upper, ball)
361
% OVERLAP   check if the current box overlaps with the ball centered at yq
362
%   and containing all current r nearest neighbors’.
363
%
364
% INPUT:
365
%   yq: query point
366
%   b_lower, b_upper: lower and upper bounds on the current box
367
%   ball: square of the radius of the ball centered at yq and containing
368
%         all current r nearest neighbors
369
% OUTPUT:
370
%   flag: 0 if ball does not overlap the box, 1 if it does
371
%
372
% Modified by David Chelidze on 02/03/2009.
373
 
374
il = find(yq < b_lower); % index of yq coordinates that are lower the box 
375
iu = find(yq > b_upper); % index of yq coordinates that are upper the box
376
% distance squared from yq to the edge of the box
377
dst = sum((yq(il)-b_lower(il)).^2,1)+sum((yq(iu)-b_upper(iu)).^2,1);
378
if dst >= ball % there is no overlap (finish this search)    
379
    flag = 0;
380
else % there is overlap and the box needs to be searched for nn
381
    flag = 1;
382
end