Download this file

382 lines (330 with data), 13.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
function [px] = projective (x, t, d, m, r)
% PROJECTIVE
%
% CALL: [px] = projective (x, t, d, m, r);
%
% INPUTS:
% x: the vector containing the scalar observations
% t: delay parameter used in embedding
% d: embedding dimension to consider
% m: dimensiona of null space
% r: number of nearest neighbors to consider
%
% OUTPUT:
% px: cleaned scalar observations
%
% copiright (c) and written by David Chelidze 4/20/2011
% [px] = projective (foetal_ecg(:,2), 1, 50, 45, 1500);
if nargin < 5
error('need five input variables')
end
if nargin > 5
error('too many input variables, needs only five')
end
% check data and make a raw vector of scalar data
if min(size(x)) ~= 1
error('first input should be a vector of scalar data points.')
else
x=x(:)'; % just the way we need it
end
px = x;
n = length(x) - d*t; % maximum number of reconstructed points possible
indx = 1:n; % index of points in the embedding
y = zeros(d+1,n);
dy = y; % initialize corrective array
% reconstruct the phase space
for i = 1:d+1
y(i,:) = x(indx+(i-1)*t);
end
% define the weighting matrix
W = eye(d+1,d+1); W(1,1) = 10^3; W(d+1,d+1) = 10^3;
fprintf('Partitioning data\n')
[kd_tree, q] = kd_partition(y, 256); % partition into kd-tree
fprintf('Filtering data\n')
for k = 1:n % find the nearest neighbors and their properties
[nns, ~, ~] = kd_search(y(:,k), r, kd_tree, y(:,q));
nns = nns{1};
for i = 1:d
nns(i,:) = nns(i,:) - mean(nns(i,:)); % remove the mean
end
D = nns*nns.'/r; % find correlation matrix
G = W*D*W; % transform it
%[E,~] = eig(G); % assuming the MATLAB gives them in increasing order
%E = E(:,end-1:end);
%E = E(1:m);
% if eig does not sort correctly use the following instead:
[E,g] = eig(G);
[~,srt] = sort(diag(g));
E = E(:,srt(1:m));
nns(:,1) = (nns(:,1) - mean(nns,2));
dy(:,k) = W\(E*E.'*W*nns(:,1)); % corrective vector
dy(:,k) = mean(nns,2) + dy(:,k);
px(i) = mean(dy(:,k));
end
% clean the time series
indx = [0:t:d*t, n+(0:t:d*t)];
for q = [1:d+1, d:-1:1]
for l = (1+indx(q)):indx(q+1)
cx = 0;
for k = 1:q % go through all of possible corrections
cx = cx + dy(k,l-(k-1)*t); % all possible corrections to x(l)
end
px(l) = x(l) - cx/q; % cleaned values
end
end
%--------------------------------------------------------------------------
function [kd_tree, r] = kd_partition(y, b, c)
% KD_PARTITION Create a kd-tree and partitioned database for efficiently
% finding the nearest neighbors to a point in a
% d-dimensional space.
%
% USE: [kd_tree, r] = kd_partition(y, b, c);
%
% INPUT:
% y: original multivariate data (points arranged columnwise, size(y,1)=d)
% b: maximum number of distinct points for each bin (default is 100)
% c: minimum range of point cluster within a final leaf (default is 0)
%
% OUTPUT:
% kd_tree structure with the following variables:
% splitdim: dimension used in splitting the node
% splitval: corresponding cutting point
% first & last: indices of points in the node
% left & right: node #s of consequent branches to the current node
% r: sorted index of points in the original y corresponding to the leafs
%
% to find k-nearest neighbors use kd_search.m
%
% copyrighted (c) and written by David Chelidze, January 28, 2009.
% check the inputs
if nargin == 0
error('Need to input at least the data to partition')
elseif nargin > 3
error('Too many inputs')
end
% initializes default variables if needed
if nargin < 2
b = 100;
end
if nargin < 3
c = 0;
end
[d, n] = size(y); % get the dimension and the number of points in y
r = 1:n; % initialize original index of points in y
% initializes variables for the number of nodes and the last node
node = 1;
last = 1;
% initializes the first node's cut dimension and value in the kd_tree
kd_tree.splitdim = 0;
kd_tree.splitval = 0;
% initializes the bounds on the index of all points
kd_tree.first = 1;
kd_tree.last = n;
% initializes location of consequent branches in the kd_tree
kd_tree.left = 0;
kd_tree.right = 0;
while node <= last % do until the tree is complete
% specify the index of all the points that are partitioned in this node
segment = kd_tree.first(node):kd_tree.last(node);
% determines range of data in each dimension and sorts it
[rng, index] = sort(range(y(:,segment),2));
% now determine if this segment needs splitting (cutting)
if rng(d) > c && length(segment)>= b % then split
yt = y(:,segment);
rt = r(segment);
[sorted, sorted_index] = sort(yt(index(d),:));
% estimate where the cut should go
lng = size(yt,2);
cut = (sorted(ceil(lng/2)) + sorted(floor(lng/2+1)))/2;
L = (sorted <= cut); % points to the left of cut
if sum(L) == lng % right node is empty
L = (sorted < cut); % decrease points on the left
cut = (cut + max(sorted(L)))/2; % adjust the cut
end
% adjust the order of the data in this node
y(:,segment) = yt(:,sorted_index);
r(segment) = rt(sorted_index);
% assign appropriate split dimension and split value
kd_tree.splitdim(node) = index(d);
kd_tree.splitval(node) = cut;
% assign the location of consequent bins and
kd_tree.left(node) = last + 1;
kd_tree.right(node) = last + 2;
% specify which is the last node at this moment
last = last + 2;
% initialize next nodes cut dimension and value in the kd_tree
% assuming they are terminal at this point
kd_tree.splitdim = [kd_tree.splitdim 0 0];
kd_tree.splitval = [kd_tree.splitval 0 0];
% initialize the bounds on the index of the next nodes
kd_tree.first = [kd_tree.first segment(1) segment(1)+sum(L)];
kd_tree.last = [kd_tree.last segment(1)+sum(L)-1 segment(lng)];
% initialize location of consequent branches in the kd_tree
% assuming they are terminal at this point
kd_tree.left = [kd_tree.left 0 0];
kd_tree.right = [kd_tree.right 0 0];
end % the splitting process
% increment the node
node = node + 1;
end % the partitioning
%--------------------------------------------------------------------------
function [pqr, pqd, pqi] = kd_search(y,r,tree,yp)
% KD_SEARCH search kd_tree for r nearest neighbors of point yq.
% Need to partition original data using kd_partition.m
%
% USE: [pqr, pqd, pqi] = kd_search(y,r,tree,yp);
%
% INPUT:
% y: array of query points in columnwise form
% r: requested number of nearest neighbors to the query point yq
% tree: kd_tree constructed using kd_partition.m
% yp: partitioned (ordered) set of data that needs to be searched
% (using my kd_partirion you want to input ym(:,indx), where ym
% is the data used for partitioning and indx sorted index of ym)
%
% OUTPUT:
% pqr: cell array of the r nearest neighbors of y in yp
% pqd: cell array of the corresponding distances
% pqi: cell array of the indices of r nearest neighbors of y in yp
%
% copyright (c) and written by David Chelidze, February 02, 2009.
% check inputs
if nargin < 4
error('Need four input variables to work')
end
% declare global variables for subfunctions
global yq qri qr qrd finish b_lower b_upper
[~, n] = size(y);
pqr = cell(n,1);
pqd = cell(n,1);
pqi = cell(n,1);
for k = 1:n
yq = y(:,k);
qrd = Inf*ones(1,r); % initialize array for r distances
qr = zeros(size(yq,1),r); % initialize r nearest neighbor points
qri = zeros(1,r); % initialize index of r nearest neighbors
finish = 0; % becomes 1 after search is complete
% set up the box bounds, which start at +/- infinity (whole kd space)
b_upper = Inf*ones(size(yq));
b_lower = -b_upper;
kdsrch(1,r,tree,yp); % start the search from the first node
pqr{k} = qr;
pqd{k} = sqrt(qrd);
pqi{k} = qri;
end
%--------------------------------------------------------------------------
function kdsrch(node,r,tree,yp)
% KDSRCH actual kd search
% this drills down the tree to the end node and updates the
% nearest neighbors list with new points
%
% INPUT: starting node number, and kd_partition data
%
% copyright (c) and written by David Chelidze, February 02, 2009.
global yq qri qr qrd finish b_lower b_upper
% first find the terminal node containing yq
if tree.left(node) ~= 0 % not a terminal node, search deeper
% first determin which child node to search
if yq(tree.splitdim(node)) <= tree.splitval(node)
% need to search left child node
tmp = b_upper(tree.splitdim(node));
b_upper(tree.splitdim(node)) = tree.splitval(node);
kdsrch(tree.left(node),r,tree,yp);
b_upper(tree.splitdim(node)) = tmp;
else % need to search the right child node
tmp = b_lower(tree.splitdim(node));
b_lower(tree.splitdim(node)) = tree.splitval(node);
kdsrch(tree.right(node),r,tree,yp);
b_lower(tree.splitdim(node)) = tmp;
end % when the terminal node (leaf) containing yq is reached
if finish % done searching
return
end
% check if other nodes need to be searched
if yq(tree.splitdim(node)) <= tree.splitval(node)
tmp = b_lower(tree.splitdim(node));
b_lower(tree.splitdim(node)) = tree.splitval(node);
if overlap(yq, b_lower, b_upper, qrd(r))
% need to search the right child node
kdsrch(tree.right(node),r,tree,yp);
end
b_lower(tree.splitdim(node)) = tmp;
else
tmp = b_upper(tree.splitdim(node));
b_upper(tree.splitdim(node)) = tree.splitval(node);
if overlap(yq, b_lower, b_upper, qrd(r))
% need to search the left child node
kdsrch(tree.left(node),r,tree,yp);
end
b_upper(tree.splitdim(node)) = tmp;
end % when all the other nodes are searched
if finish % done searching
return
end
else % this is a terminal node: update the nearest neighbors
yt = yp(:,tree.first(node):tree.last(node)); % all points in node
dstnc = zeros(1,size(yt,2));
for k = 1:size(yt,1)
dstnc = dstnc + (yt(k,:)-yq(k)).^2;
end
%dstnc = sum((yt-yq*ones(1,size(yt,2))).^2,1); % distances squared
qrd = [qrd dstnc]; % current list of distances squared
qr = [qr yt]; % current list of nearest neighbors
qri = [qri tree.first(node):tree.last(node)];
[qrd, indx] = sort(qrd); % sorted distances squared and their index
qr = qr(:,indx); % sorted list of nearest neighbors
qri = qri(indx); % sorted list of indexes
if size(qr,2) > r % truncate to the first r points
qrd = qrd(1:r);
qr = qr(:,1:r);
qri = qri(1:r);
end
% be done if all points are with this box on the first run
if within(yq, b_lower, b_upper, qrd(r));
finish = 1;
end % otherwise (during backtracking) WITHIN will always return 0.
end % if
%--------------------------------------------------------------------------
function flag = within(yq, b_lower, b_upper, ball)
% WITHIN check if additional nodes need to be searched (i.e. if the ball
% centered at yq and containing all current nearest neighbors overlaps the
% boundary of the leaf box containing yq)
%
% INPUT:
% yq: query point
% b_lower, b_upper: lower and upper bounds on the leaf box
% ball: square of the radius of the ball centered at yq and containing
% all current r nearest neighbors
% OUTPUT:
% flag: 1 if ball does not intersect the box, 0 if it does
%
% Modified by David Chelidze on 02/03/2009.
if ball <= min([abs(yq-b_lower)', abs(yq-b_upper)'])^2
% ball containing all the current nn is inside the leaf box (finish)
flag = 1;
else % ball overlaps other leaf boxes (continue recursive search)
flag = 0;
end
%--------------------------------------------------------------------------
function flag = overlap(yq, b_lower, b_upper, ball)
% OVERLAP check if the current box overlaps with the ball centered at yq
% and containing all current r nearest neighbors.
%
% INPUT:
% yq: query point
% b_lower, b_upper: lower and upper bounds on the current box
% ball: square of the radius of the ball centered at yq and containing
% all current r nearest neighbors
% OUTPUT:
% flag: 0 if ball does not overlap the box, 1 if it does
%
% Modified by David Chelidze on 02/03/2009.
il = find(yq < b_lower); % index of yq coordinates that are lower the box
iu = find(yq > b_upper); % index of yq coordinates that are upper the box
% distance squared from yq to the edge of the box
dst = sum((yq(il)-b_lower(il)).^2,1)+sum((yq(iu)-b_upper(iu)).^2,1);
if dst >= ball % there is no overlap (finish this search)
flag = 0;
else % there is overlap and the box needs to be searched for nn
flag = 1;
end