|
a |
|
b/preprocessOfApneaECG/BioSigKit/Algorithms/projective.m |
|
|
1 |
function [px] = projective (x, t, d, m, r) |
|
|
2 |
% PROJECTIVE |
|
|
3 |
% |
|
|
4 |
% CALL: [px] = projective (x, t, d, m, r); |
|
|
5 |
% |
|
|
6 |
% INPUTS: |
|
|
7 |
% x: the vector containing the scalar observations |
|
|
8 |
% t: delay parameter used in embedding |
|
|
9 |
% d: embedding dimension to consider |
|
|
10 |
% m: dimensiona of null space |
|
|
11 |
% r: number of nearest neighbors to consider |
|
|
12 |
% |
|
|
13 |
% OUTPUT: |
|
|
14 |
% px: cleaned scalar observations |
|
|
15 |
% |
|
|
16 |
% copiright (c) and written by David Chelidze 4/20/2011 |
|
|
17 |
% [px] = projective (foetal_ecg(:,2), 1, 50, 45, 1500); |
|
|
18 |
|
|
|
19 |
if nargin < 5 |
|
|
20 |
error('need five input variables') |
|
|
21 |
end |
|
|
22 |
if nargin > 5 |
|
|
23 |
error('too many input variables, needs only five') |
|
|
24 |
end |
|
|
25 |
|
|
|
26 |
% check data and make a raw vector of scalar data |
|
|
27 |
if min(size(x)) ~= 1 |
|
|
28 |
error('first input should be a vector of scalar data points.') |
|
|
29 |
else |
|
|
30 |
x=x(:)'; % just the way we need it |
|
|
31 |
end |
|
|
32 |
px = x; |
|
|
33 |
|
|
|
34 |
n = length(x) - d*t; % maximum number of reconstructed points possible |
|
|
35 |
|
|
|
36 |
indx = 1:n; % index of points in the embedding |
|
|
37 |
y = zeros(d+1,n); |
|
|
38 |
dy = y; % initialize corrective array |
|
|
39 |
|
|
|
40 |
% reconstruct the phase space |
|
|
41 |
for i = 1:d+1 |
|
|
42 |
y(i,:) = x(indx+(i-1)*t); |
|
|
43 |
end |
|
|
44 |
|
|
|
45 |
% define the weighting matrix |
|
|
46 |
W = eye(d+1,d+1); W(1,1) = 10^3; W(d+1,d+1) = 10^3; |
|
|
47 |
|
|
|
48 |
fprintf('Partitioning data\n') |
|
|
49 |
[kd_tree, q] = kd_partition(y, 256); % partition into kd-tree |
|
|
50 |
|
|
|
51 |
fprintf('Filtering data\n') |
|
|
52 |
for k = 1:n % find the nearest neighbors and their properties |
|
|
53 |
[nns, ~, ~] = kd_search(y(:,k), r, kd_tree, y(:,q)); |
|
|
54 |
nns = nns{1}; |
|
|
55 |
for i = 1:d |
|
|
56 |
nns(i,:) = nns(i,:) - mean(nns(i,:)); % remove the mean |
|
|
57 |
end |
|
|
58 |
D = nns*nns.'/r; % find correlation matrix |
|
|
59 |
G = W*D*W; % transform it |
|
|
60 |
%[E,~] = eig(G); % assuming the MATLAB gives them in increasing order |
|
|
61 |
%E = E(:,end-1:end); |
|
|
62 |
%E = E(1:m); |
|
|
63 |
% if eig does not sort correctly use the following instead: |
|
|
64 |
[E,g] = eig(G); |
|
|
65 |
[~,srt] = sort(diag(g)); |
|
|
66 |
E = E(:,srt(1:m)); |
|
|
67 |
nns(:,1) = (nns(:,1) - mean(nns,2)); |
|
|
68 |
dy(:,k) = W\(E*E.'*W*nns(:,1)); % corrective vector |
|
|
69 |
dy(:,k) = mean(nns,2) + dy(:,k); |
|
|
70 |
px(i) = mean(dy(:,k)); |
|
|
71 |
end |
|
|
72 |
|
|
|
73 |
% clean the time series |
|
|
74 |
indx = [0:t:d*t, n+(0:t:d*t)]; |
|
|
75 |
for q = [1:d+1, d:-1:1] |
|
|
76 |
for l = (1+indx(q)):indx(q+1) |
|
|
77 |
cx = 0; |
|
|
78 |
for k = 1:q % go through all of possible corrections |
|
|
79 |
cx = cx + dy(k,l-(k-1)*t); % all possible corrections to x(l) |
|
|
80 |
end |
|
|
81 |
px(l) = x(l) - cx/q; % cleaned values |
|
|
82 |
end |
|
|
83 |
end |
|
|
84 |
|
|
|
85 |
%-------------------------------------------------------------------------- |
|
|
86 |
function [kd_tree, r] = kd_partition(y, b, c) |
|
|
87 |
% KD_PARTITION Create a kd-tree and partitioned database for efficiently |
|
|
88 |
% finding the nearest neighbors to a point in a |
|
|
89 |
% d-dimensional space. |
|
|
90 |
% |
|
|
91 |
% USE: [kd_tree, r] = kd_partition(y, b, c); |
|
|
92 |
% |
|
|
93 |
% INPUT: |
|
|
94 |
% y: original multivariate data (points arranged columnwise, size(y,1)=d) |
|
|
95 |
% b: maximum number of distinct points for each bin (default is 100) |
|
|
96 |
% c: minimum range of point cluster within a final leaf (default is 0) |
|
|
97 |
% |
|
|
98 |
% OUTPUT: |
|
|
99 |
% kd_tree structure with the following variables: |
|
|
100 |
% splitdim: dimension used in splitting the node |
|
|
101 |
% splitval: corresponding cutting point |
|
|
102 |
% first & last: indices of points in the node |
|
|
103 |
% left & right: node #s of consequent branches to the current node |
|
|
104 |
% r: sorted index of points in the original y corresponding to the leafs |
|
|
105 |
% |
|
|
106 |
% to find k-nearest neighbors use kd_search.m |
|
|
107 |
% |
|
|
108 |
% copyrighted (c) and written by David Chelidze, January 28, 2009. |
|
|
109 |
|
|
|
110 |
% check the inputs |
|
|
111 |
if nargin == 0 |
|
|
112 |
error('Need to input at least the data to partition') |
|
|
113 |
elseif nargin > 3 |
|
|
114 |
error('Too many inputs') |
|
|
115 |
end |
|
|
116 |
|
|
|
117 |
|
|
|
118 |
% initializes default variables if needed |
|
|
119 |
if nargin < 2 |
|
|
120 |
b = 100; |
|
|
121 |
end |
|
|
122 |
if nargin < 3 |
|
|
123 |
c = 0; |
|
|
124 |
end |
|
|
125 |
|
|
|
126 |
[d, n] = size(y); % get the dimension and the number of points in y |
|
|
127 |
|
|
|
128 |
r = 1:n; % initialize original index of points in y |
|
|
129 |
|
|
|
130 |
% initializes variables for the number of nodes and the last node |
|
|
131 |
node = 1; |
|
|
132 |
last = 1; |
|
|
133 |
|
|
|
134 |
% initializes the first node's cut dimension and value in the kd_tree |
|
|
135 |
kd_tree.splitdim = 0; |
|
|
136 |
kd_tree.splitval = 0; |
|
|
137 |
|
|
|
138 |
% initializes the bounds on the index of all points |
|
|
139 |
kd_tree.first = 1; |
|
|
140 |
kd_tree.last = n; |
|
|
141 |
|
|
|
142 |
% initializes location of consequent branches in the kd_tree |
|
|
143 |
kd_tree.left = 0; |
|
|
144 |
kd_tree.right = 0; |
|
|
145 |
|
|
|
146 |
while node <= last % do until the tree is complete |
|
|
147 |
|
|
|
148 |
% specify the index of all the points that are partitioned in this node |
|
|
149 |
segment = kd_tree.first(node):kd_tree.last(node); |
|
|
150 |
|
|
|
151 |
% determines range of data in each dimension and sorts it |
|
|
152 |
[rng, index] = sort(range(y(:,segment),2)); |
|
|
153 |
|
|
|
154 |
% now determine if this segment needs splitting (cutting) |
|
|
155 |
if rng(d) > c && length(segment)>= b % then split |
|
|
156 |
yt = y(:,segment); |
|
|
157 |
rt = r(segment); |
|
|
158 |
[sorted, sorted_index] = sort(yt(index(d),:)); |
|
|
159 |
% estimate where the cut should go |
|
|
160 |
lng = size(yt,2); |
|
|
161 |
cut = (sorted(ceil(lng/2)) + sorted(floor(lng/2+1)))/2; |
|
|
162 |
L = (sorted <= cut); % points to the left of cut |
|
|
163 |
if sum(L) == lng % right node is empty |
|
|
164 |
L = (sorted < cut); % decrease points on the left |
|
|
165 |
cut = (cut + max(sorted(L)))/2; % adjust the cut |
|
|
166 |
end |
|
|
167 |
|
|
|
168 |
% adjust the order of the data in this node |
|
|
169 |
y(:,segment) = yt(:,sorted_index); |
|
|
170 |
r(segment) = rt(sorted_index); |
|
|
171 |
|
|
|
172 |
% assign appropriate split dimension and split value |
|
|
173 |
kd_tree.splitdim(node) = index(d); |
|
|
174 |
kd_tree.splitval(node) = cut; |
|
|
175 |
|
|
|
176 |
% assign the location of consequent bins and |
|
|
177 |
kd_tree.left(node) = last + 1; |
|
|
178 |
kd_tree.right(node) = last + 2; |
|
|
179 |
|
|
|
180 |
% specify which is the last node at this moment |
|
|
181 |
last = last + 2; |
|
|
182 |
|
|
|
183 |
% initialize next nodes cut dimension and value in the kd_tree |
|
|
184 |
% assuming they are terminal at this point |
|
|
185 |
kd_tree.splitdim = [kd_tree.splitdim 0 0]; |
|
|
186 |
kd_tree.splitval = [kd_tree.splitval 0 0]; |
|
|
187 |
|
|
|
188 |
% initialize the bounds on the index of the next nodes |
|
|
189 |
kd_tree.first = [kd_tree.first segment(1) segment(1)+sum(L)]; |
|
|
190 |
kd_tree.last = [kd_tree.last segment(1)+sum(L)-1 segment(lng)]; |
|
|
191 |
|
|
|
192 |
% initialize location of consequent branches in the kd_tree |
|
|
193 |
% assuming they are terminal at this point |
|
|
194 |
kd_tree.left = [kd_tree.left 0 0]; |
|
|
195 |
kd_tree.right = [kd_tree.right 0 0]; |
|
|
196 |
|
|
|
197 |
end % the splitting process |
|
|
198 |
|
|
|
199 |
% increment the node |
|
|
200 |
node = node + 1; |
|
|
201 |
|
|
|
202 |
end % the partitioning |
|
|
203 |
|
|
|
204 |
%-------------------------------------------------------------------------- |
|
|
205 |
function [pqr, pqd, pqi] = kd_search(y,r,tree,yp) |
|
|
206 |
% KD_SEARCH search kd_tree for r nearest neighbors of point yq. |
|
|
207 |
% Need to partition original data using kd_partition.m |
|
|
208 |
% |
|
|
209 |
% USE: [pqr, pqd, pqi] = kd_search(y,r,tree,yp); |
|
|
210 |
% |
|
|
211 |
% INPUT: |
|
|
212 |
% y: array of query points in columnwise form |
|
|
213 |
% r: requested number of nearest neighbors to the query point yq |
|
|
214 |
% tree: kd_tree constructed using kd_partition.m |
|
|
215 |
% yp: partitioned (ordered) set of data that needs to be searched |
|
|
216 |
% (using my kd_partirion you want to input ym(:,indx), where ym |
|
|
217 |
% is the data used for partitioning and indx sorted index of ym) |
|
|
218 |
% |
|
|
219 |
% OUTPUT: |
|
|
220 |
% pqr: cell array of the r nearest neighbors of y in yp |
|
|
221 |
% pqd: cell array of the corresponding distances |
|
|
222 |
% pqi: cell array of the indices of r nearest neighbors of y in yp |
|
|
223 |
% |
|
|
224 |
% copyright (c) and written by David Chelidze, February 02, 2009. |
|
|
225 |
|
|
|
226 |
% check inputs |
|
|
227 |
if nargin < 4 |
|
|
228 |
error('Need four input variables to work') |
|
|
229 |
end |
|
|
230 |
|
|
|
231 |
% declare global variables for subfunctions |
|
|
232 |
global yq qri qr qrd finish b_lower b_upper |
|
|
233 |
|
|
|
234 |
[~, n] = size(y); |
|
|
235 |
pqr = cell(n,1); |
|
|
236 |
pqd = cell(n,1); |
|
|
237 |
pqi = cell(n,1); |
|
|
238 |
|
|
|
239 |
for k = 1:n |
|
|
240 |
yq = y(:,k); |
|
|
241 |
qrd = Inf*ones(1,r); % initialize array for r distances |
|
|
242 |
qr = zeros(size(yq,1),r); % initialize r nearest neighbor points |
|
|
243 |
qri = zeros(1,r); % initialize index of r nearest neighbors |
|
|
244 |
finish = 0; % becomes 1 after search is complete |
|
|
245 |
|
|
|
246 |
% set up the box bounds, which start at +/- infinity (whole kd space) |
|
|
247 |
b_upper = Inf*ones(size(yq)); |
|
|
248 |
b_lower = -b_upper; |
|
|
249 |
|
|
|
250 |
kdsrch(1,r,tree,yp); % start the search from the first node |
|
|
251 |
pqr{k} = qr; |
|
|
252 |
pqd{k} = sqrt(qrd); |
|
|
253 |
pqi{k} = qri; |
|
|
254 |
end |
|
|
255 |
|
|
|
256 |
%-------------------------------------------------------------------------- |
|
|
257 |
function kdsrch(node,r,tree,yp) |
|
|
258 |
% KDSRCH actual kd search |
|
|
259 |
% this drills down the tree to the end node and updates the |
|
|
260 |
% nearest neighbors list with new points |
|
|
261 |
% |
|
|
262 |
% INPUT: starting node number, and kd_partition data |
|
|
263 |
% |
|
|
264 |
% copyright (c) and written by David Chelidze, February 02, 2009. |
|
|
265 |
|
|
|
266 |
global yq qri qr qrd finish b_lower b_upper |
|
|
267 |
|
|
|
268 |
% first find the terminal node containing yq |
|
|
269 |
if tree.left(node) ~= 0 % not a terminal node, search deeper |
|
|
270 |
|
|
|
271 |
% first determin which child node to search |
|
|
272 |
if yq(tree.splitdim(node)) <= tree.splitval(node) |
|
|
273 |
% need to search left child node |
|
|
274 |
tmp = b_upper(tree.splitdim(node)); |
|
|
275 |
b_upper(tree.splitdim(node)) = tree.splitval(node); |
|
|
276 |
kdsrch(tree.left(node),r,tree,yp); |
|
|
277 |
b_upper(tree.splitdim(node)) = tmp; |
|
|
278 |
else % need to search the right child node |
|
|
279 |
tmp = b_lower(tree.splitdim(node)); |
|
|
280 |
b_lower(tree.splitdim(node)) = tree.splitval(node); |
|
|
281 |
kdsrch(tree.right(node),r,tree,yp); |
|
|
282 |
b_lower(tree.splitdim(node)) = tmp; |
|
|
283 |
end % when the terminal node (leaf) containing yq is reached |
|
|
284 |
if finish % done searching |
|
|
285 |
return |
|
|
286 |
end |
|
|
287 |
|
|
|
288 |
% check if other nodes need to be searched |
|
|
289 |
if yq(tree.splitdim(node)) <= tree.splitval(node) |
|
|
290 |
tmp = b_lower(tree.splitdim(node)); |
|
|
291 |
b_lower(tree.splitdim(node)) = tree.splitval(node); |
|
|
292 |
if overlap(yq, b_lower, b_upper, qrd(r)) |
|
|
293 |
% need to search the right child node |
|
|
294 |
kdsrch(tree.right(node),r,tree,yp); |
|
|
295 |
end |
|
|
296 |
b_lower(tree.splitdim(node)) = tmp; |
|
|
297 |
else |
|
|
298 |
tmp = b_upper(tree.splitdim(node)); |
|
|
299 |
b_upper(tree.splitdim(node)) = tree.splitval(node); |
|
|
300 |
if overlap(yq, b_lower, b_upper, qrd(r)) |
|
|
301 |
% need to search the left child node |
|
|
302 |
kdsrch(tree.left(node),r,tree,yp); |
|
|
303 |
end |
|
|
304 |
b_upper(tree.splitdim(node)) = tmp; |
|
|
305 |
end % when all the other nodes are searched |
|
|
306 |
if finish % done searching |
|
|
307 |
return |
|
|
308 |
end |
|
|
309 |
|
|
|
310 |
else % this is a terminal node: update the nearest neighbors |
|
|
311 |
|
|
|
312 |
yt = yp(:,tree.first(node):tree.last(node)); % all points in node |
|
|
313 |
dstnc = zeros(1,size(yt,2)); |
|
|
314 |
for k = 1:size(yt,1) |
|
|
315 |
dstnc = dstnc + (yt(k,:)-yq(k)).^2; |
|
|
316 |
end |
|
|
317 |
%dstnc = sum((yt-yq*ones(1,size(yt,2))).^2,1); % distances squared |
|
|
318 |
qrd = [qrd dstnc]; % current list of distances squared |
|
|
319 |
qr = [qr yt]; % current list of nearest neighbors |
|
|
320 |
qri = [qri tree.first(node):tree.last(node)]; |
|
|
321 |
[qrd, indx] = sort(qrd); % sorted distances squared and their index |
|
|
322 |
qr = qr(:,indx); % sorted list of nearest neighbors |
|
|
323 |
qri = qri(indx); % sorted list of indexes |
|
|
324 |
if size(qr,2) > r % truncate to the first r points |
|
|
325 |
qrd = qrd(1:r); |
|
|
326 |
qr = qr(:,1:r); |
|
|
327 |
qri = qri(1:r); |
|
|
328 |
end |
|
|
329 |
% be done if all points are with this box on the first run |
|
|
330 |
if within(yq, b_lower, b_upper, qrd(r)); |
|
|
331 |
finish = 1; |
|
|
332 |
end % otherwise (during backtracking) WITHIN will always return 0. |
|
|
333 |
|
|
|
334 |
end % if |
|
|
335 |
|
|
|
336 |
%-------------------------------------------------------------------------- |
|
|
337 |
function flag = within(yq, b_lower, b_upper, ball) |
|
|
338 |
% WITHIN check if additional nodes need to be searched (i.e. if the ball |
|
|
339 |
% centered at yq and containing all current nearest neighbors overlaps the |
|
|
340 |
% boundary of the leaf box containing yq) |
|
|
341 |
% |
|
|
342 |
% INPUT: |
|
|
343 |
% yq: query point |
|
|
344 |
% b_lower, b_upper: lower and upper bounds on the leaf box |
|
|
345 |
% ball: square of the radius of the ball centered at yq and containing |
|
|
346 |
% all current r nearest neighbors |
|
|
347 |
% OUTPUT: |
|
|
348 |
% flag: 1 if ball does not intersect the box, 0 if it does |
|
|
349 |
% |
|
|
350 |
% Modified by David Chelidze on 02/03/2009. |
|
|
351 |
|
|
|
352 |
if ball <= min([abs(yq-b_lower)', abs(yq-b_upper)'])^2 |
|
|
353 |
% ball containing all the current nn is inside the leaf box (finish) |
|
|
354 |
flag = 1; |
|
|
355 |
else % ball overlaps other leaf boxes (continue recursive search) |
|
|
356 |
flag = 0; |
|
|
357 |
end |
|
|
358 |
|
|
|
359 |
%-------------------------------------------------------------------------- |
|
|
360 |
function flag = overlap(yq, b_lower, b_upper, ball) |
|
|
361 |
% OVERLAP check if the current box overlaps with the ball centered at yq |
|
|
362 |
% and containing all current r nearest neighbors’. |
|
|
363 |
% |
|
|
364 |
% INPUT: |
|
|
365 |
% yq: query point |
|
|
366 |
% b_lower, b_upper: lower and upper bounds on the current box |
|
|
367 |
% ball: square of the radius of the ball centered at yq and containing |
|
|
368 |
% all current r nearest neighbors |
|
|
369 |
% OUTPUT: |
|
|
370 |
% flag: 0 if ball does not overlap the box, 1 if it does |
|
|
371 |
% |
|
|
372 |
% Modified by David Chelidze on 02/03/2009. |
|
|
373 |
|
|
|
374 |
il = find(yq < b_lower); % index of yq coordinates that are lower the box |
|
|
375 |
iu = find(yq > b_upper); % index of yq coordinates that are upper the box |
|
|
376 |
% distance squared from yq to the edge of the box |
|
|
377 |
dst = sum((yq(il)-b_lower(il)).^2,1)+sum((yq(iu)-b_upper(iu)).^2,1); |
|
|
378 |
if dst >= ball % there is no overlap (finish this search) |
|
|
379 |
flag = 0; |
|
|
380 |
else % there is overlap and the box needs to be searched for nn |
|
|
381 |
flag = 1; |
|
|
382 |
end |