[422372]: / functions / studyfunc / std_rejectoutliers.m

Download this file

135 lines (126 with data), 6.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
% STD_REJECTOUTLIERS - Commandline function, to reject outlier component(s) from clusters.
% Reassign the outlier component(s) to an outlier cluster specific to each cluster.
% Usage:
% >> [STUDY] = std_rejectoutliers(STUDY, ALLEEG, clusters, th);
% Inputs:
% STUDY - EEGLAB STUDY set comprising some or all of the EEG datasets in ALLEEG.
% ALLEEG - global EEGLAB vector of EEG structures for the dataset(s) included in the STUDY.
% ALLEEG for a STUDY set is typically created using LOAD_ALLEEG.
% Optional inputs:
% clusters - [numeric vector| 'all' ] specific cluster numbers (or 'all' clusters), which outliers
% will be rejected from. {default:'all'}.
% th - [number] a threshold factor to select outliers. How far a component can be from the
% cluster centroid (in the cluster std multiples) before it will be considered as an outlier.
% Components whose distance from the cluster centroid exceeds this factor
% times the cluster std (th * std) will be rejected. {default: 3}.
%
% Outputs:
% STUDY - the input STUDY set structure modified with the components reassignment,
% from the cluster to its outlier cluster.
%
% Example:
% >> clusters = [10 15]; th = 2;
% >> [STUDY] = std_rejectoutliers(STUDY, ALLEEG, clusters, th);
% Reject outlier components (that are more than 2 std from the cluster centroid) from cluster 10 and 15.
%
% See also pop_clustedit
%
% Authors: Hilit Serby, Arnaud Delorme, Scott Makeig, SCCN, INC, UCSD, July, 2005
% Copyright (C) Hilit Serby, SCCN, INC, UCSD, July 11, 2005, hilit@sccn.ucsd.edu
%
% This file is part of EEGLAB, see http://www.eeglab.org
% for the documentation and details.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
%
% 1. Redistributions of source code must retain the above copyright notice,
% this list of conditions and the following disclaimer.
%
% 2. Redistributions in binary form must reproduce the above copyright notice,
% this list of conditions and the following disclaimer in the documentation
% and/or other materials provided with the distribution.
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
% THE POSSIBILITY OF SUCH DAMAGE.
function STUDY = std_rejectoutliers(STUDY, ALLEEG, varargin)
% Reject outlier components from the selected clusters of a STUDY.
%
% Optional arguments (positional):
%   varargin{1} - clusters: numeric vector of cluster indices, [] or 'all'
%                 (both meaning clusters 2:end). Default: 'all'.
%   varargin{2} - th: numeric threshold factor. Default: 3.
%
% A component is treated as an outlier when its squared distance to the
% cluster centroid (in the pre-clustering data space) is larger than th
% times the standard deviation of those squared distances over the cluster.
% Outliers are handed to std_moveoutlier, which returns the updated STUDY.
%
% The function returns the STUDY unchanged (after a warning dialog) as soon
% as one selected cluster fails a validity check.

allCls = 2:length(STUDY.cluster); % all clusters in STUDY except the first one
cls = allCls;
th = 3; % The threshold factor - default: 3

% Parse the 'clusters' argument. It must be honoured as soon as it is
% present, including when it is the only optional argument given.
if ~isempty(varargin)
    if isnumeric(varargin{1})
        if ~isempty(varargin{1})
            cls = varargin{1};
        end
    elseif ischar(varargin{1}) && strcmpi(varargin{1}, 'all')
        cls = allCls;
    else
        error('std_rejectoutliers: clusters input takes either specific clusters (numeric vector) or keyword ''all''.');
    end
end

% Silently drop 'Notclust' and 'ParentCluster' clusters from the selection.
% NOTE(review): 'Outliers' clusters are not dropped here, so they trigger
% the warning below (and abort the whole call) - confirm this is intended.
keep = false(1, length(cls));
for k = 1:length(cls)
    clsName = STUDY.cluster(cls(k)).name;
    keep(k) = ~strncmpi('Notclust', clsName, 8) && ~strncmpi('ParentCluster', clsName, 13);
end
cls = cls(keep);

% Parse the threshold argument (only read when exactly two optional
% arguments are given).
if length(varargin) == 2
    if isnumeric(varargin{2})
        th = varargin{2};
    else
        error('std_rejectoutliers: std input must be a numeric value.');
    end
end

% Perform validity checks on every selected cluster before modifying STUDY
for k = 1:length(cls)
    clsName = STUDY.cluster(cls(k)).name;
    % Cannot reject outlier components if cluster is a 'Notclust' or 'Outlier' cluster
    if strncmpi('Notclust', clsName, 8) || strncmpi('Outliers', clsName, 8) || ...
            strncmpi('ParentCluster', clsName, 13)
        warndlg2('Cannot reject outlier components from a Notclust or Outliers cluster');
        return;
    end
    % Cannot reject outlier components if cluster has children clusters
    if ~isempty(STUDY.cluster(cls(k)).child)
        warndlg2('Cannot reject outlier components if cluster has children clusters.');
        return;
    end
    % If the PCA data matrix of the cluster components is empty (case of merged cluster)
    if isempty(STUDY.cluster(cls(k)).preclust.preclustdata) % No preclustering information
        warndlg2('Cannot reject outlier components if cluster was not a part of pre-clustering.');
        return;
    end
end

% For each of the clusters reject outlier components
for k = 1:length(cls)
    % The PCA data matrix of the cluster components (one row per component)
    clsPCA = STUDY.cluster(cls(k)).preclust.preclustdata;
    % The cluster centroid
    clsCentr = mean(clsPCA, 1);
    % Spread of the cluster: std (normalized by N) of the squared distances
    % between all cluster components and the cluster centroid.
    std_std = std(sum((clsPCA - ones(size(clsPCA,1),1)*clsCentr).^2, 2), 1);
    outliers = [];
    for c = 1:length(STUDY.cluster(cls(k)).comps)
        compdist = sum((clsPCA(c,:) - clsCentr).^2); % Squared distance of the component from the centroid
        if compdist > std_std * th % check if an outlier
            outliers = [outliers c];
        end
    end
    % Move outliers to the outlier cluster
    if ~isempty(outliers) % reject outliers if exist
        STUDY = std_moveoutlier(STUDY, ALLEEG, cls(k), outliers);
    end
end