eeglab / Git / [422372] /functions/studyfunc/pop

Models:
ReneeD/
eeglab
Downloads: 1
[422372]: / functions / studyfunc / pop_clust.m
History
Download this file
424 lines (383 with data), 20.8 kB

% POP_CLUST - select and run a clustering algorithm on components from an EEGLAB STUDY 
%               structure of EEG datasets. Clustering data should be prepared beforehand using 
%               POP_PRECLUST and/or STD_PRECLUST. The number of clusters must be
%               specified in advance. If called in gui mode, the POP_CLUSTEDIT window
%               appears when the clustering is complete to display clustering results
%               and allow the user to review and edit them.
% Usage: 
%               >> STUDY = pop_clust( STUDY, ALLEEG); % pop up a graphic interface
%               >> STUDY = pop_clust( STUDY, ALLEEG, 'key1', 'val1', ...); % no pop-up
% Inputs:
%   STUDY       - an EEGLAB STUDY set containing some or all of the EEG sets in ALLEEG. 
%   ALLEEG      - a vector of loaded EEG dataset structures of all sets in the STUDY set.
%
% Optional Inputs:
%   'algorithm' - ['kmeans'|'kmeanscluster'|'Neural Network'] algorithm to be used for 
%                 clustering. The 'kmeans' options requires the statistical toolbox. The
%                 'kmeanscluster' option is included in EEGLAB. The 'Neural Network' 
%                  option requires the Matlab Neural Net toolbox {default: 'kmeans'} 
%   'clus_num'  - [integer] the number of desired clusters (must be > 1)
%                 {default: 20}. Not necessary when using Affinity Propagation algorithm 
%   'maxiter'   - maximum number of iterations when using Affinity Propagation algorithm 
%   'outliers'  - [integer] identify outliers further than the given number of standard
%                 deviations from any cluster centroid. Inf --> identify no such outliers.
%                 {default: Inf from the command line; 3 for 'kmeans' from the pop window}
%   'save'      - ['on' | 'off'] save the updated STUDY to disk {default: 'off'} 
%   'filename'  - [string] if save option is 'on', save the STUDY under this file name
%                    {default: current STUDY filename}
%   'filepath'  - [string] if save option is 'on', will save the STUDY in this directory 
%                    {default: current STUDY filepath}
% Outputs:
%   STUDY       - as input, but modified adding the clustering results.
%
% Graphic interface buttons:
%  "Clustering algorithm" - [list box] display/choose among the available clustering 
%                           algorithms. 
%  "Number of clusters to compute" - [edit box] the number of desired clusters (>2)
%  "Identify outliers"  - [check box] check to detect outliers. 
%  "Save STUDY"         - [check box] check to save the updated STUDY after clustering 
%                         is performed. If no file entered, overwrites the current STUDY. 
%
%  See also  POP_CLUSTEDIT, POP_PRECLUST, STD_PRECLUST, POP_CLUST
%
% Authors:  Hilit Serby & Arnaud Delorme, SCCN, INC, UCSD, October 11, 2004

% Copyright (C) Hilit Serby, SCCN, INC, UCSD, October 11, 2004, hilit@sccn.ucsd.edu
%
% This file is part of EEGLAB, see http://www.eeglab.org
% for the documentation and details.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
%
% 1. Redistributions of source code must retain the above copyright notice,
% this list of conditions and the following disclaimer.
%
% 2. Redistributions in binary form must reproduce the above copyright notice,
% this list of conditions and the following disclaimer in the documentation
% and/or other materials provided with the distribution.
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
% THE POSSIBILITY OF SUCH DAMAGE.

% Coding notes: Useful information on functions and global variables used.

function [STUDY, ALLEEG, command] = pop_clust(STUDY, ALLEEG, varargin)

command = '';
if nargin < 2
    help pop_clust;
    return;
end

if isempty(STUDY.etc)
    error('No pre-clustering information, pre-cluster first!');
end
if ~isfield(STUDY.etc, 'preclust')
    error('No pre-clustering information, pre-cluster first!');
end
if isempty(STUDY.etc.preclust)
    error('No pre-clustering information, pre-cluster first!');
end

% Check that path to the stats toolbox comes first (conflict with Fieldtrip)
flagstats = strcmp(regexp(which('kmeans'), '(?<=[\\/]toolbox[\\/])[^\\/]+', 'match', 'once'),'stats');
if ~flagstats
    kmeansPath = fileparts(which('kmeans'));
    rmpath(kmeansPath);
    addpath(kmeansPath);
end
        
if isempty(varargin) %GUI call
    
    % remove clusters below clustering level (done also after GUI)
    % --------------------------------------
    rmindex    = [];
    clustlevel = STUDY.etc.preclust.clustlevel;
    nameclustbase = STUDY.cluster(clustlevel).name;
    if clustlevel == 1
        rmindex = [2:length(STUDY.cluster)];
    else
        for index = 2:length(STUDY.cluster)
            if strcmpi(STUDY.cluster(index).parent{1}, nameclustbase) && ~strncmpi('Notclust',STUDY.cluster(index).name,8)
                rmindex = [ rmindex index ];
            end
        end;        
    end
    
    if length(STUDY.cluster) > 2 && ~isempty(rmindex)
        resp = questdlg2('Clustering again will delete the last clustering results', 'Warning', 'Cancel', 'Ok', 'Ok');
        if strcmpi(resp, 'cancel'), return; end
    end
    
	alg_options = {'Kmeans (stat. toolbox)' 'Neural Network (stat. toolbox)' 'Kmeanscluster (no toolbox)' 'Affinity Propagation' 'Optimal Kmeans' }; %'Hierarchical tree' 
	set_outliers = ['set(findobj(''parent'', gcbf, ''tag'', ''outliers_std''), ''enable'', fastif(get(gcbo, ''value''), ''on'', ''off''));'...
                            'set(findobj(''parent'', gcbf, ''tag'', ''std_txt''), ''enable'', fastif(get(gcbo, ''value''), ''on'', ''off''));']; 
                        
    algoptions = [ 'set(findobj(''parent'', gcbf, ''userdata'', ''kmeans''), ''enable'', fastif(get(gcbo, ''value'')==1, ''on'', ''off''), ''visible'', fastif(get(gcbo, ''value'')==1, ''on'', ''off''));' ...
        'if get(findobj(''parent'', gcbf, ''tag'', ''clust_algorithm''),''value'') == 4;' ... % Check for Affinity Propagation to disable dialogbox
        '    set(findobj(''parent'', gcbf, ''userdata'', ''clust_num''), ''enable'', ''off'', ''visible'', ''off'');' ...
        'else;' ...
        '    set(findobj(''parent'', gcbf, ''userdata'', ''clust_num''), ''enable'', ''on'', ''visible'', ''on'');' ...
        'end;' ...
        'if get(findobj(''parent'', gcbf, ''tag'', ''clust_algorithm''),''value'') == 5;' ... % Check for Optimal Kmeans to change text
        '    set(findobj(''parent'', gcbf, ''userdata'', ''clust_num_text''), ''string'', ''Find optimal clust count [min max]:'');' ...
        'elseif get(findobj(''parent'', gcbf, ''tag'', ''clust_algorithm''),''value'') == 4;' ...
        '    set(findobj(''parent'', gcbf, ''userdata'', ''clust_num_text''), ''string'', '' '');' ...
        'else;' ...
        '    set(findobj(''parent'', gcbf, ''userdata'', ''clust_num_text''), ''string'', ''Number of clusters to compute:'');' ...
        'end;'];

    saveSTUDY = [ 'set(findobj(''parent'', gcbf, ''userdata'', ''save''), ''enable'', fastif(get(gcbo, ''value'')==1, ''on'', ''off''));' ];
	browsesave = [ '[filename, filepath] = uiputfile2(''*.study'', ''Save STUDY with .study extension -- pop_clust()''); ' ... 
                      'set(findobj(''parent'', gcbf, ''tag'', ''studyfile''), ''string'', [filepath filename]);' ];
    if ~exist('kmeans'), valalg = 3; else valalg = 1; end
                  
    strclust = '';
    if STUDY.etc.preclust.clustlevel > length(STUDY.cluster)
        STUDY.etc.preclust.clustlevel = 1;
    end
    if STUDY.etc.preclust.clustlevel == 1
        strclust = [ 'Performing clustering on cluster ''' STUDY.cluster(STUDY.etc.preclust.clustlevel).name '''' ];
    else
        strclust = [ 'Performing sub-clustering on cluster ''' STUDY.cluster(STUDY.etc.preclust.clustlevel).name '''' ];
    end
    
    numClust = ceil(mean(cellfun(@length, { STUDY.datasetinfo.comps })));
    if numClust > 2, numClustStr = num2str(numClust);
    else             numClustStr = '10';
    end
    
clust_param = inputgui( { [1] [1] [1 1] [1 0.5 0.5 ] [ 1 0.5 0.5 ] }, ...
    { {'style' 'text'       'string' strclust 'fontweight' 'bold'  } {} ...
      {'style' 'text'       'string' 'Clustering algorithm:' } ...
      {'style' 'popupmenu'  'string' alg_options  'value' valalg 'tag' 'clust_algorithm'  'Callback' algoptions } ...
      {'style' 'text'       'string' 'Number of clusters to compute:' 'userdata' 'clust_num_text' } ...
      {'style' 'edit'       'string' numClustStr 'tag' 'clust_num' 'userdata' 'clust_num' } {} ...
      {'style' 'checkbox'   'string' 'Separate outliers (enter std.)' 'tag' 'outliers_on' 'value' 0 'Callback' set_outliers 'enable' 'on' } ...
      {'style' 'edit'       'string' '3' 'tag' 'outliers_std' 'enable' 'off' } {} },...
                            'pophelp(''pop_clust'')', 'Set clustering algorithm -- pop_clust()' , [] , 'normal', [ 1 .5 1 1 1]);

	if ~isempty(clust_param)
        
        % removing previous cluster information
        % -------------------------------------
        if ~isempty(rmindex)
            fprintf('Removing child clusters of ''%s''...\n', nameclustbase);
            STUDY.cluster(rmindex)          = [];
            STUDY.cluster(clustlevel).child = [];
            if clustlevel == 1 && length(STUDY.cluster) > 1
                STUDY.cluster(1).child = { STUDY.cluster(2).name }; % "Notclust" cluster
            end
        end
        
        clus_alg = alg_options{clust_param{1}};
        clus_num = str2num(clust_param{2});
        outliers_on = clust_param{3};
        stdval = clust_param{4};
                
        outliers = [];
        try
            clustdata = STUDY.etc.preclust.preclustdata;
        catch
            error('Error accessing preclustering data. Perform pre-clustering.');
        end
        command = '[STUDY] = pop_clust(STUDY, ALLEEG,';
        
        if ~isempty(findstr(clus_alg, 'Kmeanscluster')), clus_alg = 'kmeanscluster'; end
        if ~isempty(findstr(clus_alg, 'Kmeans ')), clus_alg = 'kmeans'; end
        if ~isempty(findstr(clus_alg, 'Neural ')), clus_alg = 'neural network'; end
        if ~isempty(findstr(clus_alg, 'Optimal ')), clus_alg = 'optimal_kmeans'; end

        % Cleaning cache
        STUDY.cache = [];
        
        disp('Clustering ...');
        
        switch clus_alg
            case { 'kmeans' 'kmeanscluster' 'optimal_kmeans'}
                command = sprintf('%s %s%s%s %d %s', command, '''algorithm'',''', clus_alg, ''',''clus_num'', ', clus_num, ',');
                if outliers_on
                    command = sprintf('%s %s %s %s', command, '''outliers'', ', stdval, ',');
                    [IDX,C,sumd,D,outliers] = robust_kmeans(clustdata,clus_num,str2num(stdval),5,lower(clus_alg));
                    [STUDY] = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'robust_kmeans', clus_num});
                else
                    if strcmpi(clus_alg, 'kmeans')
                        [IDX,C,sumd,D] = kmeans(clustdata,clus_num,'replicates',10,'emptyaction','drop');
                    elseif strcmpi(clus_alg, 'optimal_kmeans')
                        [IDX,C] = optimal_kmeans(clustdata,clus_num);
                    else
                        %[IDX,C,sumd,D] = kmeanscluster(clustdata,clus_num);
                        [C,IDX,sumd] =kmeans_st(real(clustdata),clus_num,150);
                    end
                    [STUDY] = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'Kmeans', clus_num});
                end    
         case 'Hierarchical tree'
             %[IDX,C] = hierarchical_tree(clustdata,clus_num);
             %[STUDY] = std_createclust(STUDY,IDX,C,  {'Neural Network', clus_num});
         case 'neural network'
             [IDX,C] = neural_net(clustdata,clus_num);
             [STUDY] = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm',  {'Neural Network', clus_num});
             command = sprintf('%s %s %d %s', command, '''algorithm'', ''Neural Network'',''clus_num'', ', clus_num, ',');
             
         case 'Affinity Propagation'
             command = sprintf('%s %s%s%s %d %s', command, '''algorithm'',''Affinity Propagation'',');
             plugin_askinstall('limo_eeg', 'apcluster');
             [IDX,C,sumd] = std_apcluster(clustdata,'maxits',200);
             [STUDY]      = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'Affinity Propagation',size(C,1)});
        end
        disp('Done.');
        
        % If save updated STUDY to disk
        save_on = 0; % old option to save STUDY
        if save_on
            command = sprintf('%s %s', command, '''save'', ''on'',');
            if ~isempty(clust_param{6})
                [filepath filename ext] = fileparts(clust_param{6});
                command = sprintf('%s%s%s%s%s%s', command, '''filename'', ''', [filename ext], ', ''filepath'', ''', filepath, ''');' );
                STUDY = pop_savestudy(STUDY, ALLEEG, 'filename', [filename ext], 'filepath', filepath);
              else
                command(end:end+1) = ');';
                if (~isempty(STUDY.filename)) && (~isempty(STUDY.filepath))
                    STUDY = pop_savestudy(STUDY, ALLEEG, 'filename', STUDY.filename, 'filepath', STUDY.filepath);
                else
                    STUDY = pop_savestudy(STUDY, ALLEEG);
                end
           end
       else
           command(end:end+1) = ');';
        end
           
       % Call menu to plot clusters (use EEGLAB menu which include std_envtopo)
       LASTCOM = command;
       eval([ get(findobj(findobj('tag', 'EEGLAB'), 'Label', 'Edit/plot component clusters'), 'callback') ] );
       %[STUDY com] = pop_clustedit(STUDY, ALLEEG); 
	end
    
else %command line call
    % remove clusters below clustering level (done also after GUI)
    % --------------------------------------
    rmindex    = [];
    clustlevel = STUDY.etc.preclust.clustlevel;
    nameclustbase = STUDY.cluster(clustlevel).name;
    if clustlevel == 1
        rmindex = [2:length(STUDY.cluster)];
    else
        for index = 2:length(STUDY.cluster)
            if strcmpi(STUDY.cluster(index).parent{1}, nameclustbase) && ~strncmpi('Notclust',STUDY.cluster(index).name,8)
                rmindex = [ rmindex index ];
            end
        end;        
    end
    if ~isempty(rmindex)
        fprintf('Removing child clusters of ''%s''...\n', nameclustbase);
        STUDY.cluster(rmindex)          = [];
        STUDY.cluster(clustlevel).child = [];
        if clustlevel == 1 && length(STUDY.cluster) > 1
            STUDY.cluster(1).child = { STUDY.cluster(2).name }; % "Notclust" cluster
        end
    end

    %default values
    algorithm = 'kmeans';
    clus_num  = 20;
    save     = 'off';
    filename = STUDY.filename;
    filepath = STUDY.filepath;
    outliers = Inf; % default std is Inf - no outliers
    maxiter  = 200;
    
    if mod(length(varargin),2) ~= 0
        error('pop_clust(): input variables must be specified in pairs: keywords, values');
    end
    
    for k = 1:2:length(varargin)
        switch(varargin{k})
            case 'algorithm'
                algorithm  = varargin{k+1};
            case 'clus_num'
                clus_num   = varargin{k+1};
            case 'outliers'
                outliers   =  varargin{k+1};                
            case 'save'
                save       = varargin{k+1};
            case 'filename' 
                filename   = varargin{k+1};
            case 'filepath'
                filepath   = varargin{k+1};
            case 'maxiter' 
                maxiter    = varargin{k+1};
        end
    end    
    if clus_num < 2
        clus_num = 2;
    end
    
    clustdata = STUDY.etc.preclust.preclustdata;
    switch lower(algorithm)
        case { 'kmeans' 'kmeanscluster' }
            if outliers == Inf
                if strcmpi(algorithm, 'kmeans')
                    [IDX,C,sumd,D] = kmeans(clustdata,clus_num,'replicates',10,'emptyaction','drop');
                else
                    [IDX,C,sumd,D] = kmeanscluster(clustdata,clus_num);
                end
                [STUDY] = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'Kmeans', clus_num});
            else
                [IDX,C,sumd,D,outliers] = robust_kmeans(clustdata,clus_num,outliers,5, algorithm);
                [STUDY] = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'robust_kmeans', clus_num});
            end
        case 'neural network'
            [IDX,C] = neural_net(clustdata,clus_num);
            [STUDY] = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm',  {'Neural Network', clus_num});
        case 'affinity propagation'           
             [IDX,C,sumd] = std_apcluster(clustdata,'maxits',maxiter);
             [STUDY]      = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'Affinity Propagation',size(C,1)});
        case 'optimal_kmeans'
             [IDX,C] = optimal_kmeans(clustdata,clus_num);
             [STUDY]      = std_createclust(STUDY, ALLEEG, 'clusterind', IDX, 'algorithm', {'Optimal Kmeans',size(C,1)});
        otherwise
            disp('pop_clust: unknown algorithm return');
            return
    end
            % If save updated STUDY to disk
    if strcmpi(save,'on')
        if (~isempty(STUDY.filename)) && (~isempty(STUDY.filepath))
            STUDY = pop_savestudy(STUDY, 'filename', STUDY.filename, 'filepath', STUDY.filepath);
        else
            STUDY = pop_savestudy(STUDY);
        end
   end       
end
STUDY.saved = 'no';


% IDX - index of cluster for each component. Ex: 63 components and 2
% clusters: IDX will be a 61x1 vector of 1 and 2 (and 0=outlisers)
% C - centroid for clusters. If 2 clusters, size will be 2 x
%     width of the preclustering matrix
function [STUDY] = std_createclust2_old(STUDY,IDX,C, algorithm)

% Find the next available cluster index
% -------------------------------------
clusters = [];
cls = size(C,1); % number of cluster = number of row of centroid matrix
nc  = 0; % index of last cluster 
for k =  1:length(STUDY.cluster)
    ti = strfind(STUDY.cluster(k).name, ' ');
    tmp = STUDY.cluster(k).name(ti(end) + 1:end);
    nc = max(nc,str2num(tmp));
    % check if there is a cluster of Notclust components
    if strcmp(STUDY.cluster(k).parent,STUDY.cluster(STUDY.etc.preclust.clustlevel).name) 
        STUDY.cluster(k).preclust.preclustparams = STUDY.etc.preclust.preclustparams;
        clusters = [clusters k];
    end
end
len = length(STUDY.cluster);

if ~isempty(find(IDX==0)) %outliers exist
    firstind = 0;
    nc  = nc + 1;
    len = len + 1;
else
    firstind = 1;
end

% create all clusters
% -------------------
for k = firstind:cls 
    
    % cluster name
    % ------------
    if k == 0
         STUDY.cluster(len).name   = [ 'outlier ' num2str(k+nc)];
    else STUDY.cluster(k+len).name = [ 'Cls ' num2str(k+nc)];
    end

    % find indices
    % ------------
    tmp = find(IDX==k); % IDX contains the cluster index for each component
    STUDY.cluster(k+len).sets  = STUDY.cluster(STUDY.etc.preclust.clustlevel).sets(:,tmp);
    STUDY.cluster(k+len).comps = STUDY.cluster(STUDY.etc.preclust.clustlevel).comps(tmp);
    STUDY.cluster(k+len).algorithm = algorithm;
    STUDY.cluster(k+len).parent{end+1} = STUDY.cluster(STUDY.etc.preclust.clustlevel).name;
    STUDY.cluster(k+len).child = [];
    STUDY.cluster(k+len).preclust.preclustdata = STUDY.etc.preclust.preclustdata(tmp,:);
    STUDY.cluster(k+len).preclust.preclustparams = STUDY.etc.preclust.preclustparams;
    STUDY.cluster(k+len).preclust.preclustcomps = STUDY.etc.preclust.preclustcomps;

    %update parents clusters with cluster child indices
    % -------------------------------------------------
    STUDY.cluster(STUDY.etc.preclust.clustlevel).child{end+1} = STUDY.cluster(k+nc).name;
end

clusters = [ clusters firstind+len:cls+len];%the new created clusters indices.