Diff of /EEGLearn/utils.py [000000] .. [117083]

#coding:utf-8

import numpy as np
import math as m
import os
import scipy.io
from scipy.interpolate import griddata
from sklearn.preprocessing import scale
from functools import reduce


def cart2sph(x, y, z):
    """
    Transform Cartesian coordinates to spherical.
    :param x: X coordinate
    :param y: Y coordinate
    :param z: Z coordinate
    :return: radius, elevation, azimuth
    """
    x2_y2 = x**2 + y**2
    r = m.sqrt(x2_y2 + z**2)                    # Radius
    elev = m.atan2(z, m.sqrt(x2_y2))            # Elevation
    az = m.atan2(y, x)                          # Azimuth
    return r, elev, az


def pol2cart(theta, rho):
    """
    Transform polar coordinates to Cartesian.
    :param theta: angle value
    :param rho: radius value
    :return: X, Y
    """
    return rho * m.cos(theta), rho * m.sin(theta)

def azim_proj(pos):
    """
    Computes the Azimuthal Equidistant Projection of an input point in 3D Cartesian coordinates.
    Imagine a plane being placed against (tangent to) a globe. If
    a light source inside the globe projects the graticule onto
    the plane the result would be a planar, or azimuthal, map
    projection.

    :param pos: position in 3D Cartesian coordinates    [x, y, z]
    :return: projected coordinates using Azimuthal Equidistant Projection
    """
    [r, elev, az] = cart2sph(pos[0], pos[1], pos[2])
    return pol2cart(az, m.pi / 2 - elev)

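# Usage sketch (illustration only, not part of the original file): projecting a
# single 3D electrode location onto the 2D plane later used by gen_images. The
# coordinates are made up; a point at the top of the unit sphere maps to the
# image centre.
#   >>> azim_proj([0.0, 0.0, 1.0])
#   (0.0, 0.0)

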
def load_data(data_file, classification=True):
    """
    Loads the data from a MAT file. The MAT file should contain a
    variable 'features' shaped [samples, features + 1], where the last
    column holds the output label of each sample and the remaining
    columns hold the feature values. Label numbers are assumed to
    start from 1.

    Parameters
    ----------
    data_file: str
        Path to the .mat file to load.
    classification: bool
        If True, labels are shifted to start from 0.

    Returns
    -------
    features: array_like, [samples, features]
    labels: array_like, [samples]
    """
    print("Loading data from %s" % (data_file))
    dataMat = scipy.io.loadmat(data_file, mat_dtype=True)
    print("Data loading complete. Shape is %r" % (dataMat['features'].shape,))
    if classification:
        return dataMat['features'][:, :-1], dataMat['features'][:, -1] - 1
    else:
        return dataMat['features'][:, :-1], dataMat['features'][:, -1]


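# Usage sketch (illustration only): load_data expects a .mat file whose
# 'features' variable stacks the label as the last column. The path is the
# sample file referenced later in this module; the shapes in the comment are
# the ones reported for that file and are not guaranteed for other data.
#   >>> feats, labels = load_data('../SampleData/FeatureMat_timeWin.mat')
#   >>> feats.shape, labels.shape    # e.g. ((2670, 1344), (2670,))

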
def reformatInput(data, labels, indices):
    """
    Receives the indices for the train and test datasets and outputs the
    train, validation, and test data and label sets. The validation set is
    carved out of the shuffled training indices and has the same size as
    the test set.
    :param indices: tuple of (train, test) index arrays
    """
    np.random.shuffle(indices[0])
    trainIndices = indices[0][len(indices[1]):]
    validIndices = indices[0][:len(indices[1])]
    testIndices = indices[1]

    if data.ndim == 4:
        return [(data[trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
    elif data.ndim == 5:
        return [(data[:, trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[:, validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[:, testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]

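# Usage sketch (illustration only): feeding reformatInput a random train/test
# split. Building the fold from a permutation is an assumption made here for
# demonstration; the original EEGLearn code derives these indices from its
# leave-subject-out folds.
#   >>> n_samples = images.shape[0]          # images: [samples, W, H, colors]
#   >>> idx = np.random.permutation(n_samples)
#   >>> fold = (idx[:int(0.8 * n_samples)], idx[int(0.8 * n_samples):])
#   >>> (X_tr, y_tr), (X_va, y_va), (X_te, y_te) = reformatInput(images, labels, fold)

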
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """
    Iterates over the samples, returning batches of size batchsize.
    :param inputs: input data array. It should be a 4D numpy array for images [n_samples, n_colors, W, H] and a 5D numpy
                    array if working with sequences of images [n_timewindows, n_samples, n_colors, W, H].
    :param targets: vector of target labels.
    :param batchsize: Batch size
    :param shuffle: Flag whether to shuffle the samples before iterating or not.
    :return: images and labels for a batch
    """
    if inputs.ndim == 4:
        input_len = inputs.shape[0]
    elif inputs.ndim == 5:
        input_len = inputs.shape[1]
    assert input_len == len(targets)

    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)
    for start_idx in range(0, input_len, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        if inputs.ndim == 4:
            yield inputs[excerpt], targets[excerpt]
        elif inputs.ndim == 5:
            yield inputs[:, excerpt], targets[excerpt]


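# Usage sketch (illustration only): a minimal training loop driven by
# iterate_minibatches. train_fn stands in for whatever update function the
# surrounding model defines and is an assumption, not part of this file.
#   >>> for X_batch, y_batch in iterate_minibatches(X_tr, y_tr, batchsize=32, shuffle=True):
#   ...     loss = train_fn(X_batch, y_batch)

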
def gen_images(locs, features, n_gridpoints=32, normalize=True, edgeless=False):
    """
    Generates EEG images given electrode locations in 2D space and multiple feature values for each electrode.
    :param locs: An array with shape [n_electrodes, 2] containing X, Y
                        coordinates for each electrode.
    :param features: Feature matrix as [n_samples, n_features]
                                Features are as columns.
                                Features corresponding to each frequency band are concatenated.
                                (alpha1, alpha2, ..., beta1, beta2, ...)
    :param n_gridpoints: Number of pixels along each side of the output images
    :param normalize:   Flag for whether to normalize each band over all samples
    :param edgeless:    If True, generates edgeless images by adding artificial channels
                        at the four corners of the image with value = 0 (default=False).
    :return:            Tensor of size [samples, W, H, colors] containing the generated
                        images.
    """
    feat_array_temp = []
    nElectrodes = locs.shape[0]     # Number of electrodes
    # The feature vector length must be divisible by the number of electrodes
    assert features.shape[1] % nElectrodes == 0
    n_colors = features.shape[1] // nElectrodes
    for c in range(n_colors):
        feat_array_temp.append(features[:, c * nElectrodes : nElectrodes * (c+1)])  # features is [samples, n_colors*nElectrodes]

    nSamples = features.shape[0]    # Number of samples (2670 in the sample data)
    # Build the interpolation grid; e.g. np.mgrid[-1:1:5j] gives [-1., -0.5, 0., 0.5, 1.]
    grid_x, grid_y = np.mgrid[
                     min(locs[:, 0]):max(locs[:, 0]):n_gridpoints*1j,
                     min(locs[:, 1]):max(locs[:, 1]):n_gridpoints*1j
                     ]

    temp_interp = []
    for c in range(n_colors):
        temp_interp.append(np.zeros([nSamples, n_gridpoints, n_gridpoints]))

    # Generate edgeless images
    if edgeless:
        min_x, min_y = np.min(locs, axis=0)
        max_x, max_y = np.max(locs, axis=0)
        locs = np.append(locs, np.array([[min_x, min_y], [min_x, max_y], [max_x, min_y], [max_x, max_y]]), axis=0)
        for c in range(n_colors):
            feat_array_temp[c] = np.append(feat_array_temp[c], np.zeros((nSamples, 4)), axis=1)

    # Interpolating
    for i in range(nSamples):
        for c in range(n_colors):
            temp_interp[c][i, :, :] = griddata(locs, feat_array_temp[c][i, :], (grid_x, grid_y),
                                               method='cubic', fill_value=np.nan)

    # Normalizing each color channel over all samples, ignoring the NaNs outside the convex hull
    for c in range(n_colors):
        if normalize:
            temp_interp[c][~np.isnan(temp_interp[c])] = \
                scale(temp_interp[c][~np.isnan(temp_interp[c])])

        temp_interp[c] = np.nan_to_num(temp_interp[c])

    # Reorder axes from [colors, samples, W, H] to [samples, W, H, colors]
    temp_interp = np.swapaxes(np.asarray(temp_interp), 0, 1)
    temp_interp = np.swapaxes(temp_interp, 1, 2)
    temp_interp = np.swapaxes(temp_interp, 2, 3)
    return temp_interp


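# Usage sketch (illustration only): generating images from random data. The
# shapes are made up; 64 electrodes x 3 frequency bands matches the 192-column
# windows assumed by load_or_generate_images below.
#   >>> locs_2d = np.random.rand(64, 2)        # projected electrode positions
#   >>> feats = np.random.rand(10, 64 * 3)     # 10 samples, 3 bands per electrode
#   >>> imgs = gen_images(locs_2d, feats, n_gridpoints=32, normalize=True)
#   >>> imgs.shape                             # (10, 32, 32, 3)

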
def load_or_generate_images(file_path, average_image=3):
    """
    Generates (or loads cached) EEG images.
    :param file_path: path prefix under which the generated image .mat files are cached.
    :param average_image: 1 for the CNN-only model, 2 for multi-frame models
                        such as LSTM, 3 for both.

    :return:            Depending on average_image: images_average of size [1, samples, W, H, colors],
                        images_timewin of size [n_windows, samples, W, H, colors], or both,
                        together with the label vector.
    """
    print('-'*100)
    print('Loading original data...')
    locs = scipy.io.loadmat('../SampleData/Neuroscan_locs_orig.mat')
    locs_3d = locs['A']
    locs_2d = []
    # Convert to 2D
    for e in locs_3d:
        locs_2d.append(azim_proj(e))

    # Class labels should start from 0
    feats, labels = load_data('../SampleData/FeatureMat_timeWin.mat')   # feats: 2670 x 1344, labels: 2670 x 1

    if average_image == 1:   # for CNN only
        if os.path.exists(file_path + 'images_average.mat'):
            images_average = scipy.io.loadmat(file_path + 'images_average.mat')['images_average']
            print('\n')
            print('Load images_average done!')
        else:
            print('\n')
            print('Generating average images over time windows...')
            # Find the average response over time windows
            for i in range(7):
                if i == 0:
                    temp = feats[:, i*192:(i+1)*192]    # each window contains 64*3 = 192 features
                else:
                    temp += feats[:, i*192:(i+1)*192]
            av_feats = temp / 7
            images_average = gen_images(np.array(locs_2d), av_feats, 32, normalize=False)
            scipy.io.savemat(file_path + 'images_average.mat', {'images_average': images_average})
            print('Saving images_average done!')

        del feats
        images_average = images_average[np.newaxis, :]
        print('The shape of images_average is', images_average.shape)
        return images_average, labels

    elif average_image == 2:    # for multi-frame models such as LSTM
        if os.path.exists(file_path + 'images_timewin.mat'):
            images_timewin = scipy.io.loadmat(file_path + 'images_timewin.mat')['images_timewin']
            print('\n')
            print('Load images_timewin done!')
        else:
            print('Generating images for all time windows...')
            images_timewin = np.array([
                gen_images(
                    np.array(locs_2d),
                    feats[:, i*192:(i+1)*192], 32, normalize=False) for i in range(feats.shape[1]//192)
                ])
            scipy.io.savemat(file_path + 'images_timewin.mat', {'images_timewin': images_timewin})
            print('Saving images for all time windows done!')

        del feats
        print('The shape of images_timewin is', images_timewin.shape)   # (7, 2670, 32, 32, 3)
        return images_timewin, labels

    else:
        if os.path.exists(file_path + 'images_average.mat'):
            images_average = scipy.io.loadmat(file_path + 'images_average.mat')['images_average']
            print('\n')
            print('Load images_average done!')
        else:
            print('\n')
            print('Generating average images over time windows...')
            # Find the average response over time windows
            for i in range(7):
                if i == 0:
                    temp = feats[:, i*192:(i+1)*192]
                else:
                    temp += feats[:, i*192:(i+1)*192]
            av_feats = temp / 7
            images_average = gen_images(np.array(locs_2d), av_feats, 32, normalize=False)
            scipy.io.savemat(file_path + 'images_average.mat', {'images_average': images_average})
            print('Saving images_average done!')

        if os.path.exists(file_path + 'images_timewin.mat'):
            images_timewin = scipy.io.loadmat(file_path + 'images_timewin.mat')['images_timewin']
            print('\n')
            print('Load images_timewin done!')
        else:
            print('\n')
            print('Generating images for all time windows...')
            images_timewin = np.array([
                gen_images(
                    np.array(locs_2d),
                    feats[:, i*192:(i+1)*192], 32, normalize=False) for i in range(feats.shape[1]//192)
                ])
            scipy.io.savemat(file_path + 'images_timewin.mat', {'images_timewin': images_timewin})
            print('Saving images for all time windows done!')

        del feats
        images_average = images_average[np.newaxis, :]
        print('The shape of labels is', labels.shape)
        print('The shape of images_average is', images_average.shape)    # (1, 2670, 32, 32, 3)
        print('The shape of images_timewin is', images_timewin.shape)   # (7, 2670, 32, 32, 3)
        return images_average, images_timewin, labels
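

# Usage sketch (illustration only): combining the loaders above. The cache
# prefix is an assumption; any writable path prefix works, since the generated
# .mat file names are simply appended to it.
#   >>> images_average, images_timewin, labels = load_or_generate_images('../SampleData/', average_image=3)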