EEGLearn/utils.py
# coding:utf-8

import numpy as np
import math as m
import os
import scipy.io
from scipy.interpolate import griddata
from sklearn.preprocessing import scale
from functools import reduce


def cart2sph(x, y, z):
    """
    Transform Cartesian coordinates to spherical coordinates.
    :param x: X coordinate
    :param y: Y coordinate
    :param z: Z coordinate
    :return: radius, elevation, azimuth
    """
    x2_y2 = x**2 + y**2
    r = m.sqrt(x2_y2 + z**2)          # Radius
    elev = m.atan2(z, m.sqrt(x2_y2))  # Elevation angle measured from the x-y plane
    az = m.atan2(y, x)                # Azimuth angle in the x-y plane
    return r, elev, az


def pol2cart(theta, rho):
    """
    Transform polar coordinates to Cartesian coordinates.
    :param theta: angle value
    :param rho: radius value
    :return: X, Y
    """
    return rho * m.cos(theta), rho * m.sin(theta)


def azim_proj(pos):
    """
    Computes the azimuthal equidistant projection of a point given in 3D Cartesian coordinates.
    Imagine a plane placed against (tangent to) a globe. If a light source inside the
    globe projects the graticule onto the plane, the result is a planar, or azimuthal,
    map projection.

    :param pos: position in 3D Cartesian coordinates [x, y, z]
    :return: projected coordinates using the azimuthal equidistant projection
    """
    [r, elev, az] = cart2sph(pos[0], pos[1], pos[2])
    return pol2cart(az, m.pi / 2 - elev)
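

# Minimal usage sketch (not part of the original EEGLearn code): project one
# hypothetical 3D electrode position onto the 2D plane used for image generation.
# The coordinates below are illustrative only.
def _demo_azim_proj():
    pos_3d = [0.3, 0.5, 0.8]        # hypothetical electrode position [x, y, z]
    x_2d, y_2d = azim_proj(pos_3d)  # azimuthal equidistant projection
    print('Projected 2D location: (%.3f, %.3f)' % (x_2d, y_2d))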


def load_data(data_file, classification=True):
    """
    Loads data from a MAT file. The MAT file should contain a variable named
    'features' of shape [samples, features + 1], where the last column holds
    the output labels. Label numbers are assumed to start from 1.

    Parameters
    ----------
    data_file: str
        Path to the .mat file holding the [samples, (features, label)] matrix.

    Returns
    -------
    data: array_like
        Feature matrix [samples, features] and label vector [samples]
        (labels are shifted to start from 0 when classification is True).
    """
    print("Loading data from %s" % (data_file))
    dataMat = scipy.io.loadmat(data_file, mat_dtype=True)
    print("Data loading complete. Shape is %r" % (dataMat['features'].shape,))
    if classification:
        # Shift labels so that classes start from 0
        return dataMat['features'][:, :-1], dataMat['features'][:, -1] - 1
    else:
        return dataMat['features'][:, :-1], dataMat['features'][:, -1]
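

# Hedged usage sketch: load_data() expects a 'features' matrix whose last column
# is the label. Here a tiny synthetic matrix (5 samples, 3 features, labels 1-2)
# is written to a temporary .mat file and read back; names and sizes are illustrative.
def _demo_load_data():
    import tempfile
    tmp_path = os.path.join(tempfile.gettempdir(), 'demo_features.mat')
    fake = np.hstack([np.random.rand(5, 3), np.random.randint(1, 3, size=(5, 1))])
    scipy.io.savemat(tmp_path, {'features': fake})
    feats, labels = load_data(tmp_path)  # labels come back shifted to start at 0
    print(feats.shape, labels)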


def reformatInput(data, labels, indices):
    """
    Receives the indices for the train and test datasets and splits the data accordingly.
    :param indices: tuple of (train, test) index arrays
    :return: the train, validation, and test (data, label) sets; the validation set
             is carved out of the training indices and has the same size as the test set.
    """
    np.random.shuffle(indices[0])
    np.random.shuffle(indices[0])
    trainIndices = indices[0][len(indices[1]):]
    validIndices = indices[0][:len(indices[1])]
    testIndices = indices[1]

    if data.ndim == 4:
        return [(data[trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
    elif data.ndim == 5:
        return [(data[:, trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[:, validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[:, testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
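

# Hedged sketch of reformatInput() on dummy 4D image data: 10 samples are split into
# train/validation/test from a (train, test) index tuple. All shapes are illustrative.
def _demo_reformatInput():
    data = np.random.rand(10, 3, 32, 32)           # [samples, colors, W, H]
    labels = np.random.randint(0, 2, size=(10, 1))
    indices = (np.arange(8), np.arange(8, 10))     # (train+validation, test) indices
    (train, valid, test) = reformatInput(data, labels, indices)
    print(train[0].shape, valid[0].shape, test[0].shape)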


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """
    Iterates over the samples, returning batches of size batchsize.
    :param inputs: input data array. It should be a 4D numpy array for images
                   [n_samples, n_colors, W, H] and a 5D numpy array if working with
                   a sequence of images [n_timewindows, n_samples, n_colors, W, H].
    :param targets: vector of target labels.
    :param batchsize: batch size
    :param shuffle: flag for whether to shuffle the samples before iterating or not.
    :return: images and labels for a batch
    """
    if inputs.ndim == 4:
        input_len = inputs.shape[0]
    elif inputs.ndim == 5:
        input_len = inputs.shape[1]
    assert input_len == len(targets)

    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)
    for start_idx in range(0, input_len, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        if inputs.ndim == 4:
            yield inputs[excerpt], targets[excerpt]
        elif inputs.ndim == 5:
            yield inputs[:, excerpt], targets[excerpt]
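

# Hedged usage sketch: iterate over dummy 4D inputs in shuffled minibatches of 4.
# The final batch may be smaller than batchsize. Shapes are illustrative only.
def _demo_iterate_minibatches():
    inputs = np.random.rand(10, 3, 32, 32)      # [n_samples, n_colors, W, H]
    targets = np.random.randint(0, 2, size=10)
    for batch_x, batch_y in iterate_minibatches(inputs, targets, 4, shuffle=True):
        print(batch_x.shape, batch_y.shape)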


def gen_images(locs, features, n_gridpoints=32, normalize=True, edgeless=False):
    """
    Generates EEG images given electrode locations in 2D space and multiple feature values for each electrode.
    :param locs: An array with shape [n_electrodes, 2] containing X, Y
                 coordinates for each electrode.
    :param features: Feature matrix as [n_samples, n_features].
                     Features are as columns.
                     Features corresponding to each frequency band are concatenated.
                     (alpha1, alpha2, ..., beta1, beta2, ...)
    :param n_gridpoints: Number of pixels in the output images
    :param normalize: Flag for whether to normalize each band over all samples
    :param edgeless: If True, generates edgeless images by adding artificial channels
                     at the four corners of the image with value = 0 (default=False).
    :return: Tensor of size [samples, W, H, colors] containing the generated images.
    """
    feat_array_temp = []
    nElectrodes = locs.shape[0]  # Number of electrodes
    # The feature vector length must be divisible by the number of electrodes
    assert features.shape[1] % nElectrodes == 0
    n_colors = features.shape[1] // nElectrodes
    for c in range(n_colors):
        # features has shape [samples, n_colors * nElectrodes]
        feat_array_temp.append(features[:, c * nElectrodes : nElectrodes * (c + 1)])

    nSamples = features.shape[0]

    # Build the interpolation grid, e.g. np.mgrid[-1:1:5j] gives [-1., -0.5, 0., 0.5, 1.]
    grid_x, grid_y = np.mgrid[
        min(locs[:, 0]):max(locs[:, 0]):n_gridpoints*1j,
        min(locs[:, 1]):max(locs[:, 1]):n_gridpoints*1j
    ]

    temp_interp = []
    for c in range(n_colors):
        temp_interp.append(np.zeros([nSamples, n_gridpoints, n_gridpoints]))

    # Generate edgeless images by adding four zero-valued corner channels
    if edgeless:
        min_x, min_y = np.min(locs, axis=0)
        max_x, max_y = np.max(locs, axis=0)
        locs = np.append(locs, np.array([[min_x, min_y], [min_x, max_y], [max_x, min_y], [max_x, max_y]]), axis=0)
        for c in range(n_colors):
            feat_array_temp[c] = np.append(feat_array_temp[c], np.zeros((nSamples, 4)), axis=1)

    # Interpolate each sample and color channel onto the grid
    for i in range(nSamples):
        for c in range(n_colors):
            temp_interp[c][i, :, :] = griddata(locs, feat_array_temp[c][i, :], (grid_x, grid_y),
                                               method='cubic', fill_value=np.nan)

    # Normalize each color channel over all samples; NaNs outside the convex hull become 0
    for c in range(n_colors):
        if normalize:
            temp_interp[c][~np.isnan(temp_interp[c])] = \
                scale(temp_interp[c][~np.isnan(temp_interp[c])])

        temp_interp[c] = np.nan_to_num(temp_interp[c])

    # Swap axes from [colors, samples, W, H] to [samples, W, H, colors]
    temp_interp = np.swapaxes(np.asarray(temp_interp), 0, 1)
    temp_interp = np.swapaxes(temp_interp, 1, 2)
    temp_interp = np.swapaxes(temp_interp, 2, 3)
    return temp_interp
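

# Hedged usage sketch of gen_images(): random 2D electrode locations and random
# features for three frequency bands produce a [samples, W, H, colors] tensor.
# Electrode count, band count, and grid size below are illustrative only.
def _demo_gen_images():
    n_electrodes, n_bands, n_samples = 16, 3, 4
    locs_2d = np.random.rand(n_electrodes, 2)
    feats = np.random.rand(n_samples, n_electrodes * n_bands)
    images = gen_images(locs_2d, feats, n_gridpoints=16, normalize=True)
    print(images.shape)  # expected: (4, 16, 16, 3)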


def load_or_generate_images(file_path, average_image=3):
    """
    Loads cached EEG images from file_path, or generates (and saves) them if not found.
    :param average_image: 1 for the CNN-only model, 2 for multi-frame models
                          such as the LSTM, 3 for both.
    :return: Tensor(s) of size [n_timewindows, samples, W, H, colors] and/or
             [1, samples, W, H, colors] containing the generated images, plus the labels.
    """
    print('-'*100)
    print('Loading original data...')
    locs = scipy.io.loadmat('../SampleData/Neuroscan_locs_orig.mat')
    locs_3d = locs['A']
    locs_2d = []
    # Convert 3D electrode locations to 2D using the azimuthal equidistant projection
    for e in locs_3d:
        locs_2d.append(azim_proj(e))

    # Class labels should start from 0
    feats, labels = load_data('../SampleData/FeatureMat_timeWin.mat')  # 2670 x 1344 and 2670 x 1

    if average_image == 1:  # for CNN only
        if os.path.exists(file_path + 'images_average.mat'):
            images_average = scipy.io.loadmat(file_path + 'images_average.mat')['images_average']
            print('\n')
            print('Load images_average done!')
        else:
            print('\n')
            print('Generating average images over time windows...')
            # Find the average response over the 7 time windows
            for i in range(7):
                if i == 0:
                    temp = feats[:, i*192:(i+1)*192]  # each window holds 64 electrodes x 3 bands = 192 features
                else:
                    temp += feats[:, i*192:(i+1)*192]
            av_feats = temp / 7
            images_average = gen_images(np.array(locs_2d), av_feats, 32, normalize=False)
            scipy.io.savemat(file_path + 'images_average.mat', {'images_average': images_average})
            print('Saving images_average done!')

        del feats
        images_average = images_average[np.newaxis, :]
        print('The shape of images_average is', images_average.shape)
        return images_average, labels

    elif average_image == 2:  # for multi-frame models such as the LSTM
        if os.path.exists(file_path + 'images_timewin.mat'):
            images_timewin = scipy.io.loadmat(file_path + 'images_timewin.mat')['images_timewin']
            print('\n')
            print('Load images_timewin done!')
        else:
            print('Generating images for all time windows...')
            images_timewin = np.array([
                gen_images(
                    np.array(locs_2d),
                    feats[:, i*192:(i+1)*192], 32, normalize=False) for i in range(feats.shape[1] // 192)
            ])
            scipy.io.savemat(file_path + 'images_timewin.mat', {'images_timewin': images_timewin})
            print('Saving images for all time windows done!')

        del feats
        print('The shape of images_timewin is', images_timewin.shape)  # (7, 2670, 32, 32, 3)
        return images_timewin, labels

    else:
        if os.path.exists(file_path + 'images_average.mat'):
            images_average = scipy.io.loadmat(file_path + 'images_average.mat')['images_average']
            print('\n')
            print('Load images_average done!')
        else:
            print('\n')
            print('Generating average images over time windows...')
            # Find the average response over the 7 time windows
            for i in range(7):
                if i == 0:
                    temp = feats[:, i*192:(i+1)*192]
                else:
                    temp += feats[:, i*192:(i+1)*192]
            av_feats = temp / 7
            images_average = gen_images(np.array(locs_2d), av_feats, 32, normalize=False)
            scipy.io.savemat(file_path + 'images_average.mat', {'images_average': images_average})
            print('Saving images_average done!')

        if os.path.exists(file_path + 'images_timewin.mat'):
            images_timewin = scipy.io.loadmat(file_path + 'images_timewin.mat')['images_timewin']
            print('\n')
            print('Load images_timewin done!')
        else:
            print('\n')
            print('Generating images for all time windows...')
            images_timewin = np.array([
                gen_images(
                    np.array(locs_2d),
                    feats[:, i*192:(i+1)*192], 32, normalize=False) for i in range(feats.shape[1] // 192)
            ])
            scipy.io.savemat(file_path + 'images_timewin.mat', {'images_timewin': images_timewin})
            print('Saving images for all time windows done!')

        del feats
        images_average = images_average[np.newaxis, :]
        print('The shape of labels is', labels.shape)
        print('The shape of images_average is', images_average.shape)  # (1, 2670, 32, 32, 3)
        print('The shape of images_timewin is', images_timewin.shape)  # (7, 2670, 32, 32, 3)
        return images_average, images_timewin, labels
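

if __name__ == '__main__':
    # Hedged example run, assuming the EEGLearn sample data referenced above
    # ('../SampleData/Neuroscan_locs_orig.mat' and '../SampleData/FeatureMat_timeWin.mat')
    # is present and that generated images may be cached in the current directory.
    images_average, images_timewin, labels = load_or_generate_images('./', average_image=3)
    print(images_average.shape, images_timewin.shape, labels.shape)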