from __future__ import print_function

__author__ = 'Pouya Bashivan'

import math as m

import numpy as np
np.random.seed(123)  # fix the RNG seed so augmentation noise is reproducible
import scipy.io
from sklearn.decomposition import PCA


def cart2sph(x, y, z):
    """
    Transform Cartesian coordinates to spherical.

    :param x: X coordinate
    :param y: Y coordinate
    :param z: Z coordinate
    :return: radius, elevation, azimuth
    """
    x2_y2 = x**2 + y**2
    r = m.sqrt(x2_y2 + z**2)              # radius
    elev = m.atan2(z, m.sqrt(x2_y2))      # elevation, measured from the X-Y plane
    az = m.atan2(y, x)                    # azimuth
    return r, elev, az
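
# Worked example: cart2sph(1.0, 1.0, 1.0) returns (sqrt(3) ~ 1.732,
# atan2(1, sqrt(2)) ~ 0.6155 rad, pi/4 ~ 0.7854 rad). Note the elevation
# convention: 0 on the X-Y plane, pi/2 at the pole.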


def pol2cart(theta, rho):
    """
    Transform polar coordinates to Cartesian.

    :param theta: angle value
    :param rho: radius value
    :return: X, Y
    """
    return rho * m.cos(theta), rho * m.sin(theta)
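
# Illustrative sketch (an assumption about the caller, not enforced here):
# 3D electrode positions are commonly flattened to 2D by composing these two
# helpers as an azimuthal projection, e.g.
#   r, elev, az = cart2sph(x, y, z)
#   x2d, y2d = pol2cart(az, m.pi / 2.0 - elev)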


def augment_EEG(data, stdMult, pca=False, n_components=2):
    """
    Augment data by adding normal noise to each feature.

    :param data: EEG feature data as a matrix (n_samples x n_features)
    :param stdMult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data as a matrix (n_samples x n_features)
    """
    augData = np.zeros(data.shape)
    if pca:
        pca = PCA(n_components=n_components)
        pca.fit(data)
        components = pca.components_                # (n_components, n_features)
        variances = pca.explained_variance_ratio_
        for s, sample in enumerate(data):
            # Draw fresh coefficients for every sample so each one receives
            # independent noise along the principal components.
            coeffs = np.random.normal(scale=stdMult, size=n_components) * variances
            augData[s, :] = sample + (components * coeffs.reshape((n_components, -1))).sum(axis=0)
    else:
        # Add Gaussian noise with std proportional to the std of each feature
        for f, feat in enumerate(data.transpose()):
            augData[:, f] = feat + np.random.normal(scale=stdMult * np.std(feat), size=feat.size)
    return augData
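
# Example (illustrative): augment_EEG(feats, 0.1) keeps the
# (n_samples, n_features) shape and perturbs column j with zero-mean
# Gaussian noise of std 0.1 * np.std(feats[:, j]).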


def augment_EEG_image(image, std_mult, pca=False, n_components=2):
    """
    Augment data by adding normal noise to each feature.

    :param image: EEG feature data as a colored image [n_samples, n_colors, W, H]
    :param std_mult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data with the same shape as the input image
    """
    augData = np.zeros((image.shape[0], image.shape[1], image.shape[2] * image.shape[3]))
    for c in range(image.shape[1]):
        # Flatten each color channel to (n_samples, W * H) before augmenting.
        reshData = np.reshape(image[:, c, :, :], (image.shape[0], -1))
        if pca:
            augData[:, c, :] = augment_EEG(reshData, std_mult, pca=True, n_components=n_components)
        else:
            augData[:, c, :] = augment_EEG(reshData, std_mult, pca=False)
    return np.reshape(augData, image.shape)


def load_data(data_file):
    """
    Loads the data from a MAT file. The MAT file should contain a
    'features' variable: a matrix of shape [samples, features + 1]
    whose last column holds the output labels as a vector. Label
    numbers are assumed to start from 1.

    Parameters
    ----------
    data_file: str

    Returns
    -------
    data: array_like
    labels: array_like
    """
    print("Loading data from %s" % (data_file))

    dataMat = scipy.io.loadmat(data_file, mat_dtype=True)

    print("Data loading complete. Shape is %r" % (dataMat['features'].shape,))
    return dataMat['features'][:, :-1], dataMat['features'][:, -1] - 1  # Sequential indices
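
# Example (illustrative): a compatible file can be produced with
#   scipy.io.savemat('feats.mat', {'features': np.hstack([X, y[:, None] + 1])})
# after which load_data('feats.mat') returns (X, y) with zero-based labels.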


def reformatInput(data, labels, indices):
    """
    Receives the indices for train and test datasets.
    Outputs the train, validation, and test data and label datasets.
    """
    # The first len(test) entries of the training indices are held out for validation.
    trainIndices = indices[0][len(indices[1]):]
    validIndices = indices[0][:len(indices[1])]
    testIndices = indices[1]
    # Shuffling training data
    # shuffledIndices = np.random.permutation(len(trainIndices))
    # trainIndices = trainIndices[shuffledIndices]
    if data.ndim == 4:
        return [(data[trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
    elif data.ndim == 5:
        # 5D inputs carry a leading time/window axis, so samples are indexed on axis 1.
        return [(data[:, trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[:, validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[:, testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
    else:
        raise ValueError("Expected 4D or 5D data, got %dD" % data.ndim)
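
# Example (illustrative): with indices = (train_valid_idx, test_idx), the first
# len(test_idx) entries of train_valid_idx become the validation split, the rest
# train, and the result is [(X_train, y_train), (X_valid, y_valid), (X_test, y_test)].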


if __name__ == '__main__':
    data = np.random.normal(size=(100, 10))
    print("Original: {0}".format(data))
    print("Augmented: {0}".format(augment_EEG(data, 0.1, pca=True)))