Diff of /eeglearn/utils.py [000000] .. [f0f48e]

Switch to unified view

a b/eeglearn/utils.py
1
from __future__ import print_function

__author__ = 'Pouya Bashivan'

import math as m
import numpy as np
np.random.seed(123)
import scipy.io
from sklearn.decomposition import PCA
9
10
11
def cart2sph(x, y, z):
    """
    Transform Cartesian coordinates to spherical.

    Works on scalar coordinates only (uses the ``math`` module).

    :param x: X coordinate
    :param y: Y coordinate
    :param z: Z coordinate
    :return: radius, elevation, azimuth (angles in radians)
    """
    x2_y2 = x**2 + y**2
    r = m.sqrt(x2_y2 + z**2)                    # Radius
    # Elevation is measured from the XY plane towards +Z.
    elev = m.atan2(z, m.sqrt(x2_y2))
    # Azimuth is measured in the XY plane from the +X axis.
    az = m.atan2(y, x)
    return r, elev, az
24
25
26
def pol2cart(theta, rho):
    """
    Transform polar coordinates to Cartesian.

    Works on scalar values only (uses the ``math`` module).

    :param theta: angle value (radians)
    :param rho: radius value
    :return: X, Y
    """
    return rho * m.cos(theta), rho * m.sin(theta)
34
35
36
def augment_EEG(data, stdMult, pca=False, n_components=2):
    """
    Augment data by adding normal noise to each feature.

    :param data: EEG feature data as a matrix (n_samples x n_features)
    :param stdMult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data as a new matrix (n_samples x n_features); the input is not modified.
    """
    augData = np.zeros(data.shape)
    if pca:
        # Keep the fitted model under its own name instead of rebinding the
        # boolean ``pca`` flag.
        model = PCA(n_components=n_components)
        model.fit(data)
        components = model.components_
        variances = model.explained_variance_ratio_
        # Use the number of components actually fitted rather than the
        # constructor argument, which is not necessarily an integer count.
        n_fitted = components.shape[0]
        coeffs = np.random.normal(scale=stdMult, size=n_fitted) * variances
        # The noise vector does not depend on the sample, so compute it once.
        # NOTE(review): every sample receives the same noise vector because
        # the coefficients are drawn a single time -- confirm that per-sample
        # draws are not what was intended.
        noise = (components * coeffs.reshape((n_fitted, -1))).sum(axis=0)
        augData[:] = data + noise
    else:
        # Add Gaussian noise with std determined by weighted std of each feature
        for f, feat in enumerate(data.transpose()):
            augData[:, f] = feat + np.random.normal(scale=stdMult * np.std(feat), size=feat.size)
    return augData
60
61
62
def augment_EEG_image(image, std_mult, pca=False, n_components=2):
    """
    Augment data by adding normal noise to each feature.

    Each color channel is flattened to a (n_samples x W*H) matrix, augmented
    independently with augment_EEG, and reshaped back.

    :param image: EEG feature data as a colored image [n_samples, n_colors, W, H]
    :param std_mult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data with the same shape as image [n_samples, n_colors, W, H]
    """
    n_samples, n_colors = image.shape[0], image.shape[1]
    augData = np.zeros((n_samples, n_colors, image.shape[2] * image.shape[3]))
    for c in range(n_colors):
        # Flatten the spatial dimensions so that each pixel becomes a feature.
        reshData = np.reshape(image[:, c, :, :], (n_samples, -1))
        # n_components is ignored by augment_EEG when pca is False.
        augData[:, c, :] = augment_EEG(reshData, std_mult, pca=pca, n_components=n_components)
    return np.reshape(augData, image.shape)
80
81
82
def load_data(data_file):
    """
    Loads the data from MAT file. MAT file should contain a variable
    named 'features' holding a 2D matrix in which every row is a sample,
    all columns but the last are features, and the last column is the
    output label. Label numbers are assumed to start from 1.

    Parameters
    ----------
    data_file: str
        Path of the MAT file to read.

    Returns
    -------
    data: array_like
        Feature matrix (all columns of 'features' except the last).
    labels: array_like
        Last column of 'features' shifted by -1 so that labels start from 0.
    """
    print("Loading data from %s" % (data_file))

    dataMat = scipy.io.loadmat(data_file, mat_dtype=True)
    # Read the same variable for logging and for the returned values.
    features = dataMat['features']

    print("Data loading complete. Shape is %r" % (features.shape,))
    return features[:, :-1], features[:, -1] - 1   # Sequential indices
103
104
105
def reformatInput(data, labels, indices):
    """
    Receives the indices for train and test datasets.
    Outputs the train, validation, and test data and label datasets.

    The validation set is carved out of the head of the training indices and
    has the same number of samples as the test set.

    :param data: 4D array indexed by sample on axis 0, or 5D array indexed by
        sample on axis 1
    :param labels: array of labels indexed by sample on axis 0
    :param indices: pair (train_indices, test_indices)
    :return: list of three (data, labels) tuples for train, validation and
        test; labels are squeezed and cast to int32
    :raises ValueError: if data is neither 4D nor 5D
    """
    n_test = len(indices[1])
    trainIndices = indices[0][n_test:]
    validIndices = indices[0][:n_test]
    testIndices = indices[1]

    if data.ndim == 4:
        def select(idx):
            return data[idx]
    elif data.ndim == 5:
        def select(idx):
            return data[:, idx]
    else:
        # Previously fell through and returned None, which only surfaced
        # later as an unrelated error in the caller.
        raise ValueError("data must be 4D or 5D, got %d dimensions" % data.ndim)

    return [(select(idx), np.squeeze(labels[idx]).astype(np.int32))
            for idx in (trainIndices, validIndices, testIndices)]
125
126
127
if __name__ == '__main__':
    # Demo: augment a random (100 samples x 10 features) matrix with
    # PCA-based noise and print the matrix before and after.
    data = np.random.normal(size=(100, 10))
    print("Original: {0}".format(data))
    print("Augmented: {0}".format(augment_EEG(data, 0.1, pca=True)))