# Utility helpers for EEG feature handling: coordinate transforms, noise-based
# data augmentation, MAT-file loading and train/validation/test splitting.
#
# NOTE: ``from __future__`` imports must be the first statement of the module;
# placing ``__author__`` before it is a SyntaxError.
from __future__ import print_function

__author__ = 'Pouya Bashivan'

import math as m

import numpy as np
np.random.seed(123)  # Fixed seed so the random augmentation is reproducible.
import scipy.io

try:
    from sklearn.decomposition import PCA
except ImportError:
    # scikit-learn is only needed for the PCA-based augmentation path; keep the
    # rest of the module importable without it.
    PCA = None


def cart2sph(x, y, z):
    """
    Transform Cartesian coordinates to spherical
    :param x: X coordinate
    :param y: Y coordinate
    :param z: Z coordinate
    :return: radius, elevation, azimuth
    """
    x2_y2 = x**2 + y**2
    r = m.sqrt(x2_y2 + z**2)                # r
    elev = m.atan2(z, m.sqrt(x2_y2))        # Elevation
    az = m.atan2(y, x)                      # Azimuth
    return r, elev, az


def pol2cart(theta, rho):
    """
    Transform polar coordinates to Cartesian
    :param theta: angle value
    :param rho: radius value
    :return: X, Y
    """
    return rho * m.cos(theta), rho * m.sin(theta)


def augment_EEG(data, stdMult, pca=False, n_components=2):
    """
    Augment data by adding normal noise to each feature.

    :param data: EEG feature data as a matrix (n_samples x n_features)
    :param stdMult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data as a matrix (n_samples x n_features)
    :raises ImportError: if ``pca`` is True and scikit-learn is not installed.
    """
    augData = np.zeros(data.shape)
    if pca:
        if PCA is None:
            raise ImportError("scikit-learn is required for PCA-based augmentation")
        # Use a dedicated name for the estimator instead of rebinding the
        # boolean ``pca`` argument.
        pca_model = PCA(n_components=n_components)
        pca_model.fit(data)
        components = pca_model.components_
        variances = pca_model.explained_variance_ratio_
        coeffs = np.random.normal(scale=stdMult, size=pca_model.n_components) * variances
        # Weighted sum of the principal axes -> one noise vector (n_features,).
        # NOTE(review): the coefficients are drawn once, so every sample is
        # shifted by the same vector; confirm whether per-sample noise was
        # intended before changing this.
        noise = (components * coeffs.reshape((n_components, -1))).sum(axis=0)
        augData[...] = data + noise
    else:
        # Add Gaussian noise with std determined by weighted std of each feature
        for f, feat in enumerate(data.transpose()):
            augData[:, f] = feat + np.random.normal(scale=stdMult*np.std(feat), size=feat.size)
    return augData


def augment_EEG_image(image, std_mult, pca=False, n_components=2):
    """
    Augment image data by adding normal noise to each color channel.

    Each color channel is flattened to a (n_samples x W*H) matrix, augmented
    with :func:`augment_EEG`, and reshaped back.

    :param image: EEG feature data as a colored image [n_samples, n_colors, W, H]
    :param std_mult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data with the same shape as ``image``
    """
    n_samples, n_colors = image.shape[0], image.shape[1]
    augData = np.zeros((n_samples, n_colors, image.shape[2] * image.shape[3]))
    for c in range(n_colors):
        # Flatten the spatial dimensions of this channel into a feature axis.
        reshData = np.reshape(image[:, c, :, :], (n_samples, -1))
        if pca:
            augData[:, c, :] = augment_EEG(reshData, std_mult, pca=True, n_components=n_components)
        else:
            augData[:, c, :] = augment_EEG(reshData, std_mult, pca=False)
    return np.reshape(augData, image.shape)


def load_data(data_file):
    """
    Loads the data from MAT file. MAT file should contain a variable
    'features' in the shape of [samples, features + 1] whose last column
    holds the output labels. Label numbers are assumed to start from 1.

    Parameters
    ----------
    data_file: str

    Returns
    -------
    data: array_like
        Feature matrix (all columns of 'features' except the last).
    labels: array_like
        Last column of 'features' shifted by -1, i.e. zero-based labels.
    """
    print("Loading data from %s" % (data_file))

    dataMat = scipy.io.loadmat(data_file, mat_dtype=True)

    # Report the same variable that is returned below; the two previously
    # used different keys, which raised KeyError on consistent files.
    features = dataMat['features']
    print("Data loading complete. Shape is %r" % (features.shape,))
    return features[:, :-1], features[:, -1] - 1   # Sequential indices


def reformatInput(data, labels, indices):
    """
    Receives the indices for train and test datasets.
    Outputs the train, validation, and test data and label datasets.

    ``indices[0]`` holds the training-fold indices and ``indices[1]`` the test
    indices. The first ``len(indices[1])`` training-fold indices are used for
    validation and the remainder for training.

    :param data: array with 4 dimensions (samples on axis 0) or 5 dimensions
                 (samples on axis 1)
    :param labels: array of labels indexed by sample
    :param indices: pair (train_fold_indices, test_indices)
    :return: list of three (data, labels) tuples: train, validation, test.
             Labels are squeezed and cast to int32.
    :raises ValueError: if ``data`` has neither 4 nor 5 dimensions.
    """
    trainIndices = indices[0][len(indices[1]):]
    validIndices = indices[0][:len(indices[1])]
    testIndices = indices[1]

    if data.ndim == 4:
        return [(data[trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
    elif data.ndim == 5:
        return [(data[:, trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)),
                (data[:, validIndices], np.squeeze(labels[validIndices]).astype(np.int32)),
                (data[:, testIndices], np.squeeze(labels[testIndices]).astype(np.int32))]
    # Fail loudly instead of implicitly returning None.
    raise ValueError("data must have 4 or 5 dimensions, got %d" % data.ndim)


if __name__ == '__main__':
    data = np.random.normal(size=(100, 10))
    print("Original: {0}".format(data))
    print("Augmented: {0}".format(augment_EEG(data, 0.1, pca=True)))