Diff of /train_pipeline_2.py [000000] .. [45bab5]

--- a
+++ b/train_pipeline_2.py
@@ -0,0 +1,241 @@
+# train_pipeline: build band-power images from EEG sessions and train a CNN
+from eeg_learn_functions import *  # provides gen_images()
+import pandas as pd
+import numpy as np
+import scipy.stats as scs
+import re
+from numpy import genfromtxt
+
+from IPython.core.display import display, HTML
+display(HTML("<style>.container { width:100% !important; }</style>"))
+pd.options.display.max_columns = None
+pd.options.display.precision = 4
+
+# frequency bands in Hz (informational; theta_alpha_beta_averages()
+# keeps its own copies of these ranges)
+theta = (4,8)
+alpha = (8,12)
+beta = (12,40)
+
+def get_fft(snippet):
+    '''One-sided FFT of a 1-D snippet: returns (frequencies, amplitudes).'''
+    Fs = 128.0  # sampling rate (Hz)
+    y = snippet
+    n = len(y)          # length of the signal
+    k = np.arange(n)
+    T = n/Fs            # snippet duration (seconds)
+    frq = k/T           # two-sided frequency range
+    frq = frq[:n//2]    # keep the one-sided frequency range
+
+    Y = np.fft.fft(y)/n # FFT with amplitude normalization
+    Y = Y[:n//2]
+    #Y[0] = 0  # optionally zero the DC bin to remove bias
+    return frq, abs(Y)
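+# Illustrative sanity check for get_fft (hypothetical snippet): a unit-amplitude
+# 10 Hz sine sampled for 1 s at 128 Hz should peak at the 10 Hz bin with
+# amplitude ~0.5 (the other half sits in the discarded negative frequencies).
+#   t = np.arange(0, 1.0, 1.0/128.0)
+#   f, Y = get_fft(np.sin(2*np.pi*10*t))
+#   print(f[np.argmax(Y)], Y.max())  # -> 10.0, ~0.5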
+
+def theta_alpha_beta_averages(f,Y):
+    '''Mean FFT amplitude within the theta (4-8 Hz), alpha (8-12 Hz),
+    and beta (12-40 Hz) bands.'''
+    theta_range = (4,8)
+    alpha_range = (8,12)
+    beta_range = (12,40)
+    theta = Y[(f>theta_range[0]) & (f<=theta_range[1])].mean()
+    alpha = Y[(f>alpha_range[0]) & (f<=alpha_range[1])].mean()
+    beta = Y[(f>beta_range[0]) & (f<=beta_range[1])].mean()
+    return theta, alpha, beta
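+# Typical usage (illustrative), chaining the two helpers on one snippet:
+#   f, Y = get_fft(snippet)
+#   theta, alpha, beta = theta_alpha_beta_averages(f, Y)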
+
+def make_steps(samples,frame_duration,overlap):
+    '''
+    in:
+    samples - number of samples in the session
+    frame_duration - frame duration in seconds
+    overlap - float fraction of frame to overlap, in the range [0,1)
+
+    out: list of (start, end) sample-index tuples, one per frame
+    '''
+    Fs = 128
+    samples_per_frame = int(Fs * frame_duration)  # int() keeps the indices integral
+    step = samples_per_frame - int(samples_per_frame * overlap)
+    intervals = []
+    i = 0
+    while i + samples_per_frame <= samples:
+        intervals.append((i, i + samples_per_frame))
+        i += step
+    return intervals
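+# Illustrative check: 1 s frames with 50% overlap over 3 s of data at 128 Hz
+#   make_steps(384, 1.0, 0.5)
+#   -> [(0, 128), (64, 192), (128, 256), (192, 320), (256, 384)]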
+
+def make_frames(df,frame_duration,overlap):
+    '''
+    in: dataframe or array with all channels, frame duration in seconds,
+        overlap fraction in the range [0,1)
+    out: array of theta, alpha, beta averages for each probe for each time step
+        shape: (n-frames, m-probes, k-brainwave bands)
+    '''
+    # overlap was previously read from a module-level global (and never passed
+    # by the caller); it is now an explicit parameter
+    frames = []
+    steps = make_steps(len(df), frame_duration, overlap)
+    for i, (start, end) in enumerate(steps):
+        if i == 0:
+            continue  # skip the first frame (preserves the original behaviour)
+        frame = []
+        for channel in df.columns:
+            # note: .loc slicing is end-inclusive, so each snippet carries one extra sample
+            snippet = np.array(df.loc[start:end, int(channel)])
+            f, Y = get_fft(snippet)
+            theta, alpha, beta = theta_alpha_beta_averages(f, Y)
+            frame.append([theta, alpha, beta])
+        frames.append(frame)
+    return np.array(frames)
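+# Shape check (assuming a 14-channel dataframe): make_frames(df, 1.0, 0.5)
+# returns an array of shape (n_frames, 14, 3) -- one (theta, alpha, beta)
+# triple per probe, with the first frame skipped as above.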
+
+locs_2d = [(-2.0,4.0),
+           (2.0,4.0),
+           (-1.0,3.0),
+           (1.0,3.0),
+           (-3.0,3.0),
+           (3.0,3.0),
+           (-2.0,2.0),
+           (2.0,2.0),
+           (-2.0,-2.0),
+           (2.0,-2.0),
+           (-4.0,1.0),
+           (4.0,1.0),
+           (-1.0,-3.0),
+           (1.0,-3.0)]
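+# The 14 (x, y) pairs above are 2-D projections of the electrode positions
+# (the probe count matches a 14-channel headset such as the Emotiv EPOC; the
+# exact channel order is an assumption here). gen_images() interpolates each
+# frame's 14x3 band values over these locations to produce one
+# image_size x image_size map per band.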
+
+def make_data_pipeline(file_names,labels,image_size,frame_duration,overlap):
+    '''
+    IN:
+    file_names - list of strings, one input file per session
+    labels - list of labels, one per file
+    image_size - int; output images are image_size x image_size
+    frame_duration - time length of each frame (seconds)
+    overlap - float fraction of frame to overlap, in the range [0,1)
+
+    OUT:
+    X: np array of frames (unshuffled)
+    y: np array with the label (1 or 0) for each frame
+    '''
+
+    print('Generating training data...')
+
+
+    for i, file in enumerate(file_names):
+        print('Processing session: ', file, '. (', i+1, ' of ', len(file_names), ')')
+        data = genfromtxt(file, delimiter=',').T  # transpose so rows = samples, columns = channels
+        df = pd.DataFrame(data)
+
+        X_0 = make_frames(df, frame_duration, overlap)
+        X_1 = X_0.reshape(len(X_0), 14*3)  # flatten each frame to 14 probes * 3 bands
+
+        images = gen_images(np.array(locs_2d), X_1, image_size, normalize=False)
+        images = np.swapaxes(images, 1, 3)  # channels-first -> channels-last
+        print(len(images), ' frames generated with label ', labels[i], '.')
+        print('\n')
+        if i == 0:
+            X = images
+            y = np.ones(len(images))*labels[0]
+        else:
+            X = np.concatenate((X,images),axis = 0)
+            y = np.concatenate((y,np.ones(len(images))*labels[i]),axis = 0)
+
+
+    return X,np.array(y)
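+# After the axis swap inside the loop, X has shape
+# (total_frames, image_size, image_size, 3) and y has one label per frame.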
+
+file_names = ['data/ML101_KS.csv',
+              'data/ML101_US.csv',
+              'data/ML102_KS.csv',
+              'data/ML102_US.csv',
+              'data/ML103_KS.csv',
+              'data/ML103_US.csv',
+              'data/ML104_KS.csv',
+              'data/ML104_US.csv',
+              'data/ML105_KS.csv',
+              'data/ML105_US.csv',
+              'data/ML106_KS.csv',
+              'data/ML106_US.csv',
+              'data/ML107_KS.csv',
+              'data/ML107_US.csv',
+              'data/ML108_KS.csv',
+              'data/ML108_US.csv']
+labels = [1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]
+image_size = 28
+frame_duration = 1.0
+overlap = 0.5
+X, y = make_data_pipeline(file_names,labels,image_size,frame_duration,overlap)
+
+from sklearn.model_selection import train_test_split
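+# Note: with 50% overlap, neighbouring frames share samples, so a random
+# frame-level split puts near-duplicates in both sets and can inflate test
+# accuracy; a session-wise (leave-subject-out) split would be stricter.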
+x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20,shuffle=True)
+
+# input image dimensions
+img_rows, img_cols = image_size, image_size
+
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
+
+input_shape = (img_rows, img_cols, 3)
+
+import keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation, Flatten
+from keras.layers import Conv2D, MaxPooling2D
+from keras.utils import np_utils
+
+batch_size = 128
+num_classes = 2
+epochs = 500
+
+# convert class vectors to binary class matrices
+y_train = np_utils.to_categorical(y_train, num_classes)
+y_test = np_utils.to_categorical(y_test, num_classes)
+
+
+model = Sequential()
+model.add(Conv2D(32, (3, 3), padding='same',input_shape=input_shape))
+model.add(Activation('relu'))
+model.add(Conv2D(32, (3, 3)))
+model.add(Activation('relu'))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+model.add(Conv2D(32, (3, 3)))
+model.add(Activation('relu'))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+model.add(Conv2D(32, (3, 3)))
+model.add(Activation('relu'))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+#model.add(Dropout(0.25))
+
+model.add(Flatten())
+model.add(Dense(10))
+model.add(Activation('relu'))
+#model.add(Dropout(0.5))
+model.add(Dense(num_classes))
+model.add(Activation('softmax'))
+
+# initiate the RMSprop optimizer
+opt = keras.optimizers.RMSprop(lr=0.001, decay=1e-6)
+
+# Let's train the model using RMSprop
+model.compile(loss='categorical_crossentropy',
+              optimizer=opt,
+              metrics=['accuracy'])
+
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+# no /255 rescaling: the inputs are interpolated band-power maps from
+# gen_images, not 8-bit pixel values
+
+
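+# With 500 epochs and the dropout layers commented out, early stopping is
+# worth considering (optional sketch using the standard Keras callback):
+#   from keras.callbacks import EarlyStopping
+#   early_stop = EarlyStopping(monitor='val_loss', patience=20)
+#   ...then pass callbacks=[early_stop] to model.fit below.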
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          validation_data=(x_test, y_test),
+          shuffle=True)
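+
+# Minimal post-training check on the held-out split (standard Keras API):
+# with metrics=['accuracy'], evaluate() returns [loss, accuracy].
+score = model.evaluate(x_test, y_test, verbose=0)
+print('Test loss:', score[0])
+print('Test accuracy:', score[1])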