Diff of /train_pipeline.py [000000] .. [45bab5]

# train_pipeline
from eeg_learn_functions import *
import pandas as pd
import numpy as np
import scipy.stats as scs
import re
from numpy import genfromtxt

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.options.display.max_columns = None
pd.options.display.precision = 4

theta = (4,8)
alpha = (8,12)
beta = (12,40)

def get_fft(snippet):
    Fs = 128.0  # sampling rate (Hz)
    #Ts = len(snippet)/Fs/Fs # sampling interval
    snippet_time = len(snippet)/Fs
    Ts = 1.0/Fs  # sampling interval
    t = np.arange(0,snippet_time,Ts)  # time vector

    # ff = 5   # frequency of a test signal
    # y = np.sin(2*np.pi*ff*t)
    y = snippet
    # print('Ts: ',Ts)
    # print(t)
    # print(y.shape)
    n = len(y)  # length of the signal
    k = np.arange(n)
    T = n/Fs
    frq = k/T  # two-sided frequency range
    frq = frq[range(n//2)]  # one-sided frequency range

    Y = np.fft.fft(y)/n  # FFT computation and normalization
    Y = Y[range(n//2)]
    # Optionally zero the DC bin to remove bias:
    #Y[0] = 0
    return frq,abs(Y)

def theta_alpha_beta_averages(f,Y):
    theta_range = (4,8)
    alpha_range = (8,12)
    beta_range = (12,40)
    theta = Y[(f>theta_range[0]) & (f<=theta_range[1])].mean()
    alpha = Y[(f>alpha_range[0]) & (f<=alpha_range[1])].mean()
    beta = Y[(f>beta_range[0]) & (f<=beta_range[1])].mean()
    return theta, alpha, beta

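# Illustrative sanity check (not part of the original pipeline): a pure 10 Hz
# sine sampled at 128 Hz should peak near 10 Hz in the one-sided spectrum
# returned by get_fft, so its alpha (8-12 Hz) average should dominate the
# theta and beta averages. Names prefixed with "_" are used only for this check.
_t = np.arange(0, 2.0, 1.0/128.0)
_frq, _amp = get_fft(np.sin(2*np.pi*10.0*_t))
_th, _al, _be = theta_alpha_beta_averages(_frq, _amp)
assert abs(_frq[np.argmax(_amp[1:]) + 1] - 10.0) < 0.5  # spectral peak near 10 Hz
assert _al > _th and _al > _be                          # alpha band dominates
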
def make_steps(samples,frame_duration,overlap):
    '''
    in:
    samples - number of samples in the session
    frame_duration - frame duration in seconds
    overlap - float fraction of frame to overlap in range (0,1)

    out: list of tuple ranges
    '''
    #steps = np.arange(0,len(df),frame_length)
    Fs = 128
    i = 0
    intervals = []
    samples_per_frame = int(Fs * frame_duration)  # keep sample indices integral
    while i+samples_per_frame <= samples:
        intervals.append((i,i+samples_per_frame))
        i = i + samples_per_frame - int(samples_per_frame*overlap)
    return intervals

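# Example (illustrative, not part of the original pipeline): 1 s frames with
# 50% overlap over a 3 s (384-sample) session start every 64 samples.
assert make_steps(384, 1.0, 0.5) == [(0, 128), (64, 192), (128, 256), (192, 320), (256, 384)]
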
def make_frames(df,frame_duration,overlap):
    '''
    in: dataframe or array with all channels, frame duration in seconds,
        overlap fraction in range (0,1)
    out: array of theta, alpha, beta averages for each probe for each time step
        shape: (n-frames,m-probes,k-brainwave bands)
    '''
    Fs = 128.0
    frame_length = Fs*frame_duration
    frames = []
    steps = make_steps(len(df),frame_duration,overlap)
    for i,_ in enumerate(steps):
        frame = []
        if i == 0:
            continue
        else:
            for channel in df.columns:
                snippet = np.array(df.loc[steps[i][0]:steps[i][1],int(channel)])
                f,Y = get_fft(snippet)
                theta, alpha, beta = theta_alpha_beta_averages(f,Y)
                frame.append([theta, alpha, beta])

        frames.append(frame)
    return np.array(frames)

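# Illustrative shape check (not part of the original pipeline): with 14 channels,
# 1 s frames and 50% overlap, a 3 s (384-sample) recording gives 5 steps; the
# first step is skipped above, so 4 frames of 14 probes x 3 band averages remain.
_demo_df = pd.DataFrame(np.random.randn(384, 14))
assert make_frames(_demo_df, 1.0, 0.5).shape == (4, 14, 3)

# 2-D projected coordinates for the 14 probes, one (x, y) pair per channel;
# the ordering is assumed to match the channel order of the input CSV files.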
locs_2d = [(-2.0,4.0),
           (2.0,4.0),
           (-1.0,3.0),
           (1.0,3.0),
           (-3.0,3.0),
           (3.0,3.0),
           (-2.0,2.0),
           (2.0,2.0),
           (-2.0,-2.0),
           (2.0,-2.0),
           (-4.0,1.0),
           (4.0,1.0),
           (-1.0,-3.0),
           (1.0,-3.0)]

def make_data_pipeline(file_names,labels,image_size,frame_duration,overlap):
    '''
    IN:
    file_names - list of strings for each input file (one for each subject)
    labels - list of labels, one for each input file
    image_size - int size of the (square) output images, i.e. (image_size, image_size)
    frame_duration - time length of each frame (seconds)
    overlap - float fraction of frame to overlap in range (0,1)

    OUT:
    X: np array of frames (unshuffled)
    y: np array of label for each frame (1 or 0)
    '''

    Fs = 128.0   # sampling rate
    frame_length = Fs * frame_duration

    print('Generating training data...')


    for i, file in enumerate(file_names):
        print('Processing session: ', file, '. (', i+1, ' of ', len(file_names), ')')
        data = genfromtxt(file, delimiter=',').T
        df = pd.DataFrame(data)

        X_0 = make_frames(df,frame_duration,overlap)
        #steps = np.arange(0,len(df),frame_length)
        X_1 = X_0.reshape(len(X_0),14*3)

        images = gen_images(np.array(locs_2d),X_1, image_size, normalize=False)
        images = np.swapaxes(images, 1, 3)
        print(len(images), ' frames generated with label ', labels[i], '.')
        print('\n')
        if i == 0:
            X = images
            y = np.ones(len(images))*labels[0]
        else:
            X = np.concatenate((X,images),axis = 0)
            y = np.concatenate((y,np.ones(len(images))*labels[i]),axis = 0)


    return X,np.array(y)

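# Note (added for clarity; the exact image layout depends on gen_images from
# eeg_learn_functions): each frame's 14 probes x 3 band averages are flattened
# into a 42-element feature vector, which gen_images interpolates over the
# locs_2d positions; after the axis swap, X is stacked as
# (n_frames_total, image_size, image_size, 3).
# In the file list below, the *_KS sessions are labelled 1 and the *_US sessions 0.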
file_names = ['data/ML101_KS.csv',
              'data/ML101_US.csv',
              'data/ML102_KS.csv',
              'data/ML102_US.csv',
              'data/ML103_KS.csv',
              'data/ML103_US.csv',
              'data/ML104_KS.csv',
              'data/ML104_US.csv',
              'data/ML105_KS.csv',
              'data/ML105_US.csv',
              'data/ML106_KS.csv',
              'data/ML106_US.csv',
              'data/ML107_KS.csv',
              'data/ML107_US.csv',
              'data/ML108_KS.csv',
              'data/ML108_US.csv']
labels = [1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]
image_size = 28
frame_duration = 1.0
overlap = 0.5
X, y = make_data_pipeline(file_names,labels,image_size,frame_duration,overlap)

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=True)

# input image dimensions
img_rows, img_cols = 28, 28

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

input_shape = (img_rows, img_cols, 3)

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils

batch_size = 128
num_classes = 2
epochs = 400

# convert class vectors to binary class matrices
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
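# For example, to_categorical([0., 1.], 2) -> [[1., 0.], [0., 1.]], so column 0
# corresponds to label 0 and column 1 to label 1.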

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
#model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(10))
model.add(Activation('relu'))
#model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

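# Optional (added for inspection; standard Keras API): print layer output shapes
# and parameter counts for the network defined above.
model.summary()
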
# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(lr=0.001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
#x_train /= 255
#x_test /= 255


model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True)
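
# Evaluate the trained model on the held-out split (added as a sketch; uses the
# standard Keras evaluate call and the same metrics reported during training).
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])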