|
a |
|
b/train_pipeline.py |
|
|
1 |
#train_pipeline |
|
|
2 |
from eeg_learn_functions import * |
|
|
3 |
import pandas as pd |
|
|
4 |
import numpy as np |
|
|
5 |
import scipy.stats as scs |
|
|
6 |
import re |
|
|
7 |
from numpy import genfromtxt |
|
|
8 |
|
|
|
9 |
from IPython.core.display import display, HTML |
|
|
10 |
# Inject CSS so the notebook container uses the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# pandas display options: show every column, print 4 digits of precision.
pd.set_option("display.max_columns", None)
pd.set_option("display.precision", 4)

# (low, high) limits of the theta, alpha and beta bands. The same numbers are
# repeated inside theta_alpha_beta_averages.
theta, alpha, beta = (4, 8), (8, 12), (12, 40)
|
|
17 |
|
|
|
18 |
def get_fft(snippet, Fs=128.0):
    '''
    Compute the one-sided amplitude spectrum of a signal snippet.

    in:
    snippet - 1-D sequence of samples
    Fs - sampling rate in Hz (defaults to 128.0, the value that used to be
         hard-coded here)

    out:
    frq - frequencies of the first n//2 FFT bins, i.e. k / (n / Fs)
    amplitude - abs(FFT(snippet)) / n for the same bins

    raises: ValueError if snippet is empty.

    The DC bin (index 0) is kept as-is; no bias removal is applied.
    '''
    y = np.asarray(snippet)
    n = len(y)  # length of the signal
    if n == 0:
        raise ValueError('snippet must contain at least one sample')

    half = n // 2  # keep only the one-sided range
    T = n / Fs  # duration of the snippet in seconds
    frq = np.arange(half) / T

    Y = np.fft.fft(y) / n  # fft computing and normalization
    return frq, np.abs(Y[:half])
|
|
42 |
|
|
|
43 |
def theta_alpha_beta_averages(f, Y, theta_range=(4, 8), alpha_range=(8, 12),
                              beta_range=(12, 40)):
    '''
    Average the spectrum Y over the theta, alpha and beta bands.

    in:
    f - 1-D sequence of frequencies, one per entry of Y
    Y - 1-D sequence of spectrum values
    theta_range, alpha_range, beta_range - (low, high) band limits; the
        defaults are the values that used to be hard-coded here

    out: tuple (theta, alpha, beta) of band means

    A bin belongs to a band when low < f <= high, so a frequency sitting on a
    shared edge is counted in the lower band only. A band that contains no
    bin yields NaN.
    '''
    # Accept plain lists as well as arrays; boolean masking needs ndarrays.
    f = np.asarray(f)
    Y = np.asarray(Y)

    def band_mean(band):
        low, high = band
        selected = Y[(f > low) & (f <= high)]
        # Return NaN explicitly instead of triggering numpy's mean-of-empty
        # warning when the snippet has no bin inside this band.
        return selected.mean() if selected.size else np.nan

    return band_mean(theta_range), band_mean(alpha_range), band_mean(beta_range)
|
|
51 |
|
|
|
52 |
def make_steps(samples, frame_duration, overlap, Fs=128):
    '''
    Split a session into (possibly overlapping) fixed-length frames.

    in:
    samples - number of samples in the session
    frame_duration - frame duration in seconds
    overlap - float fraction of frame to overlap in range (0,1)
    Fs - sampling rate in Hz (defaults to 128, the value that used to be
         hard-coded here)

    out: list of (start, stop) integer sample ranges; stop is start plus the
         frame length. Only frames that fit completely inside the session
         are returned.

    raises: ValueError if the frame length is not positive, or if the overlap
            is so large that the window would never advance.
    '''
    # Sample indices must be integers; a float frame_duration (e.g. 1.0)
    # would otherwise leak float bounds into the returned ranges.
    samples_per_frame = int(round(Fs * frame_duration))
    if samples_per_frame <= 0:
        raise ValueError('frame_duration must cover at least one sample')

    step = samples_per_frame - int(samples_per_frame * overlap)
    if step <= 0:
        # A non-positive step would make the window loop forever.
        raise ValueError('overlap must be smaller than 1')

    last_start = samples - samples_per_frame
    return [(start, start + samples_per_frame)
            for start in range(0, last_start + 1, step)]
|
|
70 |
|
|
|
71 |
def make_frames(df, frame_duration, overlap=0.5):
    '''
    in:
    df - dataframe or array with all channels (rows are samples, columns are
         channels)
    frame_duration - frame duration in seconds
    overlap - float fraction of frame to overlap in range (0,1). This used to
              be read from a module-level global; it is now an explicit
              argument.

    out: array of theta, alpha, beta averages for each probe for each time step
    shape: (n-frames,m-probes,k-brainwave bands)
    '''
    values = np.asarray(df)
    if values.ndim == 1:
        # A single channel passed as a flat array: treat it as one column.
        values = values[:, np.newaxis]

    steps = make_steps(len(values), frame_duration, overlap)
    frames = []
    # The first window is skipped, exactly as before.
    # NOTE(review): the reason for dropping it is not documented - confirm it
    # is intentional.
    for start, stop in steps[1:]:
        # make_steps may hand back float bounds; positional slicing needs ints.
        start, stop = int(start), int(stop)
        frame = []
        for channel in range(values.shape[1]):
            # Half-open positional slice: exactly stop - start samples. The
            # previous label-based .loc slice was end-inclusive and so picked
            # up one extra sample for every frame but the last.
            snippet = values[start:stop, channel]
            f, Y = get_fft(snippet)
            theta, alpha, beta = theta_alpha_beta_averages(f, Y)
            frame.append([theta, alpha, beta])
        frames.append(frame)
    return np.array(frames)


# 2-D coordinates passed to gen_images, one (x, y) pair per channel
# (14 entries).
locs_2d = [(-2.0, 4.0),
           (2.0, 4.0),
           (-1.0, 3.0),
           (1.0, 3.0),
           (-3.0, 3.0),
           (3.0, 3.0),
           (-2.0, 2.0),
           (2.0, 2.0),
           (-2.0, -2.0),
           (2.0, -2.0),
           (-4.0, 1.0),
           (4.0, 1.0),
           (-1.0, -3.0),
           (1.0, -3.0)]


def make_data_pipeline(file_names, labels, image_size, frame_duration, overlap):
    '''
    IN:
    file_names - list of strings for each input file (one for each subject)
    labels - list of labels for each
    image_size - int size of output images in form (x, x)
    frame_duration - time length of each frame (seconds)
    overlap - float fraction of frame to overlap in range (0,1)

    OUT:
    X: np array of frames (unshuffled)
    y: np array of label for each frame (1 or 0)

    RAISES:
    ValueError if file_names is empty or there are fewer labels than files.
    '''
    if len(file_names) == 0:
        raise ValueError('file_names must contain at least one file')
    if len(labels) < len(file_names):
        raise ValueError('labels must provide one entry per file')

    print('Generating training data...')

    # Collect the per-session arrays and concatenate once at the end instead
    # of re-copying the growing array for every file.
    image_parts = []
    label_parts = []
    for i, file_name in enumerate(file_names):
        print ('Processing session: ',file_name, '. (',i+1,' of ',len(file_names),')')
        # Transposed after loading so that make_frames sees samples as rows.
        data = genfromtxt(file_name, delimiter=',').T
        df = pd.DataFrame(data)

        X_0 = make_frames(df, frame_duration, overlap)
        # Flatten (probes, bands) into one feature vector per frame; there is
        # one probe per entry of locs_2d and three bands per probe.
        X_1 = X_0.reshape(len(X_0), len(locs_2d) * 3)

        images = gen_images(np.array(locs_2d), X_1, image_size, normalize=False)
        images = np.swapaxes(images, 1, 3)
        print(len(images), ' frames generated with label ', labels[i], '.')
        print('\n')

        image_parts.append(images)
        label_parts.append(np.ones(len(images)) * labels[i])

    X = np.concatenate(image_parts, axis=0)
    y = np.concatenate(label_parts, axis=0)
    return X, np.array(y)
|
|
152 |
|
|
|
153 |
# One (csv path, label) pair per recorded session: every *_KS file carries
# label 1 and every *_US file label 0.
_sessions = [
    ('data/ML101_KS.csv', 1),
    ('data/ML101_US.csv', 0),
    ('data/ML102_KS.csv', 1),
    ('data/ML102_US.csv', 0),
    ('data/ML103_KS.csv', 1),
    ('data/ML103_US.csv', 0),
    ('data/ML104_KS.csv', 1),
    ('data/ML104_US.csv', 0),
    ('data/ML105_KS.csv', 1),
    ('data/ML105_US.csv', 0),
    ('data/ML106_KS.csv', 1),
    ('data/ML106_US.csv', 0),
    ('data/ML107_KS.csv', 1),
    ('data/ML107_US.csv', 0),
    ('data/ML108_KS.csv', 1),
    ('data/ML108_US.csv', 0),
]
file_names = [path for path, _ in _sessions]
labels = [label for _, label in _sessions]

# Pipeline settings: output image side length, frame length in seconds and
# fractional overlap between consecutive frames.
image_size = 28
frame_duration = 1.0
overlap = 0.5

X, y = make_data_pipeline(file_names, labels, image_size, frame_duration, overlap)
|
|
174 |
|
|
|
175 |
from sklearn.model_selection import train_test_split

# Hold out 20% of the frames for testing.
# NOTE(review): frames are generated with overlap between neighbours, so a
# shuffled split can place frames that share samples on both sides of the
# split - confirm this is acceptable for the reported validation accuracy.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=True)

# input image dimensions - taken from image_size so the network input always
# matches the images produced by make_data_pipeline
img_rows, img_cols = image_size, image_size

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Three channels per image (presumably one per theta/alpha/beta band -
# verify against gen_images).
input_shape = (img_rows, img_cols, 3)
|
|
186 |
|
|
|
187 |
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils

batch_size = 128
num_classes = 2
epochs = 400

# convert class vectors to binary class matrices
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Small CNN: two 3x3 convolutions, one max-pool, a 10-unit dense layer and a
# softmax over the classes. Dropout is imported but currently not used.
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(10))
model.add(Activation('relu'))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
# Use the canonical class name: the lowercase `rmsprop` alias is not available
# in every Keras release, whereas `RMSprop` accepts the same arguments.
opt = keras.optimizers.RMSprop(lr=0.001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Cast to float32; no additional rescaling is applied to the images.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# The held-out test split doubles as the validation set during training.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True)