|
a |
|
b/random_forest/rf_test1.py |
|
|
1 |
import os |
|
|
2 |
import sys |
|
|
3 |
import pywt |
|
|
4 |
from pywt import wavedec |
|
|
5 |
from __init__ import ap_entropy, samp_entropy |
|
|
6 |
import numpy as np |
|
|
7 |
import matplotlib |
|
|
8 |
from matplotlib import pyplot as plt |
|
|
9 |
from mnist_new import mlp, create_training_set |
|
|
10 |
from sklearn.ensemble import RandomForestClassifier |
|
|
11 |
def _load_signals(directory):
    """Read every file found in *directory* into a NumPy float array.

    Only the first whitespace-separated token of each line is used and
    blank lines are skipped.  Returns a list holding one array per file,
    in the order reported by ``os.listdir``.
    """
    signals = []
    for fl in os.listdir(directory):
        # The context manager closes each handle as soon as the file has
        # been read instead of leaving it open for the garbage collector.
        with open(os.path.join(directory, fl), 'r') as txt:
            samples = [line.split()[0] for line in txt if line.strip()]
        # Convert the textual samples to numbers once, at the I/O boundary.
        signals.append(np.array(samples, dtype=float))
    return signals


# One list of recordings per data set directory.
A = _load_signals("../../../ALL/A/")
B = _load_signals("../../../ALL/B/")
C = _load_signals("../../../ALL/C/")
D = _load_signals("../../../ALL/D/")
E = _load_signals("../../../ALL/E/")
|
|
65 |
|
|
|
66 |
# Multi-level discrete wavelet decomposition ('db4' wavelet, level 8) of
# every recording; each entry is the coefficient list returned by
# ``wavedec`` for the matching signal.
A_ = [wavedec(signal, 'db4', level=8) for signal in A]
B_ = [wavedec(signal, 'db4', level=8) for signal in B]
C_ = [wavedec(signal, 'db4', level=8) for signal in C]
D_ = [wavedec(signal, 'db4', level=8) for signal in D]
E_ = [wavedec(signal, 'db4', level=8) for signal in E]
|
|
88 |
|
|
|
89 |
# Per-class feature rows (a..f) and their integer labels (y_a..y_f).
a = []
b = []
c = []
d = []
e = []
y_a = []
y_b = []
y_c = []
y_d = []
y_e = []
f = []
y_f = []
# Combined data set: one feature row and one class-name string per sample.
inputs = []
outputs = []
# Running per-feature extrema over all samples (36 feature slots).
# Starting from +/-inf keeps the extrema correct for features that can be
# negative (such as the band mean) or arbitrarily large.
minm = [float("inf")] * 36
maxm = [float("-inf")] * 36
inp = []
out = []


def _band_features(bands, minm, maxm):
    """Return the flat feature row for one wavelet decomposition.

    For every coefficient array in *bands* four values are appended, in
    this order: energy (sum of squares), approximate entropy
    (``ap_entropy(coef, 2, 0.5)``), mean and standard deviation.  The
    running per-feature extrema in *minm* / *maxm* are updated in place;
    both must have at least ``4 * len(bands)`` entries.
    """
    features = []
    for band in bands:
        coef = np.array(band)
        stats = (
            np.sum(coef ** 2),
            ap_entropy(coef, 2, 0.5),
            # sample entropy (samp_entropy) is intentionally not included
            np.mean(coef),
            np.std(coef),
        )
        for value in stats:
            j = len(features)  # slot of this feature within the row
            minm[j] = min(minm[j], value)
            maxm[j] = max(maxm[j], value)
            features.append(value)
    return features


def _collect_class(decompositions, label, class_name, row_lists, label_lists):
    """Extract features for one data set and file them under its class.

    Each sample's row is appended to every list in *row_lists* (with
    *label* going to the matching list in *label_lists*) and once per such
    list to the global ``inputs`` / ``outputs``.  Passing two lists thus
    adds the sample to the combined data set twice.
    """
    for bands in decompositions:
        features = _band_features(bands, minm, maxm)
        for n, (rows, labels) in enumerate(zip(row_lists, label_lists)):
            rows.append(features)
            labels.append(label)
            # A repeated entry gets its own copy so that later in-place
            # processing of ``inputs`` touches every row exactly once.
            inputs.append(features if n == 0 else list(features))
            outputs.append(class_name)


# Sets A and B share label 0 / "Class A", C and D share 1 / "Class B",
# and E (label 2 / "Class C") is entered twice via e and f.
_collect_class(A_, 0, "Class A", [a], [y_a])
print("A done")

_collect_class(B_, 0, "Class A", [b], [y_b])
print("B done")

_collect_class(C_, 1, "Class B", [c], [y_c])
print("C done")

_collect_class(D_, 1, "Class B", [d], [y_d])
print("D Done")

_collect_class(E_, 2, "Class C", [e, f], [y_e, y_f])
print("E done")
|
|
302 |
# Interleave the first 100 rows of every per-class list, together with
# their labels, into ``inp`` / ``out`` in the order a, b, c, d, e, f.
for idx in range(100):
    for rows, labels in ((a, y_a), (b, y_b), (c, y_c),
                         (d, y_d), (e, y_e), (f, y_f)):
        inp.append(rows[idx])
        out.append(labels[idx])

print("done")
i = 0
# 1-based sample numbers for class e.
z = list(range(1, len(e) + 1))
|
|
321 |
import matplotlib.pyplot as plt |
|
|
322 |
"""i=0 |
|
|
323 |
while i < 27: |
|
|
324 |
j=0 |
|
|
325 |
p=[] |
|
|
326 |
q=[] |
|
|
327 |
r=[] |
|
|
328 |
s=[] |
|
|
329 |
t=[] |
|
|
330 |
u=[] |
|
|
331 |
while j < len(e): |
|
|
332 |
p.append(a[j][i]) |
|
|
333 |
q.append(b[j][i]) |
|
|
334 |
r.append(c[j][i]) |
|
|
335 |
s.append(d[j][i]) |
|
|
336 |
t.append(e[j][i]) |
|
|
337 |
j=j+1 |
|
|
338 |
plt.plot(z,np.array(p)) |
|
|
339 |
plt.plot(z,np.array(q)) |
|
|
340 |
plt.plot(z,np.array(r)) |
|
|
341 |
plt.plot(z,np.array(s)) |
|
|
342 |
plt.plot(z,np.array(t)) |
|
|
343 |
plt.savefig("images/"+str(i)+"_out.jpg") |
|
|
344 |
print("saved") |
|
|
345 |
plt.clf() |
|
|
346 |
i=i+1 |
|
|
347 |
""" |
|
|
348 |
# Min-max scale every feature of every row in ``inputs`` in place, using
# the per-feature extrema gathered in ``minm`` / ``maxm``.
_scaled_rows = set()
for row in inputs:
    # A row object that was appended to ``inputs`` more than once must be
    # rescaled only once, otherwise its values are normalised repeatedly.
    if id(row) in _scaled_rows:
        continue
    _scaled_rows.add(id(row))
    for j, value in enumerate(row):
        den = maxm[j] - minm[j]
        if den == 0:
            # Constant feature: fall back to the range spanned by all
            # features so the division stays defined.
            den = np.amax(np.array(maxm)) - np.amin(np.array(minm))
        row[j] = (value - minm[j]) / den
|
|
358 |
# Append a second reference to every existing (row, label) pair, doubling
# the size of the data set.
# NOTE(review): identical rows can then land in both the training and the
# test fold during cross-validation -- confirm this is intended.
n = len(inputs)
inputs.extend(inputs[:n])
outputs.extend(outputs[:n])
i = n
|
|
364 |
# ``cross_val_score`` lives in ``sklearn.model_selection`` in current
# scikit-learn; the old ``sklearn.cross_validation`` module is kept as a
# fallback for installations that predate the move.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score


def _shuffled(rows, labels):
    """Return *rows* and *labels* reordered by one shared random permutation.

    The rows come back as a list and the labels as a NumPy array.
    """
    order = np.random.permutation(len(rows))
    shuffled_rows = [rows[k] for k in order]
    shuffled_labels = np.asarray([labels[k] for k in order])
    return shuffled_rows, shuffled_labels


# Fit a large forest on the whole (shuffled) data set; the feature
# importances are kept in ``importances`` for later inspection.
new_inputs, new_outputs = _shuffled(inputs, outputs)
rfc = RandomForestClassifier(n_estimators=1000)
rfc.fit(new_inputs, new_outputs)
importances = rfc.feature_importances_

# 100-fold cross-validation on a fresh shuffle of the whole data set.
new_inputs, new_outputs = _shuffled(inputs, outputs)
print("cross validating")
scores = cross_val_score(rfc, new_inputs, new_outputs, cv=100)
print("Accuracy: %0.2f (+/-%0.2f)" %(scores.mean(),scores.std()*2))

# First three quarters of the shuffled data; floor division keeps the
# slice bound an integer on Python 3 as well as Python 2.
split = 3 * (len(new_inputs) // 4)
inputs1 = new_inputs[0:split]
outputs1 = new_outputs[0:split]
inputs2 = new_inputs
outputs2 = np.asarray(new_outputs)
rfc = RandomForestClassifier(n_estimators=400)
rfc.fit(inputs2, outputs2)

# Second cross-validation run, restricted to the three-quarter subset.
new_inputs, new_outputs = _shuffled(inputs1, outputs1)
print("cross validating again")
scores = cross_val_score(rfc, new_inputs, new_outputs, cv=100)

# Write one fold score per line; ``with`` guarantees the file is closed.
with open("results2.txt", 'w') as test:
    for x in scores:
        test.write(str(x) + "\n")

print("Accuracy: %0.2f (+/-%0.2f)" %(scores.mean(),scores.std()*2))
|
|
415 |
|
|
|
416 |
|
|
|
417 |
#P,Q,R,S,T,U = create_training_set(inp,out,minm,maxm) |
|
|
418 |
#mlp(P,Q,R,S,T,U) |
|
|
419 |
|