random_forest/rf_test1.py
import os
import sys
import pywt
from pywt import wavedec
from __init__ import ap_entropy, samp_entropy
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from mnist_new import mlp, create_training_set
from sklearn.ensemble import RandomForestClassifier

A = []
B = []
C = []
D = []
E = []
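
# Each class directory (A..E under ../../../ALL/) is expected to contain one
# plain-text file per recording, with one sample value per line; only the
# first whitespace-separated token on each line is read.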
for fl in os.listdir("../../../ALL/A/"):
    inp = []
    path = "../../../ALL/A/" + fl
    txt = open(path, 'r')
    for line in txt:
        feature = float(line.split()[0])
        inp.append(feature)
    txt.close()
    a = np.array(inp)
    A.append(a)

for fl in os.listdir("../../../ALL/B/"):
    inp = []
    path = "../../../ALL/B/" + fl
    txt = open(path, 'r')
    for line in txt:
        feature = float(line.split()[0])
        inp.append(feature)
    txt.close()
    a = np.array(inp)
    B.append(a)

for fl in os.listdir("../../../ALL/C/"):
    inp = []
    path = "../../../ALL/C/" + fl
    txt = open(path, 'r')
    for line in txt:
        feature = float(line.split()[0])
        inp.append(feature)
    txt.close()
    a = np.array(inp)
    C.append(a)

for fl in os.listdir("../../../ALL/D/"):
    inp = []
    path = "../../../ALL/D/" + fl
    txt = open(path, 'r')
    for line in txt:
        feature = float(line.split()[0])
        inp.append(feature)
    txt.close()
    a = np.array(inp)
    D.append(a)

for fl in os.listdir("../../../ALL/E/"):
    inp = []
    path = "../../../ALL/E/" + fl
    txt = open(path, 'r')
    for line in txt:
        feature = float(line.split()[0])
        inp.append(feature)
    txt.close()
    a = np.array(inp)
    E.append(a)
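
# wavedec with level=8 returns 9 coefficient arrays per signal: the level-8
# approximation followed by the detail coefficients for levels 8 down to 1.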
A_ = []
B_ = []
C_ = []
D_ = []
E_ = []
for x in A:
    coeffs = wavedec(x, 'db4', level=8)
    A_.append(coeffs)

for x in B:
    coeffs = wavedec(x, 'db4', level=8)
    B_.append(coeffs)

for x in C:
    coeffs = wavedec(x, 'db4', level=8)
    C_.append(coeffs)

for x in D:
    coeffs = wavedec(x, 'db4', level=8)
    D_.append(coeffs)

for x in E:
    coeffs = wavedec(x, 'db4', level=8)
    E_.append(coeffs)

a = []
b = []
c = []
d = []
e = []
y_a = []
y_b = []
y_c = []
y_d = []
y_e = []
f = []
y_f = []
inputs = []
outputs = []
minm = [float('inf')] * 36
maxm = [float('-inf')] * 36
inp = []
out = []
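
# For every coefficient array the loops below extract four statistics
# (energy, approximate entropy via ap_entropy(coef, 2, 0.5), mean, and
# standard deviation), giving 9 * 4 = 36 features per signal. minm/maxm
# track each feature's range for the min-max normalisation further down.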
for x in A_:
    features = []
    j = 0
    for y in x:
        coef = np.array(y)
        energy = np.sum(coef**2)
        minm[j] = min(minm[j], energy)
        maxm[j] = max(maxm[j], energy)
        j = j + 1
        approx_en = ap_entropy(coef, 2, 0.5)
        minm[j] = min(minm[j], approx_en)
        maxm[j] = max(maxm[j], approx_en)
        j = j + 1
        #samp_en = samp_entropy(coef,2,0.5)
        #minm[j] = min(minm[j],samp_en)
        #maxm[j] = max(maxm[j],samp_en)
        #j=j+1
        mean = np.mean(coef)
        minm[j] = min(minm[j], mean)
        maxm[j] = max(maxm[j], mean)
        j = j + 1
        std = np.std(coef)
        minm[j] = min(minm[j], std)
        maxm[j] = max(maxm[j], std)
        j = j + 1
        features.append(energy)
        features.append(approx_en)
        #features.append(samp_en)
        features.append(mean)
        features.append(std)
    a.append(features)
    y_a.append(0)
    inputs.append(features)
    outputs.append("Class A")

print("A done")

for x in B_:
    features = []
    j = 0
    for y in x:
        coef = np.array(y)
        energy = np.sum(coef**2)
        minm[j] = min(minm[j], energy)
        maxm[j] = max(maxm[j], energy)
        j = j + 1
        approx_en = ap_entropy(coef, 2, 0.5)
        minm[j] = min(minm[j], approx_en)
        maxm[j] = max(maxm[j], approx_en)
        j = j + 1
        #samp_en = samp_entropy(coef,2,0.5)
        #minm[j] = min(minm[j],samp_en)
        #maxm[j] = max(maxm[j],samp_en)
        #j=j+1
        mean = np.mean(coef)
        minm[j] = min(minm[j], mean)
        maxm[j] = max(maxm[j], mean)
        j = j + 1
        std = np.std(coef)
        minm[j] = min(minm[j], std)
        maxm[j] = max(maxm[j], std)
        j = j + 1
        features.append(energy)
        features.append(approx_en)
        #features.append(samp_en)
        features.append(mean)
        features.append(std)
    b.append(features)
    y_b.append(0)
    inputs.append(features)
    outputs.append("Class A")

print("B done")

for x in C_:
    features = []
    j = 0
    for y in x:
        coef = np.array(y)
        energy = np.sum(coef**2)
        minm[j] = min(minm[j], energy)
        maxm[j] = max(maxm[j], energy)
        j = j + 1
        approx_en = ap_entropy(coef, 2, 0.5)
        minm[j] = min(minm[j], approx_en)
        maxm[j] = max(maxm[j], approx_en)
        j = j + 1
        #samp_en = samp_entropy(coef,2,0.5)
        #minm[j] = min(minm[j],samp_en)
        #maxm[j] = max(maxm[j],samp_en)
        #j=j+1
        mean = np.mean(coef)
        minm[j] = min(minm[j], mean)
        maxm[j] = max(maxm[j], mean)
        j = j + 1
        std = np.std(coef)
        minm[j] = min(minm[j], std)
        maxm[j] = max(maxm[j], std)
        j = j + 1
        features.append(energy)
        features.append(approx_en)
        #features.append(samp_en)
        features.append(mean)
        features.append(std)
    c.append(features)
    y_c.append(1)
    inputs.append(features)
    outputs.append("Class B")

print("C done")

for x in D_:
    features = []
    j = 0
    for y in x:
        coef = np.array(y)
        energy = np.sum(coef**2)
        minm[j] = min(minm[j], energy)
        maxm[j] = max(maxm[j], energy)
        j = j + 1
        approx_en = ap_entropy(coef, 2, 0.5)
        minm[j] = min(minm[j], approx_en)
        maxm[j] = max(maxm[j], approx_en)
        j = j + 1
        #samp_en = samp_entropy(coef,2,0.5)
        #minm[j] = min(minm[j],samp_en)
        #maxm[j] = max(maxm[j],samp_en)
        #j=j+1
        mean = np.mean(coef)
        minm[j] = min(minm[j], mean)
        maxm[j] = max(maxm[j], mean)
        j = j + 1
        std = np.std(coef)
        minm[j] = min(minm[j], std)
        maxm[j] = max(maxm[j], std)
        j = j + 1
        features.append(energy)
        features.append(approx_en)
        #features.append(samp_en)
        features.append(mean)
        features.append(std)
    d.append(features)
    y_d.append(1)
    inputs.append(features)
    outputs.append("Class B")

print("D done")

for x in E_:
    features = []
    j = 0
    for y in x:
        coef = np.array(y)
        energy = np.sum(coef**2)
        minm[j] = min(minm[j], energy)
        maxm[j] = max(maxm[j], energy)
        j = j + 1
        approx_en = ap_entropy(coef, 2, 0.5)
        minm[j] = min(minm[j], approx_en)
        maxm[j] = max(maxm[j], approx_en)
        j = j + 1
        #samp_en = samp_entropy(coef,2,0.5)
        #minm[j] = min(minm[j],samp_en)
        #maxm[j] = max(maxm[j],samp_en)
        #j=j+1
        mean = np.mean(coef)
        minm[j] = min(minm[j], mean)
        maxm[j] = max(maxm[j], mean)
        j = j + 1
        std = np.std(coef)
        minm[j] = min(minm[j], std)
        maxm[j] = max(maxm[j], std)
        j = j + 1
        features.append(energy)
        features.append(approx_en)
        #features.append(samp_en)
        features.append(mean)
        features.append(std)
    e.append(features)
    f.append(features)
    y_e.append(2)
    y_f.append(2)
    inputs.append(features)
    inputs.append(features)
    outputs.append("Class C")
    outputs.append("Class C")
print("E done")
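
# The five sets are grouped into three labels: A and B share label 0
# ("Class A"), C and D share label 1 ("Class B"), and E gets label 2
# ("Class C"). E's feature vectors are appended twice above, presumably so
# that each group contributes the same number of samples.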
# Interleave the first 100 feature vectors from each set into inp/out; these
# feed the commented-out MLP experiment at the bottom of the file.
i = 0
while i < 100:
    inp.append(a[i])
    out.append(y_a[i])
    inp.append(b[i])
    out.append(y_b[i])
    inp.append(c[i])
    out.append(y_c[i])
    inp.append(d[i])
    out.append(y_d[i])
    inp.append(e[i])
    out.append(y_e[i])
    inp.append(f[i])
    out.append(y_f[i])
    i = i + 1

print("done")

i = 0
z = [i for i in range(1, len(e) + 1)]
import matplotlib.pyplot as plt

"""i=0
323
while i < 27:
324
        j=0
325
        p=[]
326
        q=[]
327
        r=[]
328
        s=[]
329
        t=[]
330
        u=[]
331
        while j < len(e):
332
                p.append(a[j][i])
333
                q.append(b[j][i])
334
                r.append(c[j][i])
335
                s.append(d[j][i])
336
                t.append(e[j][i])
337
                j=j+1
338
        plt.plot(z,np.array(p))
339
        plt.plot(z,np.array(q))
340
        plt.plot(z,np.array(r))
341
        plt.plot(z,np.array(s))
342
        plt.plot(z,np.array(t))
343
        plt.savefig("images/"+str(i)+"_out.jpg")
344
    print("saved")
345
        plt.clf()
346
        i=i+1
347
"""
348
# Min-max normalise every feature column; if a column has zero range, fall
# back to the global range across all columns to avoid dividing by zero.
i = 0
for x in inputs:
    j = 0
    for y in x:
        den = maxm[j] - minm[j]
        if den == 0:
            den = np.amax(np.array(maxm)) - np.amin(np.array(minm))
        inputs[i][j] = (inputs[i][j] - minm[j]) / den
        j = j + 1
    i = i + 1

# Duplicate every sample once, doubling the dataset before shuffling.
i = 0
n = len(inputs)
while i < n:
    inputs.append(inputs[i])
    outputs.append(outputs[i])
    i = i + 1

# Shuffle, then fit a random forest on the full (duplicated) dataset to look
# at feature importances.
arr = [i for i in range(0, len(inputs))]
arr = np.random.permutation(arr)
new_inputs = []
new_outputs = []
for x in arr:
    new_inputs.append(inputs[x])
    new_outputs.append(outputs[x])

new_outputs = np.asarray(new_outputs)
rfc = RandomForestClassifier(n_estimators=1000)
rfc.fit(new_inputs, new_outputs)
importances = rfc.feature_importances_
#import ipdb; ipdb.set_trace()
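
# Optional diagnostic (not part of the original script): rank the 36 features
# by importance. The feature names below are illustrative and assume the
# energy/entropy/mean/std ordering per sub-band used in the loops above.
feature_names = ["band%d_%s" % (lvl, stat)
                 for lvl in range(9)
                 for stat in ("energy", "apen", "mean", "std")]
for idx in np.argsort(importances)[::-1][:10]:
    print(feature_names[idx], importances[idx])
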
from sklearn.model_selection import cross_val_score
arr = [i for i in range(0, len(inputs))]
arr = np.random.permutation(arr)
new_inputs = []
new_outputs = []
for x in arr:
    new_inputs.append(inputs[x])
    new_outputs.append(outputs[x])
new_outputs = np.asarray(new_outputs)
print("cross validating")
scores = cross_val_score(rfc, new_inputs, new_outputs, cv=100)
print("Accuracy: %0.2f (+/-%0.2f)" % (scores.mean(), scores.std() * 2))

#import ipdb;ipdb.set_trace()
# Keep the first three quarters of the shuffled data for a second
# cross-validation run with a smaller forest (400 trees).
inputs1 = new_inputs[0:3 * (len(new_inputs) // 4)]
outputs1 = new_outputs[0:3 * (len(new_inputs) // 4)]
inputs2 = new_inputs
outputs2 = new_outputs
outputs2 = np.asarray(outputs2)
rfc = RandomForestClassifier(n_estimators=400)
rfc.fit(inputs2, outputs2)
from sklearn.model_selection import cross_val_score
arr = [i for i in range(0, len(inputs1))]
arr = np.random.permutation(arr)
new_inputs = []
new_outputs = []
for x in arr:
    new_inputs.append(inputs1[x])
    new_outputs.append(outputs1[x])
new_outputs = np.asarray(new_outputs)
print("cross validating again")
scores = cross_val_score(rfc, new_inputs, new_outputs, cv=100)
test = open("results2.txt", 'w')
for x in scores:
    test.write(str(x) + "\n")
#import ipdb; ipdb.set_trace()
test.close()

print("Accuracy: %0.2f (+/-%0.2f)" % (scores.mean(), scores.std() * 2))

#P,Q,R,S,T,U = create_training_set(inp,out,minm,maxm)
#mlp(P,Q,R,S,T,U)