Switch to unified view

a b/lstm_kmean/find_bestckpt.py
1
import tensorflow as tf
2
import numpy as np
3
from glob import glob
4
from natsort import natsorted
5
import os
6
from model import TripleNet, train_step, test_step
7
from utils import load_complete_data
8
from tqdm import tqdm
9
from sklearn.manifold import TSNE
10
import matplotlib.pyplot as plt
11
from matplotlib import style
12
import seaborn as sns
13
import pandas as pd
14
import pickle
15
from sklearn.cluster import KMeans
16
from scipy.optimize import linear_sum_assignment as linear_assignment
17
18
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and newer releases no longer ship the old names. Prefer the new name when
# it is available and fall back to the legacy one on older installations.
style.use('seaborn-v0_8' if 'seaborn-v0_8' in style.available else 'seaborn')

# Order GPUs by PCI bus id so that the device index below is stable, then
# expose only device 3 to this process.
os.environ["CUDA_DEVICE_ORDER"]= "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]= '3'
22
23
24
# Thanks to: https://github.com/k-han/DTC/blob/master/utils/util.py
25
def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one label mapping.

    Cluster ids are arbitrary, so a contingency matrix between predicted
    clusters and true classes is built and the assignment that maximises the
    number of agreeing samples is found with SciPy's
    ``linear_sum_assignment`` (imported here as ``linear_assignment``).

    # Arguments
        y_true: true labels, array-like of non-negative integers with
            `n_samples` entries
        y_pred: predicted labels, array-like of non-negative integers with
            `n_samples` entries
    # Return
        accuracy, in [0,1]
    # Raises
        AssertionError: if `y_true` and `y_pred` differ in size
        ValueError: if the inputs are empty
    """
    # Cast both inputs: they are used as array indices below, and float label
    # arrays (or plain lists) would otherwise fail at indexing time.
    y_true = np.asarray(y_true).astype(np.int64).ravel()
    y_pred = np.asarray(y_pred).astype(np.int64).ravel()
    assert y_pred.size == y_true.size
    if y_pred.size == 0:
        raise ValueError('cluster_acc requires at least one sample')

    D = max(y_pred.max(), y_true.max()) + 1
    # w[p, t] counts the samples placed in cluster p whose true class is t.
    w = np.zeros((D, D), dtype=np.int64)
    # np.add.at accumulates repeated (p, t) pairs, unlike `w[p, t] += 1`.
    np.add.at(w, (y_pred, y_true), 1)

    # linear_sum_assignment minimises cost, so flip counts into costs.
    row_ind, col_ind = linear_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size
42
43
if __name__ == '__main__':
    # Score every checkpoint under experiments/best_ckpt by clustering the
    # model's test-set features with k-means and report the checkpoint with
    # the highest clustering accuracy.

    test_batch_size = 256
    n_classes       = 10

    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at a trusted dataset file.
    with open('../../data/b2i_data/eeg/image/data.pkl', 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    test_X = data['x_test']
    test_Y = data['y_test']

    # Only the test split is evaluated, so no train/val pipeline is built.
    test_batch = load_complete_data(test_X, test_Y, batch_size=test_batch_size)

    triplenet = TripleNet(n_classes=n_classes)
    opt       = tf.keras.optimizers.Adam(learning_rate=3e-4)
    # The checkpoint object bundles step, model and optimizer; presumably this
    # mirrors the layout written by the training script -- TODO confirm.
    triplenet_ckpt    = tf.train.Checkpoint(step=tf.Variable(1), model=triplenet, optimizer=opt)
    triplenet_ckptman = tf.train.CheckpointManager(triplenet_ckpt, directory='experiments/best_ckpt', max_to_keep=5000)

    best_ckpt_file = ''
    best_ckpt_acc  = 1e-15

    for ckpt_file in tqdm(triplenet_ckptman.checkpoints):

        triplenet_ckpt.restore(ckpt_file)

        # Gather per-batch outputs in lists and concatenate once at the end;
        # concatenating onto a growing array every batch re-copies all
        # previously collected rows each time.
        batch_feats  = []
        batch_labels = []
        for X, Y in tqdm(test_batch):
            # The model returns two values; the second one is what gets
            # clustered (presumably the embedding -- see model.TripleNet).
            _, feat = triplenet(X, training=False)
            batch_feats.append(feat.numpy())
            batch_labels.append(Y.numpy())
        feat_X = np.concatenate(batch_feats, axis=0)
        feat_Y = np.concatenate(batch_labels, axis=0)

        # Fixed random_state keeps the k-means initialisation identical for
        # every checkpoint, so scores are comparable across checkpoints.
        kmeans = KMeans(n_clusters=n_classes, random_state=45)
        kmeans.fit(feat_X)
        kmeanacc = cluster_acc(feat_Y, kmeans.labels_)

        if best_ckpt_acc < kmeanacc:
            best_ckpt_acc  = kmeanacc
            best_ckpt_file = ckpt_file

        print('Checkpoint file: {}'.format(ckpt_file))
        print('Checkpoint test acc: {}'.format(kmeanacc))

    print('\n===============================================')
    print('Best acc file: {}'.format(best_ckpt_file))
    print('Best acc: {}'.format(best_ckpt_acc))
    print('===============================================\n')