Diff of /lstm_kmean/inference.py [000000] .. [277df6]

Switch to side-by-side view

--- a
+++ b/lstm_kmean/inference.py
@@ -0,0 +1,161 @@
+import tensorflow as tf
+import numpy as np
+from glob import glob
+from natsort import natsorted
+import os
+from model import TripleNet, train_step, test_step
+from utils import load_complete_data
+from tqdm import tqdm
+from sklearn.manifold import TSNE
+import matplotlib.pyplot as plt
+from matplotlib import style
+import seaborn as sns
+import pandas as pd
+import pickle
+from sklearn.cluster import KMeans
+from scipy.optimize import linear_sum_assignment as linear_assignment
+
+style.use('seaborn')
+
+os.environ["CUDA_DEVICE_ORDER"]= "PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"]= '0'
+
+
+# Thanks to: https://github.com/k-han/DTC/blob/master/utils/util.py
def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best cluster-to-label matching.

    Cluster ids are arbitrary, so the Hungarian algorithm finds the
    permutation of predicted cluster ids that best agrees with the true
    labels before counting correct assignments.

    # Arguments
        y_true: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted cluster labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0, 1]
    """
    y_true = np.asarray(y_true).astype(np.int64)
    # Fix: cast y_pred as well — it is used as a fancy index below and may
    # arrive as a float array depending on the caller.
    y_pred = np.asarray(y_pred).astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    # Contingency matrix: w[i, j] = #samples placed in cluster i with true label j.
    # np.add.at accumulates duplicates correctly; replaces the per-sample loop.
    w = np.zeros((D, D), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)
    # scipy's linear_sum_assignment minimizes cost, so feed (max - w) to
    # maximize the number of matched samples.
    row_ind, col_ind = linear_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size
+
if __name__ == '__main__':

    n_channels  = 14      # EEG channels per sample (informational)
    n_feat      = 128     # embedding dimensionality (informational)
    batch_size  = 256
    test_batch_size = 256
    n_classes   = 10      # number of thought/image categories

    # Load the pre-split EEG dataset: a pickled dict of train/test arrays.
    # encoding='latin1' is required for pickles written under Python 2.
    with open('../../data/b2i_data/eeg/image/data.pkl', 'rb') as file:
        data = pickle.load(file, encoding='latin1')
        train_X = data['x_train']
        train_Y = data['y_train']
        test_X = data['x_test']
        test_Y = data['y_test']

    # NOTE(review): no separate validation split — val and test reuse the
    # same arrays here; confirm this is intentional.
    train_batch = load_complete_data(train_X, train_Y, batch_size=batch_size)
    val_batch   = load_complete_data(test_X, test_Y, batch_size=batch_size)
    test_batch  = load_complete_data(test_X, test_Y, batch_size=test_batch_size)

    # Rebuild the triplet-embedding network and restore trained weights.
    # expect_partial(): inference does not need every optimizer slot, so
    # silence "unresolved object" warnings from the restore.
    triplenet = TripleNet(n_classes=n_classes)
    opt = tf.keras.optimizers.Adam(learning_rate=3e-4)
    triplenet_ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=triplenet, optimizer=opt)
    triplenet_ckpt.restore('experiments/best_ckpt/ckpt-89').expect_partial()

    # Extract embeddings for the whole test set. Collect per-batch chunks
    # and concatenate once at the end — re-concatenating inside the loop is
    # O(n^2) in total copying.
    feat_chunks, label_chunks = [], []
    for X, Y in tqdm(test_batch):
        _, feat = triplenet(X, training=False)
        feat_chunks.append(feat.numpy())
        label_chunks.append(Y.numpy())
    feat_X = np.concatenate(feat_chunks, axis=0)
    feat_Y = np.concatenate(label_chunks, axis=0)
    print(feat_X.shape, feat_Y.shape)

    # Cluster the embeddings and score them against the ground-truth labels
    # using Hungarian-matched clustering accuracy.
    kmeans = KMeans(n_clusters=n_classes, random_state=45)
    kmeans.fit(feat_X)
    labels = kmeans.labels_
    kmeanacc = cluster_acc(feat_Y, labels)
    print('Accuracy score: {0:0.2f}'.format(kmeanacc))

    # Project embeddings to 2-D with t-SNE for visualization and persist
    # the coordinates alongside their labels.
    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=700)
    tsne_results = tsne.fit_transform(feat_X)
    df = pd.DataFrame()
    df['label'] = feat_Y
    df['x1'] = tsne_results[:, 0]
    df['x2'] = tsne_results[:, 1]

    os.makedirs('experiments', exist_ok=True)  # robustness: output dir may not exist
    df.to_csv('experiments/infer_triplet_embed2D.csv')
    # Round-trip through the CSV so the plot reflects exactly what was saved.
    df = pd.read_csv('experiments/infer_triplet_embed2D.csv')

    plt.figure(figsize=(16, 10))
    sns.scatterplot(
        x="x1", y="x2",
        data=df,
        hue='label',
        palette=sns.color_palette("hls", n_classes),
        legend="full",
        alpha=0.4
    )
    # Fix: format the percentage to two decimals instead of printing the
    # raw float (e.g. '73.99999999%').
    plt.title('k-means accuracy: {:.2f}%'.format(kmeanacc * 100))
    plt.savefig('experiments/embedding.png')
\ No newline at end of file