{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What this notebook is for\n",
"\n",
"This notebook aids the process of evaluating accuracy measures and predicting class outputs of already trained models."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import h5py as h5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Reading data\n",
"\n",
"Throughout this project we've used these functions in order to read the processed data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def make_dimensions_compatible(arr):\n",
" \n",
" return arr.reshape(arr.shape[0],-1,1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<blockquote>\n",
" <b>Note:</b> We're using Python <b>global variables</b> in the load_dataset method. They'll be available throughout this notebook.\n",
" </blockquote>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<blockquote>\n",
" <b>Important:</b> This method uses default folder names, prefixes and suffixes (herd-coded) as chosen by the author. If you've not changed anything these names in other files, it should work without any issues.\n",
"</blockquote>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def load_dataset(dataset_iter=1, window_size=512):\n",
" global X_train, Y_train, X_dev, Y_dev, X_test, Y_test\n",
" \n",
" dataset_relative_path = 'dataset/random-iter-%d/' % dataset_iter\n",
" \n",
" datafile = dataset_relative_path + 'datafile%d.h5' % window_size\n",
"\n",
" with h5.File(datafile, 'r') as datafile:\n",
" X_train = np.array(datafile['X_train'])\n",
" Y_train = np.array(datafile['Y_train'])\n",
"\n",
" X_dev = np.array(datafile['X_dev'])\n",
" Y_dev = np.array(datafile['Y_dev'])\n",
"\n",
" X_test = np.array(datafile['X_test'])\n",
" Y_test = np.array(datafile['Y_test'])\n",
" \n",
" # setting the rank of the data to be compatible with 1d convolution functions\n",
" # defined in tensorflow\n",
" X_train = make_dimensions_compatible(X_train)\n",
" X_dev = make_dimensions_compatible(X_dev)\n",
" X_test = make_dimensions_compatible(X_test)\n",
" \n",
" # normalization\n",
" X_train = X_train / 1000\n",
" X_dev = X_dev / 1000\n",
" X_test = X_test / 1000"
]
},
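{
"cell_type": "markdown",
"metadata": {},
"source": [
"Example usage (a sketch; it assumes the default `dataset/random-iter-1/datafile512.h5` file exists, as created elsewhere in the project): after calling `load_dataset`, the global splits are available, already reshaped and scaled."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: load the first random split with a 512-sample window and inspect shapes\n",
"load_dataset(dataset_iter=1, window_size=512)\n",
"print(X_train.shape, Y_train.shape)\n",
"print(X_dev.shape, Y_dev.shape)\n",
"print(X_test.shape, Y_test.shape)"
]
},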
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_session_path(dataset_iter=1, window_size=512, model_num=1, model_prefix='cnn', model_suffix='_lr-0.00002_mbs-128'):\n",
" return ('train/dataset-%d-%d/' + model_prefix + '%d' + model_suffix + '/') % (window_size, dataset_iter, model_num)\n",
"\n"
]
},
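{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference (using only the defaults above), `get_session_path` just builds the checkpoint directory string; with the default arguments it resolves to `train/dataset-512-1/cnn1_lr-0.00002_mbs-128/`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: show the checkpoint directory produced by the default arguments\n",
"print(get_session_path())  # train/dataset-512-1/cnn1_lr-0.00002_mbs-128/"
]
},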
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def predict(X_test, session_path, model_file, Y_test_onehot=None):\n",
"\n",
" tf.reset_default_graph()\n",
"\n",
" checkpoint_path = session_path\n",
" model_path = session_path + model_file\n",
"\n",
" with tf.Session() as sess:\n",
" loader = tf.train.import_meta_graph(model_path)\n",
" loader.restore(sess, tf.train.latest_checkpoint(checkpoint_path))\n",
"\n",
" graph = tf.get_default_graph()\n",
"\n",
" X = graph.get_tensor_by_name('X:0')\n",
" Y = graph.get_tensor_by_name('Y:0')\n",
" is_train = graph.get_tensor_by_name('is_train:0')\n",
" \n",
"# epoch_counter = graph.get_tensor_by_name('epoch_counter:0')\n",
"# print(epoch_counter.eval())\n",
"\n",
" Y_hat = graph.get_tensor_by_name('softmax_output:0')\n",
"\n",
" predict_op = tf.argmax(Y_hat, 1)\n",
"\n",
" y_hat_test = predict_op.eval({X: X_test, is_train: False})\n",
" \n",
" # print the accuracy of the test set if the labels are provided\n",
" if (Y_test_onehot is not None):\n",
" y_test = np.argmax(Y_test_onehot, 1)\n",
" acc = (y_hat_test == y_test).mean()\n",
" \n",
"\n",
" return y_hat_test, acc\n"
]
},
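{
"cell_type": "markdown",
"metadata": {},
"source": [
"`predict` assumes the saved graph exposes tensors named `X:0`, `Y:0`, `is_train:0`, and `softmax_output:0`. If restoring fails with a name error, the following optional sketch (assuming the same default session path and `model.meta` file) lists the operation names in a checkpoint so you can verify what your model actually exported."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (sketch): list the first few operation names in a saved graph\n",
"tf.reset_default_graph()\n",
"tf.train.import_meta_graph(get_session_path() + 'model.meta')\n",
"for op in tf.get_default_graph().get_operations()[:20]:\n",
"    print(op.name)"
]
},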
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prediction with majority voting on ensemble network"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def predict_voting(X_test_voting, session_path, model_file):\n",
"\n",
" tf.reset_default_graph()\n",
"\n",
" checkpoint_path = session_path\n",
" model_path = session_path + model_file\n",
" \n",
" y_hat_test_voting = []\n",
"\n",
" with tf.Session() as sess:\n",
" loader = tf.train.import_meta_graph(model_path)\n",
" loader.restore(sess, tf.train.latest_checkpoint(checkpoint_path))\n",
"\n",
" graph = tf.get_default_graph()\n",
"\n",
" X = graph.get_tensor_by_name('X:0')\n",
" is_train = graph.get_tensor_by_name('is_train:0')\n",
"\n",
" Y_hat = graph.get_tensor_by_name('softmax_output:0')\n",
"\n",
" predict_op = tf.argmax(Y_hat, 1)\n",
" \n",
" classname, idx, counts = tf.unique_with_counts(predict_op)\n",
" predict_voting_op = tf.gather(classname, tf.argmax(counts))\n",
"\n",
" # no. of training examples with the original feature size\n",
" m = X_test_voting.shape[0]\n",
" \n",
" # no. of split training examples of each original example\n",
" m_each = X_test_voting.shape[1]\n",
" \n",
" for ex in range(m):\n",
" x_test_voting = make_dimensions_compatible(X_test_voting[ex])\n",
" pred = predict_voting_op.eval({X: x_test_voting, is_train: False})\n",
" \n",
" y_hat_test_voting.append(pred)\n",
"\n",
" return y_hat_test_voting"
]
},
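{
"cell_type": "markdown",
"metadata": {},
"source": [
"The voting step (`tf.unique_with_counts` followed by `tf.argmax` over the counts) simply picks the most frequent predicted class among the windows of one original example. A plain NumPy equivalent (up to tie-breaking), shown on made-up predictions, is:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: majority vote over the per-window predictions of one example\n",
"window_preds = np.array([2, 0, 2, 2, 1])\n",
"classes, counts = np.unique(window_preds, return_counts=True)\n",
"print(classes[np.argmax(counts)])  # 2"
]
},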
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accuracy measure\n",
"\n",
"Change the values of `model_num`, `window_size`, and `dataset_iters` to test on the different models (cnn1, cnn2, and so on... as described in the literature) you've trained on. If you've generated multiple datasets for k-fold cross validation use `dataset_iters`. In the literature, we've used window sizes of 512 and 1024."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Run the following code to call the above define predict method and check accuracy of the models\n",
"\n",
"model_num = 8\n",
"window_size = 1024\n",
"dataset_iters = (1, 2, 3, 4, 5, 6, 7, 8)\n",
"\n",
"accuracies = np.array([])\n",
"\n",
"for dataset_iter in dataset_iters:\n",
" load_dataset(dataset_iter=dataset_iter, window_size=window_size)\n",
" predictions, acc = predict(X_test, get_session_path(dataset_iter=dataset_iter, window_size=window_size, model_num=model_num), 'model.meta', Y_test_onehot=Y_test)\n",
" accuracies = np.append(accuracies, acc)\n",
"\n",
"accuracies = accuracies * 100\n",
"print(accuracies)\n",
"\n",
"print(\"Mean accuracy: %f\" % accuracies.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Single model accuracy with voting measure"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<blockquote>\n",
" <b>Note:</b> This method uses default folder names and suffixes chosen by the author.\n",
"</blockquote>\n",
"\n",
"Calculate accuracy with voting, F-score, and other relevant measures. Change the values of `model_num`, `window_size`, and `dataset_iters` to try on different models and random permuations (random-iter-) of dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_num = 8\n",
"window_size = 1024\n",
"dataset_iters = (1, 2, 3, 4, 5, 6, 7, 8)\n",
"\n",
"accuracies_voting = np.array([])\n",
"sensitivities_voting = np.array([])\n",
"specificities_voting = np.array([])\n",
"precisions_voting = np.array([])\n",
"\n",
"for dataset_iter in dataset_iters:\n",
" dataset_relative_path = 'dataset/random-iter-%d/' % dataset_iter\n",
" testfile = (dataset_relative_path + 'testset_voting_%d.h5') % window_size\n",
" session_path = get_session_path(dataset_iter=dataset_iter, window_size=window_size, model_num=model_num)\n",
" model_file = 'model.meta'\n",
"\n",
" with h5.File(testfile, 'r') as testfile:\n",
" X_test_voting = testfile['X']\n",
" X_test_voting = np.array(X_test_voting) / 1000\n",
" y_test_voting = np.array(testfile['Y'])\n",
" \n",
" no_of_classes = 3\n",
"\n",
" y_hat_test_voting = np.array(predict_voting(X_test_voting, session_path, model_file))\n",
"\n",
" acc_voting_model = (y_test_voting == y_hat_test_voting).mean()\n",
" accuracies_voting = np.append(accuracies_voting, acc_voting_model)\n",
" \n",
" # specificity and sensitivity\n",
" specificity = 0\n",
" sensitivity = 0\n",
" precision = 0\n",
" for i in range(no_of_classes):\n",
" specificity_vector = ((y_hat_test_voting != i) == (y_test_voting != i))[y_test_voting != i]\n",
" sensitivity_vector = ((y_hat_test_voting == i) == (y_test_voting == i))[y_test_voting == i]\n",
" precision_vector = ((y_hat_test_voting == i) == (y_test_voting == i))[y_hat_test_voting == i]\n",
" \n",
" specificity = specificity + specificity_vector.sum() / len(specificity_vector)\n",
" sensitivity = sensitivity + sensitivity_vector.sum() / len(sensitivity_vector)\n",
" precision = precision + precision_vector.sum() / len(precision_vector)\n",
" \n",
" specificity = specificity / no_of_classes\n",
" sensitivity = sensitivity / no_of_classes\n",
" precision = precision / no_of_classes\n",
" \n",
" specificities_voting = np.append(specificities_voting, specificity)\n",
" sensitivities_voting = np.append(sensitivities_voting, sensitivity)\n",
" precisions_voting = np.append(precisions_voting, precision)\n",
"\n",
"\n",
"# Computing F-Score\n",
"Pre = precisions_voting.mean()\n",
"Sen = sensitivities_voting.mean()\n",
"F_Score = (2 * Pre * Sen) / (Pre + Sen)\n",
" \n",
"accuracies_voting = accuracies_voting * 100\n",
"print(\"Accuracy with voting: \", accuracies_voting)\n",
"print(\"Mean accuracy with voting: %f\" % accuracies_voting.mean())\n",
"print(\"\\n\\n\")\n",
"print(\"Specifities: \", specificities_voting)\n",
"print(\"Mean specificity with voting: %f\" % specificities_voting.mean())\n",
"print(\"\\n\\n\")\n",
"print(\"Sensitivities: \", sensitivities_voting)\n",
"print(\"Mean sensitivity with voting: %f\" % sensitivities_voting.mean())\n",
"# print(\"Mean precision with voting: %f\" % Pre)\n",
"print(\"Mean F-Score with voting: %f\" % F_Score)\n",
"print(\"Standard deviation: %f\" % accuracies_voting.std())\n",
"\n",
"# print(\"Actual labels:\\n\", y_test_voting)\n",
"# print(\"\\n\\nPredictions:\\n\", y_hat_test_voting)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}