[d395cf]: / SWELL-KW / SWELL-KW_Scores.ipynb

Download this file

587 lines (586 with data), 18.2 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SWELL-KW Scores"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Adapted from Microsoft's notebooks, available at https://github.com/microsoft/EdgeML authored by Dennis et al."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:51:58.751435Z",
     "start_time": "2019-04-30T09:51:57.442626Z"
    }
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import os\n",
    "import sys\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "# To include edgeml in python path\n",
    "sys.path.insert(0, '../../')\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
    "\n",
    "# MI-RNN and EMI-RNN imports\n",
    "from edgeml.graph.rnn import EMI_DataPipeline\n",
    "from edgeml.graph.rnn import EMI_FastGRNN,EMI_FastRNN\n",
    "from edgeml.trainer.emirnnTrainer import EMI_Trainer, EMI_Driver\n",
    "import edgeml.utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-28T09:37:09.981820Z",
     "start_time": "2019-07-28T09:37:09.975515Z"
    }
   },
   "outputs": [],
   "source": [
    "NUM_HIDDEN = 128\n",
    "NUM_TIMESTEPS = 30\n",
    "NUM_FEATS = 22\n",
    "FORGET_BIAS = 1.0\n",
    "NUM_OUTPUT = 3\n",
    "USE_DROPOUT = 0\n",
    "\n",
    "KEEP_PROB = 0.9\n",
    "UPDATE_NL = \"quantTanh\"\n",
    "GATE_NL = \"quantSigm\"\n",
    "WRANK = 5\n",
    "URANK = 6\n",
    "PREFETCH_NUM = 5\n",
    "BATCH_SIZE = 32\n",
    "NUM_EPOCHS = 50\n",
    "NUM_ITER = 4\n",
    "NUM_ROUNDS = 2\n",
    "\n",
    "# A staging direcory to store models\n",
    "MODEL_PREFIX = '/home/sf/data/SWELL-KW/FGModels_30_10/'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loading Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:00.022110Z",
     "start_time": "2019-04-30T09:51:59.925101Z"
    }
   },
   "outputs": [],
   "source": [
    "path=\"/home/sf/data/SWELL-KW/30_10/\"\n",
    "x_train, y_train = np.load(path + 'x_train.npy'), np.load(path + 'y_train.npy')\n",
    "x_test, y_test = np.load(path + 'x_test.npy'), np.load(path + 'y_test.npy')\n",
    "x_val, y_val = np.load(path + 'x_val.npy'), np.load(path + 'y_val.npy')\n",
    "\n",
    "# BAG_TEST, BAG_TRAIN, BAG_VAL represent bag_level labels. These are used for the label update\n",
    "# step of EMI/MI RNN\n",
    "BAG_TEST = np.argmax(y_test[:, 0, :], axis=1)\n",
    "BAG_TRAIN = np.argmax(y_train[:, 0, :], axis=1)\n",
    "BAG_VAL = np.argmax(y_val[:, 0, :], axis=1)\n",
    "NUM_SUBINSTANCE = x_train.shape[1]\n",
    "print(\"x_train shape is:\", x_train.shape)\n",
    "print(\"y_train shape is:\", y_train.shape)\n",
    "print(\"x_test shape is:\", x_val.shape)\n",
    "print(\"y_test shape is:\", y_val.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:00.604049Z",
     "start_time": "2019-04-30T09:52:00.589634Z"
    }
   },
   "outputs": [],
   "source": [
    "# Define the linear secondary classifier\n",
    "def createExtendedGraph(self, baseOutput, *args, **kwargs):\n",
    "    W1 = tf.Variable(np.random.normal(size=[NUM_HIDDEN, NUM_OUTPUT]).astype('float32'), name='W1')\n",
    "    B1 = tf.Variable(np.random.normal(size=[NUM_OUTPUT]).astype('float32'), name='B1')\n",
    "    y_cap = tf.add(tf.tensordot(baseOutput, W1, axes=1), B1, name='y_cap_tata')\n",
    "    self.output = y_cap\n",
    "    self.graphCreated = True\n",
    "    \n",
    "def addExtendedAssignOps(self, graph, W_val=None, B_val=None):\n",
    "    W1 = graph.get_tensor_by_name('W1:0')\n",
    "    B1 = graph.get_tensor_by_name('B1:0')\n",
    "    W1_op = tf.assign(W1, W_val)\n",
    "    B1_op = tf.assign(B1, B_val)\n",
    "    self.assignOps.extend([W1_op, B1_op])\n",
    "\n",
    "def restoreExtendedGraph(self, graph, *args, **kwargs):\n",
    "    y_cap = graph.get_tensor_by_name('y_cap_tata:0')\n",
    "    self.output = y_cap\n",
    "    self.graphCreated = True\n",
    "    \n",
    "def feedDictFunc(self, keep_prob, **kwargs):\n",
    "    feedDict = {self._emiGraph.keep_prob: keep_prob}\n",
    "    return feedDict\n",
    "    \n",
    "EMI_FastGRNN._createExtendedGraph = createExtendedGraph\n",
    "EMI_FastGRNN._restoreExtendedGraph = restoreExtendedGraph\n",
    "EMI_FastGRNN.addExtendedAssignOps = addExtendedAssignOps\n",
    "\n",
    "def earlyPolicy_minProb(instanceOut, minProb, **kwargs):\n",
    "    assert instanceOut.ndim == 2\n",
    "    classes = np.argmax(instanceOut, axis=1)\n",
    "    prob = np.max(instanceOut, axis=1)\n",
    "    index = np.where(prob >= minProb)[0]\n",
    "    if len(index) == 0:\n",
    "        assert (len(instanceOut) - 1) == (len(classes) - 1)\n",
    "        return classes[-1], len(instanceOut) - 1\n",
    "    index = index[0]\n",
    "    return classes[index], index\n",
    "\n",
    "\n",
    "if USE_DROPOUT is True:\n",
    "    EMI_Driver.feedDictFunc = feedDictFunc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T09:34:06.288012Z",
     "start_time": "2018-08-19T09:34:06.285286Z"
    }
   },
   "source": [
    "## 1. Initializing a New Computation Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:10.701762Z",
     "start_time": "2019-04-30T09:52:02.074816Z"
    }
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT)\n",
    "emiLSTM = EMI_FastGRNN(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS, wRank=WRANK, uRank=URANK, \n",
    "                              gate_non_linearity=GATE_NL, update_non_linearity=UPDATE_NL, useDropout=USE_DROPOUT)\n",
    "emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')\n",
    "\n",
    "# Construct the graph\n",
    "g1 = tf.Graph()    \n",
    "with g1.as_default():\n",
    "    x_batch, y_batch = inputPipeline()\n",
    "    y_cap = emiLSTM(x_batch)\n",
    "    emiTrainer(y_cap, y_batch)\n",
    "    \n",
    "with g1.as_default():\n",
    "    emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-25T06:25:20.220063Z",
     "start_time": "2019-04-25T06:25:19.987538Z"
    }
   },
   "outputs": [],
   "source": [
    "emiDriver.initializeSession(g1)\n",
    "#y_updated, modelStats = emiDriver.run(numClasses=NUM_OUTPUT, x_train=x_train,\n",
    "#                                      y_train=y_train, bag_train=BAG_TRAIN,\n",
    "#                                      x_val=x_val, y_val=y_val, bag_val=BAG_VAL,\n",
    "#                                      numIter=NUM_ITER, keep_prob=KEEP_PROB,\n",
    "#                                      numRounds=NUM_ROUNDS, batchSize=BATCH_SIZE,\n",
    "#                                      numEpochs=NUM_EPOCHS, modelPrefix=MODEL_PREFIX,\n",
    "#                                      fracEMI=0.5, updatePolicy='top-k', k=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T11:48:33.294431Z",
     "start_time": "2018-08-19T11:48:32.897376Z"
    }
   },
   "outputs": [],
   "source": [
    "def earlyPolicy_minProb(instanceOut, minProb, **kwargs):\n",
    "    assert instanceOut.ndim == 2\n",
    "    classes = np.argmax(instanceOut, axis=1)\n",
    "    prob = np.max(instanceOut, axis=1)\n",
    "    index = np.where(prob >= minProb)[0]\n",
    "    if len(index) == 0:\n",
    "        assert (len(instanceOut) - 1) == (len(classes) - 1)\n",
    "        return classes[-1], len(instanceOut) - 1\n",
    "    index = index[0]\n",
    "    return classes[index], index\n",
    "\n",
    "emiDriver.initializeSession(g1)\n",
    "\n",
    "k = 2\n",
    "predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,\n",
    "                                                               minProb=0.99, keep_prob=1.0)\n",
    "bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)\n",
    "print('Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Loading a Saved Graph into EMI-Driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-25T06:24:09.713351Z",
     "start_time": "2019-04-25T06:24:09.638610Z"
    }
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "emiDriver.initializeSession(g1)\n",
    "emiDriver.loadSavedGraphToNewSession(MODEL_PREFIX , 1039)\n",
    "k = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-25T06:24:09.713351Z",
     "start_time": "2019-04-25T06:24:09.638610Z"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "predictions, predictionStep = emiDriver.getInstancePredictions(x_test[:64], y_test[:64], earlyPolicy_minProb,\n",
    "                                                               minProb=0.99, keep_prob=1.0)\n",
    "bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-25T06:24:09.713351Z",
     "start_time": "2019-04-25T06:24:09.638610Z"
    }
   },
   "outputs": [],
   "source": [
    "print('Accuracy at k = %d: %f' % (k,  np.mean(int(bagPredictions == BAG_TEST))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Initializing using a Saved Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-24T12:07:09.616748Z",
     "start_time": "2019-04-24T12:07:09.596906Z"
    }
   },
   "outputs": [],
   "source": [
    "# Making sure the old graph and sessions are closed\n",
    "sess = emiDriver.getCurrentSession()\n",
    "sess.close()\n",
    "tf.reset_default_graph()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:19.568739Z",
     "start_time": "2019-04-30T09:52:10.703663Z"
    }
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "sess = tf.Session()\n",
    "graphManager = edgeml.utils.GraphManager()\n",
    "graph = graphManager.loadCheckpoint(sess, MODEL_PREFIX, globalStep=1004)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:20.570380Z",
     "start_time": "2019-04-30T09:52:19.571022Z"
    }
   },
   "outputs": [],
   "source": [
    "inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT, graph=graph)\n",
    "emiLSTM = EMI_FastGRNN(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS, wRank=WRANK, uRank=URANK, \n",
    "                               gate_non_linearity=GATE_NL, update_non_linearity=UPDATE_NL, useDropout=USE_DROPOUT)\n",
    "emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy', graph=graph)\n",
    "\n",
    "g1 = graph\n",
    "with g1.as_default():\n",
    "    x_batch, y_batch = inputPipeline()\n",
    "    y_cap = emiLSTM(x_batch)\n",
    "    emiTrainer(y_cap, y_batch)\n",
    "    \n",
    "with g1.as_default():\n",
    "    emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:20.574716Z",
     "start_time": "2019-04-30T09:52:20.572193Z"
    }
   },
   "outputs": [],
   "source": [
    "emiDriver.setSession(sess)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:34.913965Z",
     "start_time": "2019-04-30T09:52:32.795936Z"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "# tf.reset_default_graph()\n",
    "# emiDriver.initializeSession(g1)\n",
    "# emiDriver.loadSavedGraphToNewSession(MODEL_PREFIX, 1007)\n",
    "k = 1\n",
    "predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,\n",
    "                                                            minProb=0.99, keep_prob=1.0)\n",
    "bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)\n",
    "print('Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-04-30T09:52:42.334950Z",
     "start_time": "2019-04-30T09:52:42.318103Z"
    }
   },
   "outputs": [],
   "source": [
    "x_test.shape, bagPredictions.shape,y_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Restoring from Numpy Matrices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T11:48:44.379901Z",
     "start_time": "2018-08-19T11:48:44.326706Z"
    }
   },
   "outputs": [],
   "source": [
    "graph = tf.get_default_graph()\n",
    "W1 = graph.get_tensor_by_name('W1:0')\n",
    "B1 = graph.get_tensor_by_name('B1:0')\n",
    "allVars = emiLSTM.varList + [W1, B1]\n",
    "sess = emiDriver.getCurrentSession()\n",
    "allVars = sess.run(allVars)\n",
    "\n",
    "base = '/tmp/models/'\n",
    "np.save(base + 'kernel.npy', allVars[0])\n",
    "np.save(base + 'bias.npy', allVars[1])\n",
    "np.save(base + 'W1.npy', allVars[2])\n",
    "np.save(base + 'B1.npy', allVars[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T11:48:44.389724Z",
     "start_time": "2018-08-19T11:48:44.381802Z"
    }
   },
   "outputs": [],
   "source": [
    "sess = emiDriver.getCurrentSession()\n",
    "sess.close()\n",
    "tf.reset_default_graph()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T11:48:44.442241Z",
     "start_time": "2018-08-19T11:48:44.391384Z"
    }
   },
   "outputs": [],
   "source": [
    "base = '/home/iot/Documents/EdgeML-master/tf/examples/EMI-RNN/GRNN model'\n",
    "kernel = np.load(base + 'kernel.npy')\n",
    "bias = np.load(base + 'bias.npy')\n",
    "W = np.load(base + 'W1.npy')\n",
    "B = np.load(base + 'B1.npy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T11:48:51.378377Z",
     "start_time": "2018-08-19T11:48:44.444182Z"
    }
   },
   "outputs": [],
   "source": [
    "inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS,\n",
    "                                 NUM_OUTPUT)\n",
    "emiLSTM = EMI_Fast(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS,\n",
    "                        forgetBias=FORGET_BIAS, useDropout=USE_DROPOUT)\n",
    "emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')\n",
    "\n",
    "tf.reset_default_graph()\n",
    "graph = tf.Graph()\n",
    "\n",
    "with graph.as_default():\n",
    "    x_batch, y_batch = inputPipeline()\n",
    "    y_cap = emiLSTM(x_batch)\n",
    "    emiTrainer(y_cap, y_batch)\n",
    "    # Add the assignment operations\n",
    "    emiLSTM.addBaseAssignOps(graph, [kernel, bias])\n",
    "    emiLSTM.addExtendedAssignOps(graph, W, B)\n",
    "    # Setup the driver. You can run the initializations manually as well\n",
    "    emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)\n",
    "\n",
    "emiDriver.initializeSession(graph)\n",
    "# Run the assignment operations\n",
    "sess = emiDriver.getCurrentSession()\n",
    "sess.run(emiLSTM.assignOps)\n",
    "\n",
    "k = 2\n",
    "predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test,\n",
    "                                                               earlyPolicy_minProb,\n",
    "                                                               minProb=0.99,\n",
    "                                                               keep_prob=1.0)\n",
    "bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k,\n",
    "                                             numClass=NUM_OUTPUT)\n",
    "print('PART IV: Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions ==\n",
    "                                                        BAG_TEST).astype(int))))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}