{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DREAMER Valence FastGRNN 48_16"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Adapted from Microsoft's notebooks, available at https://github.com/microsoft/EdgeML authored by Dennis et al."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tabulate import tabulate\n",
    "import os\n",
    "import datetime as datetime\n",
    "import pickle as pkl\n",
    "from sklearn.model_selection import train_test_split\n",
    "import pathlib\n",
    "from os import mkdir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadData(dirname):\n",
    "    x_train = np.load(dirname + '/' + 'x_train.npy')\n",
    "    y_train = np.load(dirname + '/' + 'y_train.npy')\n",
    "    x_test = np.load(dirname + '/' + 'x_test.npy')\n",
    "    y_test = np.load(dirname + '/' + 'y_test.npy')\n",
    "    x_val = np.load(dirname + '/' + 'x_val.npy')\n",
    "    y_val = np.load(dirname + '/' + 'y_val.npy')\n",
    "    return x_train, y_train, x_test, y_test, x_val, y_val\n",
    "def makeEMIData(subinstanceLen, subinstanceStride, sourceDir, outDir):\n",
    "    x_train, y_train, x_test, y_test, x_val, y_val = loadData(sourceDir)\n",
    "    x, y = bagData(x_train, y_train, subinstanceLen, subinstanceStride)\n",
    "    np.save(outDir + '/x_train.npy', x)\n",
    "    np.save(outDir + '/y_train.npy', y)\n",
    "    print('Num train %d' % len(x))\n",
    "    x, y = bagData(x_test, y_test, subinstanceLen, subinstanceStride)\n",
    "    np.save(outDir + '/x_test.npy', x)\n",
    "    np.save(outDir + '/y_test.npy', y)\n",
    "    print('Num test %d' % len(x))\n",
    "    x, y = bagData(x_val, y_val, subinstanceLen, subinstanceStride)\n",
    "    np.save(outDir + '/x_val.npy', x)\n",
    "    np.save(outDir + '/y_val.npy', y)\n",
    "    print('Num val %d' % len(x))\n",
    "def bagData(X, Y, subinstanceLen, subinstanceStride):\n",
    "    numClass = 5\n",
    "    numSteps = 128\n",
    "    numFeats = 16\n",
    "    assert X.ndim == 3\n",
    "    assert X.shape[1] == numSteps\n",
    "    assert X.shape[2] == numFeats\n",
    "    assert subinstanceLen <= numSteps\n",
    "    assert subinstanceLen > 0\n",
    "    assert subinstanceStride <= numSteps\n",
    "    assert subinstanceStride >= 0\n",
    "    assert len(X) == len(Y)\n",
    "    assert Y.ndim == 2\n",
    "    assert Y.shape[1] == numClass\n",
    "    x_bagged = []\n",
    "    y_bagged = []\n",
    "    for i, point in enumerate(X[:, :, :]):\n",
    "        instanceList = []\n",
    "        start = 0\n",
    "        end = subinstanceLen\n",
    "        while True:\n",
    "            x = point[start:end, :]\n",
    "            if len(x) < subinstanceLen:\n",
    "                x_ = np.zeros([subinstanceLen, x.shape[1]])\n",
    "                x_[:len(x), :] = x[:, :]\n",
    "                x = x_\n",
    "            instanceList.append(x)\n",
    "            if end >= numSteps:\n",
    "                break\n",
    "            start += subinstanceStride\n",
    "            end += subinstanceStride\n",
    "        bag = np.array(instanceList)\n",
    "        numSubinstance = bag.shape[0]\n",
    "        label = Y[i]\n",
    "        label = np.argmax(label)\n",
    "        labelBag = np.zeros([numSubinstance, numClass])\n",
    "        labelBag[:, label] = 1\n",
    "        x_bagged.append(bag)\n",
    "        label = np.array(labelBag)\n",
    "        y_bagged.append(label)\n",
    "    return np.array(x_bagged), np.array(y_bagged)"
   ]
  },
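  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`bagData` slices every 128-step instance into overlapping sub-instances of length `subinstanceLen`, taken every `subinstanceStride` steps, and copies the instance's one-hot label onto each sub-instance. Below is a minimal sketch (not part of the pipeline, just a sanity check) of the window count this produces for the 48/16 setting used in this notebook:\n",
    "\n",
    "```python\n",
    "numSteps, subLen, subStride = 128, 48, 16\n",
    "count, end = 0, subLen\n",
    "while True:              # mirrors the loop inside bagData\n",
    "    count += 1\n",
    "    if end >= numSteps:\n",
    "        break\n",
    "    end += subStride\n",
    "print(count)             # 6 sub-instances per bag -> the second dimension of the bagged arrays\n",
    "```"
   ]
  },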
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# mkdir('/home/sf/data/DREAMER/Valence/Fast_GRNN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num train 61735\n",
      "Num test 17149\n",
      "Num val 6860\n"
     ]
    }
   ],
   "source": [
    "subinstanceLen=48\n",
    "subinstanceStride=16\n",
    "extractedDir = '/home/sf/data/DREAMER/Valence/'\n",
    "# mkdir('/home/sf/data/DREAMER/Valence/Fast_GRNN/48_16')\n",
    "rawDir = extractedDir + '/RAW'\n",
    "sourceDir = rawDir\n",
    "outDir = extractedDir + 'Fast_GRNN' '/%d_%d/' % (subinstanceLen, subinstanceStride)\n",
    "makeEMIData(subinstanceLen, subinstanceStride, sourceDir, outDir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:39:06.272261Z",
     "start_time": "2018-08-19T12:39:05.330668Z"
    }
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import os\n",
    "import sys\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] ='1'\n",
    "\n",
    "# FastGRNN and FastRNN imports\n",
    "from edgeml.graph.rnn import EMI_DataPipeline\n",
    "from edgeml.graph.rnn import EMI_FastGRNN\n",
    "from edgeml.graph.rnn import EMI_FastRNN\n",
    "from edgeml.trainer.emirnnTrainer import EMI_Trainer, EMI_Driver\n",
    "import edgeml.utils"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let us set up some network parameters for the computation graph."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:39:06.292205Z",
     "start_time": "2018-08-19T12:39:06.274254Z"
    }
   },
   "outputs": [],
   "source": [
    "# Network parameters for our FastGRNN + FC Layer\n",
    "NUM_HIDDEN = 128\n",
    "NUM_TIMESTEPS = 48\n",
    "NUM_FEATS = 16\n",
    "FORGET_BIAS = 1.0\n",
    "NUM_OUTPUT = 5\n",
    "USE_DROPOUT = False\n",
    "KEEP_PROB = 0.9\n",
    "\n",
    "# Non-linearities can be chosen among \"tanh, sigmoid, relu, quantTanh, quantSigm\"\n",
    "UPDATE_NL = \"quantTanh\"\n",
    "GATE_NL = \"quantSigm\"\n",
    "\n",
    "# Ranks of Parameter matrices for low-rank parameterisation to compress models.\n",
    "WRANK = 5\n",
    "URANK = 6\n",
    "\n",
    "# For dataset API\n",
    "PREFETCH_NUM = 5\n",
    "BATCH_SIZE = 32\n",
    "\n",
    "# Number of epochs in *one iteration*\n",
    "NUM_EPOCHS = 3\n",
    "\n",
    "# Number of iterations in *one round*. After each iteration,\n",
    "# the model is dumped to disk. At the end of the current\n",
    "# round, the best model among all the dumped models in the\n",
    "# current round is picked up..\n",
    "NUM_ITER = 4\n",
    "\n",
    "# A round consists of multiple training iterations and a belief\n",
    "# update step using the best model from all of these iterations\n",
    "NUM_ROUNDS = 10\n",
    "\n",
    "# A staging direcory to store models\n",
    "MODEL_PREFIX = '/home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn'"
   ]
  },
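  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With these settings, every round runs `NUM_ITER` iterations of `NUM_EPOCHS` epochs each, so the schedule makes roughly the following number of passes over the training data (a rough upper bound; the exact count is up to `EMI_Driver`):\n",
    "\n",
    "```python\n",
    "NUM_ROUNDS * NUM_ITER * NUM_EPOCHS   # 10 * 4 * 3 = 120 epoch passes in total\n",
    "```"
   ]
  },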
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loading Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:39:06.410372Z",
     "start_time": "2018-08-19T12:39:06.294014Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x_train shape is: (61735, 6, 48, 16)\n",
      "y_train shape is: (61735, 6, 5)\n",
      "x_test shape is: (6860, 6, 48, 16)\n",
      "y_test shape is: (6860, 6, 5)\n"
     ]
    }
   ],
   "source": [
    "# Loading the data\n",
    "path='/home/sf/data/DREAMER/Valence/Fast_GRNN/48_16/'\n",
    "x_train, y_train = np.load(path + 'x_train.npy'), np.load(path + 'y_train.npy')\n",
    "x_test, y_test = np.load(path + 'x_test.npy'), np.load(path + 'y_test.npy')\n",
    "x_val, y_val = np.load(path + 'x_val.npy'), np.load(path + 'y_val.npy')\n",
    "\n",
    "# BAG_TEST, BAG_TRAIN, BAG_VAL represent bag_level labels. These are used for the label update\n",
    "# step of EMI/MI RNN\n",
    "BAG_TEST = np.argmax(y_test[:, 0, :], axis=1)\n",
    "BAG_TRAIN = np.argmax(y_train[:, 0, :], axis=1)\n",
    "BAG_VAL = np.argmax(y_val[:, 0, :], axis=1)\n",
    "NUM_SUBINSTANCE = x_train.shape[1]\n",
    "print(\"x_train shape is:\", x_train.shape)\n",
    "print(\"y_train shape is:\", y_train.shape)\n",
    "print(\"x_test shape is:\", x_val.shape)\n",
    "print(\"y_test shape is:\", y_val.shape)"
   ]
  },
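  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Right after bagging, every sub-instance in a bag carries the same one-hot label (see `bagData` above), so the bag-level label can be read off any sub-instance; index 0 is used here. A quick check of that assumption on the untouched test labels:\n",
    "\n",
    "```python\n",
    "assert np.all(np.argmax(y_test[:, 0, :], axis=1) == np.argmax(y_test[:, -1, :], axis=1))\n",
    "```"
   ]
  },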
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Computation Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:39:06.653612Z",
     "start_time": "2018-08-19T12:39:06.412290Z"
    }
   },
   "outputs": [],
   "source": [
    "# Define the linear secondary classifier\n",
    "def createExtendedGraph(self, baseOutput, *args, **kwargs):\n",
    "    W1 = tf.Variable(np.random.normal(size=[NUM_HIDDEN, NUM_OUTPUT]).astype('float32'), name='W1')\n",
    "    B1 = tf.Variable(np.random.normal(size=[NUM_OUTPUT]).astype('float32'), name='B1')\n",
    "    y_cap = tf.add(tf.tensordot(baseOutput, W1, axes=1), B1, name='y_cap_tata')\n",
    "    self.output = y_cap\n",
    "    self.graphCreated = True\n",
    "\n",
    "def restoreExtendedGraph(self, graph, *args, **kwargs):\n",
    "    y_cap = graph.get_tensor_by_name('y_cap_tata:0')\n",
    "    self.output = y_cap\n",
    "    self.graphCreated = True\n",
    "    \n",
    "def feedDictFunc(self, keep_prob=None, inference=False, **kwargs):\n",
    "    if inference is False:\n",
    "        feedDict = {self._emiGraph.keep_prob: keep_prob}\n",
    "    else:\n",
    "        feedDict = {self._emiGraph.keep_prob: 1.0}\n",
    "    return feedDict\n",
    "\n",
    "    \n",
    "EMI_FastGRNN._createExtendedGraph = createExtendedGraph\n",
    "EMI_FastGRNN._restoreExtendedGraph = restoreExtendedGraph\n",
    "if USE_DROPOUT is True:\n",
    "    EMI_FastGRNN.feedDictFunc = feedDictFunc"
   ]
  },
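  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The secondary classifier patched onto `EMI_FastGRNN` is a single dense layer: `tf.tensordot(baseOutput, W1, axes=1)` contracts the trailing `NUM_HIDDEN` axis, mapping an output of shape `[..., NUM_HIDDEN]` to logits of shape `[..., NUM_OUTPUT]` without touching any leading axes. A NumPy sketch of that shape behaviour (illustrative shapes only, assuming a hidden state per sub-instance and time step):\n",
    "\n",
    "```python\n",
    "h = np.zeros([BATCH_SIZE, NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_HIDDEN])\n",
    "W = np.zeros([NUM_HIDDEN, NUM_OUTPUT])\n",
    "print(np.tensordot(h, W, axes=1).shape)   # (32, 6, 48, 5)\n",
    "```"
   ]
  },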
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:39:06.701740Z",
     "start_time": "2018-08-19T12:39:06.655328Z"
    }
   },
   "outputs": [],
   "source": [
    "inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT)\n",
    "emiFastGRNN = EMI_FastGRNN(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS, wRank=WRANK, uRank=URANK, \n",
    "                           gate_non_linearity=GATE_NL, update_non_linearity=UPDATE_NL, useDropout=USE_DROPOUT)\n",
    "emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x_train shape is: (61735, 6, 48, 16)\n",
      "y_train shape is: (61735, 6, 5)\n",
      "x_test shape is: (6860, 6, 48, 16)\n",
      "y_test shape is: (6860, 6, 5)\n"
     ]
    }
   ],
   "source": [
    "print(\"x_train shape is:\", x_train.shape)\n",
    "print(\"y_train shape is:\", y_train.shape)\n",
    "print(\"x_test shape is:\", x_val.shape)\n",
    "print(\"y_test shape is:\", y_val.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:39:14.187456Z",
     "start_time": "2018-08-19T12:39:06.703481Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "g1 = tf.Graph()    \n",
    "with g1.as_default():\n",
    "    # Obtain the iterators to each batch of the data\n",
    "    x_batch, y_batch = inputPipeline()\n",
    "    # Create the forward computation graph based on the iterators\n",
    "    y_cap = emiFastGRNN(x_batch)\n",
    "    # Create loss graphs and training routines\n",
    "    emiTrainer(y_cap, y_batch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EMI Driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:51:45.803360Z",
     "start_time": "2018-08-19T12:39:14.189648Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Update policy: top-k\n",
      "Training with MI-RNN loss for 5 rounds\n",
      "Round: 0\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.03056 Acc 0.33854 | Val acc 0.31589 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1000\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.03063 Acc 0.32292 | Val acc 0.34810 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1001\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.03102 Acc 0.31771 | Val acc 0.36341 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1002\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.03103 Acc 0.36458 | Val acc 0.37187 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1003\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1003\n",
      "Round: 1\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.03091 Acc 0.39062 | Val acc 0.38338 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1004\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.03020 Acc 0.40104 | Val acc 0.39111 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1005\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02976 Acc 0.39062 | Val acc 0.39767 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1006\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02913 Acc 0.43229 | Val acc 0.40087 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1007\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1007\n",
      "Round: 2\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02894 Acc 0.45833 | Val acc 0.40991 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1008\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02868 Acc 0.47917 | Val acc 0.41385 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1009\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02856 Acc 0.47917 | Val acc 0.41618 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1010\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02889 Acc 0.40625 | Val acc 0.42143 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1011\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1011\n",
      "Round: 3\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02842 Acc 0.40104 | Val acc 0.41983 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1012\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02767 Acc 0.40625 | Val acc 0.42974 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1013\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02758 Acc 0.43229 | Val acc 0.42886 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1014\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02829 Acc 0.36979 | Val acc 0.42551 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1015\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1013\n",
      "Round: 4\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02758 Acc 0.43229 | Val acc 0.42886 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1016\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02829 Acc 0.36979 | Val acc 0.42551 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1017\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02760 Acc 0.40104 | Val acc 0.43149 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1018\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 0.02749 Acc 0.40625 | Val acc 0.43309 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1019\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1019\n",
      "Round: 5\n",
      "Switching to EMI-Loss function\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.38360 Acc 0.45833 | Val acc 0.42493 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1020\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.36713 Acc 0.43750 | Val acc 0.42566 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1021\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.37216 Acc 0.45312 | Val acc 0.43513 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1022\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.37237 Acc 0.43750 | Val acc 0.43397 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1023\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1022\n",
      "Round: 6\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.37237 Acc 0.43750 | Val acc 0.43397 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1024\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.38747 Acc 0.41146 | Val acc 0.43980 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1025\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.38066 Acc 0.42708 | Val acc 0.44227 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1026\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.39000 Acc 0.39583 | Val acc 0.36924 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1027\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1026\n",
      "Round: 7\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.39000 Acc 0.39583 | Val acc 0.36924 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1028\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.34749 Acc 0.43750 | Val acc 0.42522 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1029\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.38835 Acc 0.39583 | Val acc 0.44621 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1030\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.53749 Acc 0.30729 | Val acc 0.37230 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1031\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1030\n",
      "Round: 8\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.53749 Acc 0.30729 | Val acc 0.37230 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1032\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.45764 Acc 0.44792 | Val acc 0.38280 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1033\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.35094 Acc 0.46354 | Val acc 0.42464 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1034\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.44224 Acc 0.38021 | Val acc 0.40146 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1035\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1034\n",
      "Round: 9\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.44224 Acc 0.38021 | Val acc 0.40146 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1036\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.34025 Acc 0.52083 | Val acc 0.43280 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1037\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.36460 Acc 0.43229 | Val acc 0.41735 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1038\n",
      "Epoch   2 Batch  1915 ( 5775) Loss 1.34492 Acc 0.45833 | Val acc 0.43557 | Model saved to /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn, global_step 1039\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1039\n"
     ]
    }
   ],
   "source": [
    "with g1.as_default():\n",
    "    emiDriver = EMI_Driver(inputPipeline, emiFastGRNN, emiTrainer)\n",
    "\n",
    "emiDriver.initializeSession(g1)\n",
    "y_updated, modelStats = emiDriver.run(numClasses=NUM_OUTPUT, x_train=x_train,\n",
    "                                      y_train=y_train, bag_train=BAG_TRAIN,\n",
    "                                      x_val=x_val, y_val=y_val, bag_val=BAG_VAL,\n",
    "                                      numIter=NUM_ITER, keep_prob=KEEP_PROB,\n",
    "                                      numRounds=NUM_ROUNDS, batchSize=BATCH_SIZE,\n",
    "                                      numEpochs=NUM_EPOCHS, modelPrefix=MODEL_PREFIX,\n",
    "                                      fracEMI=0.5, updatePolicy='top-k', k=1)"
   ]
  },
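  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`fracEMI=0.5` makes the driver spend the first half of the rounds on the plain MI-RNN loss and the second half on the EMI loss, which matches the \"Training with MI-RNN loss for 5 rounds\" and \"Switching to EMI-Loss function\" messages in the log above. A rough sketch of that split (not the driver's exact internals):\n",
    "\n",
    "```python\n",
    "fracEMI = 0.5\n",
    "emi_rounds = int(NUM_ROUNDS * fracEMI)   # 5 rounds trained with the EMI loss\n",
    "mi_rounds = NUM_ROUNDS - emi_rounds      # 5 rounds trained with the MI-RNN loss\n",
    "```"
   ]
  },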
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluating the  trained model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:51:45.832728Z",
     "start_time": "2018-08-19T12:51:45.805984Z"
    }
   },
   "outputs": [],
   "source": [
    "# Early Prediction Policy: We make an early prediction based on the predicted classes\n",
    "#     probability. If the predicted class probability > minProb at some step, we make\n",
    "#     a prediction at that step.\n",
    "def earlyPolicy_minProb(instanceOut, minProb, **kwargs):\n",
    "    assert instanceOut.ndim == 2\n",
    "    classes = np.argmax(instanceOut, axis=1)\n",
    "    prob = np.max(instanceOut, axis=1)\n",
    "    index = np.where(prob >= minProb)[0]\n",
    "    if len(index) == 0:\n",
    "        assert (len(instanceOut) - 1) == (len(classes) - 1)\n",
    "        return classes[-1], len(instanceOut) - 1\n",
    "    index = index[0]\n",
    "    return classes[index], index\n",
    "\n",
    "def getEarlySaving(predictionStep, numTimeSteps, returnTotal=False):\n",
    "    predictionStep = predictionStep + 1\n",
    "    predictionStep = np.reshape(predictionStep, -1)\n",
    "    totalSteps = np.sum(predictionStep)\n",
    "    maxSteps = len(predictionStep) * numTimeSteps\n",
    "    savings = 1.0 - (totalSteps / maxSteps)\n",
    "    if returnTotal:\n",
    "        return savings, totalSteps\n",
    "    return savings"
   ]
  },
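  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A toy illustration of `earlyPolicy_minProb` with made-up step-wise class probabilities: the policy returns the predicted class and the first step at which the maximum class probability reaches `minProb`, falling back to the last step otherwise.\n",
    "\n",
    "```python\n",
    "toy = np.array([[0.30, 0.25, 0.20, 0.15, 0.10],\n",
    "                [0.60, 0.20, 0.10, 0.05, 0.05],\n",
    "                [0.992, 0.004, 0.002, 0.001, 0.001]])\n",
    "earlyPolicy_minProb(toy, minProb=0.99)   # -> (0, 2): class 0, decided at step index 2\n",
    "```"
   ]
  },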
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:51:46.210240Z",
     "start_time": "2018-08-19T12:51:45.834534Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Accuracy (k = 2): 0.446207\n",
      "\n",
      "Total Savings: 0.631420\n"
     ]
    }
   ],
   "source": [
    "k = 2\n",
    "ORIGINAL_NUM_TIMESTEPS = 128\n",
    "predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb, minProb=0.99)\n",
    "bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)\n",
    "print('Test Accuracy (k = %d): %f\\n' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))))\n",
    "mi_savings = (1 - NUM_TIMESTEPS / ORIGINAL_NUM_TIMESTEPS)\n",
    "emi_savings = getEarlySaving(predictionStep, NUM_TIMESTEPS)\n",
    "total_savings = mi_savings + (1 - mi_savings) * emi_savings\n",
    "print(\"Total Savings: %f\" % total_savings)"
   ]
  },
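  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A worked version of the savings arithmetic above, using the numbers from this run: reading only 48 of the original 128 time steps already saves 62.5% of the computation, and the early exits account for the rest of the reported total.\n",
    "\n",
    "```python\n",
    "mi_savings = 1 - 48 / 128                      # 0.625\n",
    "emi_savings = 0.0171                           # early-exit savings of the final model (see below)\n",
    "mi_savings + (1 - mi_savings) * emi_savings    # ~0.6314, the Total Savings printed above\n",
    "```"
   ]
  },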
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T12:51:46.677691Z",
     "start_time": "2018-08-19T12:51:46.212285Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   len       acc  macro-fsc  macro-pre  macro-rec  micro-fsc  micro-pre  \\\n",
      "0    1  0.431629   0.421293   0.472705   0.422115   0.431629   0.431629   \n",
      "1    2  0.446207   0.439769   0.454054   0.436935   0.446207   0.446207   \n",
      "2    3  0.425914   0.418580   0.468471   0.418384   0.425914   0.425914   \n",
      "3    4  0.389469   0.378138   0.510431   0.383665   0.389469   0.389469   \n",
      "4    5  0.360954   0.343713   0.555920   0.356331   0.360954   0.360954   \n",
      "5    6  0.340136   0.315597   0.597815   0.336736   0.340136   0.340136   \n",
      "\n",
      "   micro-rec  \n",
      "0   0.431629  \n",
      "1   0.446207  \n",
      "2   0.425914  \n",
      "3   0.389469  \n",
      "4   0.360954  \n",
      "5   0.340136  \n",
      "Max accuracy 0.446207 at subsequencelength 2\n",
      "Max micro-f 0.446207 at subsequencelength 2\n",
      "Micro-precision 0.446207 at subsequencelength 2\n",
      "Micro-recall 0.446207 at subsequencelength 2\n",
      "Max macro-f 0.439769 at subsequencelength 2\n",
      "macro-precision 0.454054 at subsequencelength 2\n",
      "macro-recall 0.436935 at subsequencelength 2\n"
     ]
    }
   ],
   "source": [
    "# A slightly more detailed analysis method is provided. \n",
    "df = emiDriver.analyseModel(predictions, BAG_TEST, NUM_SUBINSTANCE, NUM_OUTPUT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Picking the best model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-08-19T13:06:04.024660Z",
     "start_time": "2018-08-19T13:04:47.045787Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1003\n",
      "Round:  0, Validation accuracy: 0.3528, Test Accuracy (k = 2): 0.360079, Additional savings: 0.001984\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1007\n",
      "Round:  1, Validation accuracy: 0.3821, Test Accuracy (k = 2): 0.393084, Additional savings: 0.006543\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1011\n",
      "Round:  2, Validation accuracy: 0.4032, Test Accuracy (k = 2): 0.411686, Additional savings: 0.010094\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1015\n",
      "Round:  3, Validation accuracy: 0.4111, Test Accuracy (k = 2): 0.421774, Additional savings: 0.011727\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1017\n",
      "Round:  4, Validation accuracy: 0.4157, Test Accuracy (k = 2): 0.428363, Additional savings: 0.012503\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1023\n",
      "Round:  5, Validation accuracy: 0.4121, Test Accuracy (k = 2): 0.423348, Additional savings: 0.015345\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1027\n",
      "Round:  6, Validation accuracy: 0.4216, Test Accuracy (k = 2): 0.432912, Additional savings: 0.015224\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1030\n",
      "Round:  7, Validation accuracy: 0.4289, Test Accuracy (k = 2): 0.437110, Additional savings: 0.015745\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1035\n",
      "Round:  8, Validation accuracy: 0.4363, Test Accuracy (k = 2): 0.441892, Additional savings: 0.016721\n",
      "INFO:tensorflow:Restoring parameters from /home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn-1039\n",
      "Round:  9, Validation accuracy: 0.4367, Test Accuracy (k = 2): 0.446207, Additional savings: 0.017119\n"
     ]
    }
   ],
   "source": [
    "devnull = open(os.devnull, 'r')\n",
    "for val in modelStats:\n",
    "    round_, acc, modelPrefix, globalStep = val\n",
    "    emiDriver.loadSavedGraphToNewSession(modelPrefix, globalStep, redirFile=devnull)\n",
    "    predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,\n",
    "                                                               minProb=0.99, keep_prob=1.0)\n",
    " \n",
    "    bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)\n",
    "    print(\"Round: %2d, Validation accuracy: %.4f\" % (round_, acc), end='')\n",
    "    print(', Test Accuracy (k = %d): %f, ' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))), end='')\n",
    "    print('Additional savings: %f' % getEarlySaving(predictionStep, NUM_TIMESTEPS)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Network parameters for our FastGRNN + FC Layer\n",
    "NUM_HIDDEN = 128\n",
    "NUM_TIMESTEPS = 48\n",
    "NUM_FEATS = 16\n",
    "FORGET_BIAS = 1.0\n",
    "NUM_OUTPUT = 5\n",
    "USE_DROPOUT = False\n",
    "KEEP_PROB = 0.9\n",
    "\n",
    "# Non-linearities can be chosen among \"tanh, sigmoid, relu, quantTanh, quantSigm\"\n",
    "UPDATE_NL = \"quantTanh\"\n",
    "GATE_NL = \"quantSigm\"\n",
    "\n",
    "# Ranks of Parameter matrices for low-rank parameterisation to compress models.\n",
    "WRANK = 5\n",
    "URANK = 6\n",
    "\n",
    "# For dataset API\n",
    "PREFETCH_NUM = 5\n",
    "BATCH_SIZE = 32\n",
    "\n",
    "# Number of epochs in *one iteration*\n",
    "NUM_EPOCHS = 2\n",
    "\n",
    "# Number of iterations in *one round*. After each iteration,\n",
    "# the model is dumped to disk. At the end of the current\n",
    "# round, the best model among all the dumped models in the\n",
    "# current round is picked up..\n",
    "NUM_ITER = 4\n",
    "\n",
    "# A round consists of multiple training iterations and a belief\n",
    "# update step using the best model from all of these iterations\n",
    "NUM_ROUNDS = 10\n",
    "\n",
    "# A staging direcory to store models\n",
    "MODEL_PREFIX = '/home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "params = {\n",
    "    \"NUM_HIDDEN\" : 128,\n",
    "    \"NUM_TIMESTEPS\" : 48, #subinstance length.\n",
    "    \"ORIGINAL_NUM_TIMESTEPS\" : 128,\n",
    "    \"NUM_FEATS\" : 16,\n",
    "    \"FORGET_BIAS\" : 1.0,\n",
    "    \"UPDATE_NL\" : \"quantTanh\",\n",
    "    \"GATE_NL\" : \"quantSigm\",\n",
    "    \"NUM_OUTPUT\" : 5,\n",
    "    \"WRANK\" : 5,\n",
    "    \"URANK\" : 6,\n",
    "    \"USE_DROPOUT\" : 0, # '1' -> True. '0' -> False\n",
    "    \"KEEP_PROB\" : 0.9,\n",
    "    \"PREFETCH_NUM\" : 5,\n",
    "    \"BATCH_SIZE\" : 32,\n",
    "    \"NUM_EPOCHS\" : 2,\n",
    "    \"NUM_ITER\" : 4,\n",
    "    \"NUM_ROUNDS\" : 10,\n",
    "    \"MODEL_PREFIX\" : '/home/sf/data/DREAMER/Valence/48_16/models/Fast-GRNN/model-fgrnn'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   len       acc  macro-fsc  macro-pre  macro-rec  micro-fsc  micro-pre  \\\n",
      "0    1  0.431454   0.422877   0.456502   0.421756   0.431454   0.431454   \n",
      "1    2  0.442533   0.435352   0.447853   0.432985   0.442533   0.442533   \n",
      "2    3  0.422299   0.411657   0.464350   0.413693   0.422299   0.422299   \n",
      "3    4  0.382530   0.365657   0.501018   0.375431   0.382530   0.382530   \n",
      "4    5  0.353782   0.328291   0.544124   0.348052   0.353782   0.353782   \n",
      "5    6  0.332672   0.298837   0.584957   0.327965   0.332672   0.332672   \n",
      "\n",
      "   micro-rec  \n",
      "0   0.431454  \n",
      "1   0.442533  \n",
      "2   0.422299  \n",
      "3   0.382530  \n",
      "4   0.353782  \n",
      "5   0.332672  \n",
      "Max accuracy 0.442533 at subsequencelength 2\n",
      "Max micro-f 0.442533 at subsequencelength 2\n",
      "Micro-precision 0.442533 at subsequencelength 2\n",
      "Micro-recall 0.442533 at subsequencelength 2\n",
      "Max macro-f 0.435352 at subsequencelength 2\n",
      "macro-precision 0.447853 at subsequencelength 2\n",
      "macro-recall 0.432985 at subsequencelength 2\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n",
      "|    |   len |      acc |   macro-fsc |   macro-pre |   macro-rec |   micro-fsc |   micro-pre |   micro-rec |\n",
      "+====+=======+==========+=============+=============+=============+=============+=============+=============+\n",
      "|  0 |     1 | 0.431454 |    0.422877 |    0.456502 |    0.421756 |    0.431454 |    0.431454 |    0.431454 |\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n",
      "|  1 |     2 | 0.442533 |    0.435352 |    0.447853 |    0.432985 |    0.442533 |    0.442533 |    0.442533 |\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n",
      "|  2 |     3 | 0.422299 |    0.411657 |    0.46435  |    0.413693 |    0.422299 |    0.422299 |    0.422299 |\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n",
      "|  3 |     4 | 0.38253  |    0.365657 |    0.501018 |    0.375431 |    0.38253  |    0.38253  |    0.38253  |\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n",
      "|  4 |     5 | 0.353782 |    0.328291 |    0.544124 |    0.348052 |    0.353782 |    0.353782 |    0.353782 |\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n",
      "|  5 |     6 | 0.332672 |    0.298837 |    0.584957 |    0.327965 |    0.332672 |    0.332672 |    0.332672 |\n",
      "+----+-------+----------+-------------+-------------+-------------+-------------+-------------+-------------+\n"
     ]
    }
   ],
   "source": [
    "fgrnn_dict = {**params}\n",
    "fgrnn_dict[\"k\"] = k\n",
    "fgrnn_dict[\"accuracy\"] = np.mean((bagPredictions == BAG_TEST).astype(int))\n",
    "fgrnn_dict[\"total_savings\"] = getEarlySaving(predictionStep, NUM_TIMESTEPS)\n",
    "fgrnn_dict[\"y_test\"] = BAG_TEST\n",
    "fgrnn_dict[\"y_pred\"] = bagPredictions\n",
    "\n",
    "# A slightly more detailed analysis method is provided. \n",
    "df = emiDriver.analyseModel(predictions, BAG_TEST, NUM_SUBINSTANCE, NUM_OUTPUT)\n",
    "print (tabulate(df, headers=list(df.columns), tablefmt='grid'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results for this run have been saved at /home/sf/data/DREAMER/Valence/Fast_GRNN/ .\n"
     ]
    }
   ],
   "source": [
    "dirname = \"/home/sf/data/DREAMER/Valence/Fast_GRNN/\"\n",
    "pathlib.Path(dirname).mkdir(parents=True, exist_ok=True)\n",
    "print (\"Results for this run have been saved at\" , dirname, \".\")\n",
    "\n",
    "now = datetime.datetime.now()\n",
    "filename = list((str(now.year),\"-\",str(now.month),\"-\",str(now.day),\"|\",str(now.hour),\"-\",str(now.minute)))\n",
    "filename = ''.join(filename)\n",
    "\n",
    "#Save the dictionary containing the params and the results.\n",
    "pkl.dump(fgrnn_dict,open(dirname  + filename + \".pkl\",mode='wb'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}