{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SWELL-KW Scores"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adapted from Microsoft's notebooks, available at https://github.com/microsoft/EdgeML authored by Dennis et al."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:51:58.751435Z",
"start_time": "2019-04-30T09:51:57.442626Z"
}
},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"import os\n",
"import sys\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"# To include edgeml in python path\n",
"sys.path.insert(0, '../../')\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
"\n",
"# MI-RNN and EMI-RNN imports\n",
"from edgeml.graph.rnn import EMI_DataPipeline\n",
"from edgeml.graph.rnn import EMI_FastGRNN,EMI_FastRNN\n",
"from edgeml.trainer.emirnnTrainer import EMI_Trainer, EMI_Driver\n",
"import edgeml.utils"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-28T09:37:09.981820Z",
"start_time": "2019-07-28T09:37:09.975515Z"
}
},
"outputs": [],
"source": [
"NUM_HIDDEN = 128\n",
"NUM_TIMESTEPS = 30\n",
"NUM_FEATS = 22\n",
"FORGET_BIAS = 1.0\n",
"NUM_OUTPUT = 3\n",
"USE_DROPOUT = 0\n",
"\n",
"KEEP_PROB = 0.9\n",
"UPDATE_NL = \"quantTanh\"\n",
"GATE_NL = \"quantSigm\"\n",
"WRANK = 5\n",
"URANK = 6\n",
"PREFETCH_NUM = 5\n",
"BATCH_SIZE = 32\n",
"NUM_EPOCHS = 50\n",
"NUM_ITER = 4\n",
"NUM_ROUNDS = 2\n",
"\n",
"# A staging direcory to store models\n",
"MODEL_PREFIX = '/home/sf/data/SWELL-KW/FGModels_30_10/'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:00.022110Z",
"start_time": "2019-04-30T09:51:59.925101Z"
}
},
"outputs": [],
"source": [
"path=\"/home/sf/data/SWELL-KW/30_10/\"\n",
"x_train, y_train = np.load(path + 'x_train.npy'), np.load(path + 'y_train.npy')\n",
"x_test, y_test = np.load(path + 'x_test.npy'), np.load(path + 'y_test.npy')\n",
"x_val, y_val = np.load(path + 'x_val.npy'), np.load(path + 'y_val.npy')\n",
"\n",
"# BAG_TEST, BAG_TRAIN, BAG_VAL represent bag_level labels. These are used for the label update\n",
"# step of EMI/MI RNN\n",
"BAG_TEST = np.argmax(y_test[:, 0, :], axis=1)\n",
"BAG_TRAIN = np.argmax(y_train[:, 0, :], axis=1)\n",
"BAG_VAL = np.argmax(y_val[:, 0, :], axis=1)\n",
"NUM_SUBINSTANCE = x_train.shape[1]\n",
"print(\"x_train shape is:\", x_train.shape)\n",
"print(\"y_train shape is:\", y_train.shape)\n",
"print(\"x_test shape is:\", x_val.shape)\n",
"print(\"y_test shape is:\", y_val.shape)"
]
},
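{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an optional, illustrative check (not part of the original pipeline), the next cell prints the class balance of the bag-level labels computed above, using only numpy."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional diagnostic: class balance of the bag-level labels (numpy only)\n",
"print(\"Train bag label counts:\", np.bincount(BAG_TRAIN, minlength=NUM_OUTPUT))\n",
"print(\"Val bag label counts:\", np.bincount(BAG_VAL, minlength=NUM_OUTPUT))\n",
"print(\"Test bag label counts:\", np.bincount(BAG_TEST, minlength=NUM_OUTPUT))"
]
},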
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:00.604049Z",
"start_time": "2019-04-30T09:52:00.589634Z"
}
},
"outputs": [],
"source": [
"# Define the linear secondary classifier\n",
"def createExtendedGraph(self, baseOutput, *args, **kwargs):\n",
" W1 = tf.Variable(np.random.normal(size=[NUM_HIDDEN, NUM_OUTPUT]).astype('float32'), name='W1')\n",
" B1 = tf.Variable(np.random.normal(size=[NUM_OUTPUT]).astype('float32'), name='B1')\n",
" y_cap = tf.add(tf.tensordot(baseOutput, W1, axes=1), B1, name='y_cap_tata')\n",
" self.output = y_cap\n",
" self.graphCreated = True\n",
" \n",
"def addExtendedAssignOps(self, graph, W_val=None, B_val=None):\n",
" W1 = graph.get_tensor_by_name('W1:0')\n",
" B1 = graph.get_tensor_by_name('B1:0')\n",
" W1_op = tf.assign(W1, W_val)\n",
" B1_op = tf.assign(B1, B_val)\n",
" self.assignOps.extend([W1_op, B1_op])\n",
"\n",
"def restoreExtendedGraph(self, graph, *args, **kwargs):\n",
" y_cap = graph.get_tensor_by_name('y_cap_tata:0')\n",
" self.output = y_cap\n",
" self.graphCreated = True\n",
" \n",
"def feedDictFunc(self, keep_prob, **kwargs):\n",
" feedDict = {self._emiGraph.keep_prob: keep_prob}\n",
" return feedDict\n",
" \n",
"EMI_FastGRNN._createExtendedGraph = createExtendedGraph\n",
"EMI_FastGRNN._restoreExtendedGraph = restoreExtendedGraph\n",
"EMI_FastGRNN.addExtendedAssignOps = addExtendedAssignOps\n",
"\n",
"def earlyPolicy_minProb(instanceOut, minProb, **kwargs):\n",
" assert instanceOut.ndim == 2\n",
" classes = np.argmax(instanceOut, axis=1)\n",
" prob = np.max(instanceOut, axis=1)\n",
" index = np.where(prob >= minProb)[0]\n",
" if len(index) == 0:\n",
" assert (len(instanceOut) - 1) == (len(classes) - 1)\n",
" return classes[-1], len(instanceOut) - 1\n",
" index = index[0]\n",
" return classes[index], index\n",
"\n",
"\n",
"if USE_DROPOUT is True:\n",
" EMI_Driver.feedDictFunc = feedDictFunc"
]
},
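{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick, illustrative sanity check of `earlyPolicy_minProb` (an addition, not part of the original notebook): feed it a small synthetic per-step output and confirm that it returns the class and the first step at which the maximum class probability reaches `minProb`. The `dummyOut` values below are purely hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check of earlyPolicy_minProb on synthetic per-step outputs.\n",
"# Rows are time steps, columns are class scores (hypothetical values).\n",
"dummyOut = np.array([[0.30, 0.40, 0.30],\n",
"                     [0.10, 0.20, 0.70],\n",
"                     [0.05, 0.05, 0.90]])\n",
"cls, step = earlyPolicy_minProb(dummyOut, minProb=0.85)\n",
"print('Predicted class %d at step %d' % (cls, step))"
]
},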
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2018-08-19T09:34:06.288012Z",
"start_time": "2018-08-19T09:34:06.285286Z"
}
},
"source": [
"## 1. Initializing a New Computation Graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:10.701762Z",
"start_time": "2019-04-30T09:52:02.074816Z"
}
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT)\n",
"emiLSTM = EMI_FastGRNN(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS, wRank=WRANK, uRank=URANK, \n",
" gate_non_linearity=GATE_NL, update_non_linearity=UPDATE_NL, useDropout=USE_DROPOUT)\n",
"emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')\n",
"\n",
"# Construct the graph\n",
"g1 = tf.Graph() \n",
"with g1.as_default():\n",
" x_batch, y_batch = inputPipeline()\n",
" y_cap = emiLSTM(x_batch)\n",
" emiTrainer(y_cap, y_batch)\n",
" \n",
"with g1.as_default():\n",
" emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-25T06:25:20.220063Z",
"start_time": "2019-04-25T06:25:19.987538Z"
}
},
"outputs": [],
"source": [
"emiDriver.initializeSession(g1)\n",
"#y_updated, modelStats = emiDriver.run(numClasses=NUM_OUTPUT, x_train=x_train,\n",
"# y_train=y_train, bag_train=BAG_TRAIN,\n",
"# x_val=x_val, y_val=y_val, bag_val=BAG_VAL,\n",
"# numIter=NUM_ITER, keep_prob=KEEP_PROB,\n",
"# numRounds=NUM_ROUNDS, batchSize=BATCH_SIZE,\n",
"# numEpochs=NUM_EPOCHS, modelPrefix=MODEL_PREFIX,\n",
"# fracEMI=0.5, updatePolicy='top-k', k=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-08-19T11:48:33.294431Z",
"start_time": "2018-08-19T11:48:32.897376Z"
}
},
"outputs": [],
"source": [
"def earlyPolicy_minProb(instanceOut, minProb, **kwargs):\n",
" assert instanceOut.ndim == 2\n",
" classes = np.argmax(instanceOut, axis=1)\n",
" prob = np.max(instanceOut, axis=1)\n",
" index = np.where(prob >= minProb)[0]\n",
" if len(index) == 0:\n",
" assert (len(instanceOut) - 1) == (len(classes) - 1)\n",
" return classes[-1], len(instanceOut) - 1\n",
" index = index[0]\n",
" return classes[index], index\n",
"\n",
"emiDriver.initializeSession(g1)\n",
"\n",
"k = 2\n",
"predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,\n",
" minProb=0.99, keep_prob=1.0)\n",
"bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)\n",
"print('Accuracy at k = %d: %f' % (k, np.mean((bagPredictions == BAG_TEST).astype(int))))"
]
},
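{
"cell_type": "markdown",
"metadata": {},
"source": [
"Beyond the single accuracy number, a bag-level confusion matrix shows which of the three classes get confused. The next cell is an optional numpy-only sketch added for illustration; it assumes `bagPredictions` and `BAG_TEST` from the cell above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: bag-level confusion matrix (rows = true class, columns = predicted class)\n",
"confusion = np.zeros((NUM_OUTPUT, NUM_OUTPUT), dtype=int)\n",
"for trueLabel, predLabel in zip(BAG_TEST, bagPredictions):\n",
"    confusion[trueLabel, predLabel] += 1\n",
"print(confusion)"
]
},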
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Loading a Saved Graph into EMI-Driver"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-25T06:24:09.713351Z",
"start_time": "2019-04-25T06:24:09.638610Z"
}
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"emiDriver.initializeSession(g1)\n",
"emiDriver.loadSavedGraphToNewSession(MODEL_PREFIX , 1039)\n",
"k = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-25T06:24:09.713351Z",
"start_time": "2019-04-25T06:24:09.638610Z"
}
},
"outputs": [],
"source": [
"%%time\n",
"predictions, predictionStep = emiDriver.getInstancePredictions(x_test[:64], y_test[:64], earlyPolicy_minProb,\n",
" minProb=0.99, keep_prob=1.0)\n",
"bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-25T06:24:09.713351Z",
"start_time": "2019-04-25T06:24:09.638610Z"
}
},
"outputs": [],
"source": [
"print('Accuracy at k = %d: %f' % (k, np.mean(int(bagPredictions == BAG_TEST))))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Initializing using a Saved Graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-24T12:07:09.616748Z",
"start_time": "2019-04-24T12:07:09.596906Z"
}
},
"outputs": [],
"source": [
"# Making sure the old graph and sessions are closed\n",
"sess = emiDriver.getCurrentSession()\n",
"sess.close()\n",
"tf.reset_default_graph()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:19.568739Z",
"start_time": "2019-04-30T09:52:10.703663Z"
}
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"sess = tf.Session()\n",
"graphManager = edgeml.utils.GraphManager()\n",
"graph = graphManager.loadCheckpoint(sess, MODEL_PREFIX, globalStep=1004)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:20.570380Z",
"start_time": "2019-04-30T09:52:19.571022Z"
}
},
"outputs": [],
"source": [
"inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT, graph=graph)\n",
"emiLSTM = EMI_FastGRNN(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS, wRank=WRANK, uRank=URANK, \n",
" gate_non_linearity=GATE_NL, update_non_linearity=UPDATE_NL, useDropout=USE_DROPOUT)\n",
"emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy', graph=graph)\n",
"\n",
"g1 = graph\n",
"with g1.as_default():\n",
" x_batch, y_batch = inputPipeline()\n",
" y_cap = emiLSTM(x_batch)\n",
" emiTrainer(y_cap, y_batch)\n",
" \n",
"with g1.as_default():\n",
" emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:20.574716Z",
"start_time": "2019-04-30T09:52:20.572193Z"
}
},
"outputs": [],
"source": [
"emiDriver.setSession(sess)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:34.913965Z",
"start_time": "2019-04-30T09:52:32.795936Z"
}
},
"outputs": [],
"source": [
"%%time\n",
"\n",
"# tf.reset_default_graph()\n",
"# emiDriver.initializeSession(g1)\n",
"# emiDriver.loadSavedGraphToNewSession(MODEL_PREFIX, 1007)\n",
"k = 1\n",
"predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,\n",
" minProb=0.99, keep_prob=1.0)\n",
"bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)\n",
"print('Accuracy at k = %d: %f' % (k, np.mean((bagPredictions == BAG_TEST).astype(int))))\n"
]
},
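{
"cell_type": "markdown",
"metadata": {},
"source": [
"`predictionStep` records, for each sub-instance, the step at which `earlyPolicy_minProb` fired. Assuming 0-based step indices (an assumption, not verified here), the mean fraction of time steps actually evaluated gives a rough estimate of the compute saved by early prediction. The next cell is an illustrative sketch under that assumption."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rough estimate of early-prediction savings (assumes 0-based step indices)\n",
"fracStepsUsed = (np.asarray(predictionStep, dtype=float) + 1) / NUM_TIMESTEPS\n",
"print('Mean fraction of time steps evaluated: %f' % np.mean(fracStepsUsed))\n",
"print('Approximate early-prediction savings: %f' % (1.0 - np.mean(fracStepsUsed)))"
]
},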
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-30T09:52:42.334950Z",
"start_time": "2019-04-30T09:52:42.318103Z"
}
},
"outputs": [],
"source": [
"x_test.shape, bagPredictions.shape,y_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Restoring from Numpy Matrices"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-08-19T11:48:44.379901Z",
"start_time": "2018-08-19T11:48:44.326706Z"
}
},
"outputs": [],
"source": [
"graph = tf.get_default_graph()\n",
"W1 = graph.get_tensor_by_name('W1:0')\n",
"B1 = graph.get_tensor_by_name('B1:0')\n",
"allVars = emiLSTM.varList + [W1, B1]\n",
"sess = emiDriver.getCurrentSession()\n",
"allVars = sess.run(allVars)\n",
"\n",
"base = '/tmp/models/'\n",
"np.save(base + 'kernel.npy', allVars[0])\n",
"np.save(base + 'bias.npy', allVars[1])\n",
"np.save(base + 'W1.npy', allVars[2])\n",
"np.save(base + 'B1.npy', allVars[3])"
]
},
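{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an optional check before reloading the exported matrices elsewhere, the next cell prints their shapes. Note that for the low-rank FastGRNN used here, `emiLSTM.varList` may contain more than the single kernel/bias pair suggested by the file names above (carried over from the LSTM version of this notebook)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: inspect the exported variables before restoring them elsewhere\n",
"for i, mat in enumerate(allVars):\n",
"    print('var %d shape:' % i, np.asarray(mat).shape)"
]
},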
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-08-19T11:48:44.389724Z",
"start_time": "2018-08-19T11:48:44.381802Z"
}
},
"outputs": [],
"source": [
"sess = emiDriver.getCurrentSession()\n",
"sess.close()\n",
"tf.reset_default_graph()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-08-19T11:48:44.442241Z",
"start_time": "2018-08-19T11:48:44.391384Z"
}
},
"outputs": [],
"source": [
"base = '/home/iot/Documents/EdgeML-master/tf/examples/EMI-RNN/GRNN model'\n",
"kernel = np.load(base + 'kernel.npy')\n",
"bias = np.load(base + 'bias.npy')\n",
"W = np.load(base + 'W1.npy')\n",
"B = np.load(base + 'B1.npy')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-08-19T11:48:51.378377Z",
"start_time": "2018-08-19T11:48:44.444182Z"
}
},
"outputs": [],
"source": [
"inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS,\n",
" NUM_OUTPUT)\n",
"emiLSTM = EMI_Fast(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS,\n",
" forgetBias=FORGET_BIAS, useDropout=USE_DROPOUT)\n",
"emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')\n",
"\n",
"tf.reset_default_graph()\n",
"graph = tf.Graph()\n",
"\n",
"with graph.as_default():\n",
" x_batch, y_batch = inputPipeline()\n",
" y_cap = emiLSTM(x_batch)\n",
" emiTrainer(y_cap, y_batch)\n",
" # Add the assignment operations\n",
" emiLSTM.addBaseAssignOps(graph, [kernel, bias])\n",
" emiLSTM.addExtendedAssignOps(graph, W, B)\n",
" # Setup the driver. You can run the initializations manually as well\n",
" emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)\n",
"\n",
"emiDriver.initializeSession(graph)\n",
"# Run the assignment operations\n",
"sess = emiDriver.getCurrentSession()\n",
"sess.run(emiLSTM.assignOps)\n",
"\n",
"k = 2\n",
"predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test,\n",
" earlyPolicy_minProb,\n",
" minProb=0.99,\n",
" keep_prob=1.0)\n",
"bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k,\n",
" numClass=NUM_OUTPUT)\n",
"print('PART IV: Accuracy at k = %d: %f' % (k, np.mean((bagPredictions ==\n",
" BAG_TEST).astype(int))))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}