Preprocess_Deap.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Valence value regression based on Deap Dataset\n",
    "\n",
    "## 0. This notebook is based on DEAP database\n",
    "\n",
    "Anyone should refer to DEAP team first\n",
    "\n",
    "@article{koelstra2012deap,\n",
    "  title={Deap: A database for emotion analysis; using physiological signals},\n",
    "  author={Koelstra, Sander and Muhl, Christian and Soleymani, Mohammad and Lee, Jong-Seok and Yazdani, Ashkan and Ebrahimi, Touradj and Pun, Thierry and Nijholt, Anton and Patras, Ioannis},\n",
    "  journal={IEEE Transactions on Affective Computing},\n",
    "  volume={3},\n",
    "  number={1},\n",
    "  pages={18--31},\n",
    "  year={2012},\n",
    "  publisher={IEEE}\n",
    "}\n",
    "\n",
    "## 1. Dependency\n",
    "* numpy\n",
    "* pyEEG\n",
    "* sciki-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "#import pyeeg as pe\n",
    "import pickle as pickle\n",
    "import pandas as pd\n",
    "import math\n",
    "\n",
    "from sklearn import svm\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.ensemble import AdaBoostRegressor\n",
    "\n",
    "import os\n",
    "#import tensorflow as tf\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Global Variables setup\n",
    "File Name data\\SXX.dat, XX \\in [0,31]\n",
    "* data: 40 x 40 x 8064: trial x channel x data\n",
    "* label: 40 x 4: video/trial x label (valence, arousal, dominance, liking)\n",
    "\n",
    "Channel Indice: {\n",
    "* 1 : AF3; 2: F3; 3: F7; 4: FC5; 7: T7; 11: P7; 13: O1\n",
    "* 17: AF4; 19: F4; 20: F8; 21: FC6; 25: T8; 29: P8; 31: O2 }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "channel = [1,2,3,4,6,11,13,17,19,20,21,25,29,31] #14 Channels chosen to fit Emotiv Epoch+\n",
    "band = [4,8,12,16,25,45] #5 bands\n",
    "window_size = 256 #Averaging band power of 2 sec\n",
    "step_size = 16 #Each 0.125 sec update once\n",
    "sample_rate = 128 #Sampling rate of 128 Hz\n",
    "subjectList = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32']\n",
    "#List of subjects"
   ]
  },
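  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity-check sketch, assuming `data/s01.dat` is present: load one subject file and confirm the layout described above.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "# Load a single subject and confirm the trial x channel x sample layout described above\n",
    "with open(os.path.join('data', 's01.dat'), 'rb') as f:\n",
    "    s01 = pickle.load(f, encoding='latin1')  # latin1 handles the Python 2 pickle\n",
    "\n",
    "print(s01['data'].shape)    # expected: (40, 40, 8064)\n",
    "print(s01['labels'].shape)  # expected: (40, 4) -> valence, arousal, dominance, liking"
   ]
  },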
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. FFT with pyeeg\n",
    "* [4-8]: theta band\n",
    "* [8-12]: alpha band\n",
    "* [12-16]: low beta band \n",
    "* [16-25]: high beta band\n",
    "* [25-45]: gamma band"
   ]
  },
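  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, a minimal numpy-only sketch of the band-power computation that `pe.bin_power` performs (sum of FFT magnitudes between the band boundaries); the pipeline below uses pyEEG itself. The helper name `bin_power_numpy` is introduced here only for illustration.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def bin_power_numpy(X, band, sample_rate):\n",
    "    '''Sketch of pyeeg.bin_power: sum of FFT magnitudes per frequency band.\n",
    "    For band = [4, 8, 12, 16, 25, 45] the result follows the order\n",
    "    theta, alpha, low beta, high beta, gamma.'''\n",
    "    C = np.abs(np.fft.fft(X))\n",
    "    power = np.zeros(len(band) - 1)\n",
    "    for i in range(len(band) - 1):\n",
    "        lo = int(np.floor(band[i] / sample_rate * len(X)))\n",
    "        hi = int(np.floor(band[i + 1] / sample_rate * len(X)))\n",
    "        power[i] = np.sum(C[lo:hi])\n",
    "    return power, power / np.sum(power)  # (band power, band power ratio)\n",
    "\n",
    "# Example: band powers for 2 s of a synthetic signal\n",
    "# power, ratio = bin_power_numpy(np.random.randn(256), band, sample_rate)"
   ]
  },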
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "def FFT_Processing (sub, channel, band, window_size, step_size, sample_rate):\n",
    "    '''\n",
    "    arguments:  string subject\n",
    "                list channel indice\n",
    "                list band\n",
    "                int window size for FFT\n",
    "                int step size for FFT\n",
    "                int sample rate for FFT\n",
    "    return:     void\n",
    "    '''\n",
    "    meta = []\n",
    "    with open('data\\s' + sub + '.dat', 'rb') as file:\n",
    "\n",
    "        subject = pickle.load(file, encoding='latin1') #resolve the python 2 data problem by encoding : latin1\n",
    "\n",
    "        for i in range (0,40):\n",
    "            # loop over 0-39 trails\n",
    "            data = subject[\"data\"][i]\n",
    "            labels = subject[\"labels\"][i]\n",
    "            start = 0;\n",
    "\n",
    "            while start + window_size < data.shape[1]:\n",
    "                meta_array = []\n",
    "                meta_data = [] #meta vector for analysis\n",
    "                for j in channel:\n",
    "                    X = data[j][start : start + window_size] #Slice raw data over 2 sec, at interval of 0.125 sec\n",
    "                    Y = pe.bin_power(X, band, sample_rate) #FFT over 2 sec of channel j, in seq of theta, alpha, low beta, high beta, gamma\n",
    "                    meta_data = meta_data + list(Y[0])\n",
    "\n",
    "                meta_array.append(np.array(meta_data))\n",
    "                meta_array.append(labels)\n",
    "\n",
    "                meta.append(np.array(meta_array))    \n",
    "                start = start + step_size\n",
    "                \n",
    "        meta = np.array(meta)\n",
    "        np.save('out\\s' + sub, meta, allow_pickle=True, fix_imports=True)\n",
    "\n",
    "def testing (M, L, model):\n",
    "    '''\n",
    "    arguments:  M: testing dataset\n",
    "                L: testing dataset label\n",
    "                model: scikit-learn model\n",
    "\n",
    "    return:     void\n",
    "    '''\n",
    "    output = model.predict(M[0:78080:32])\n",
    "    label = L[0:78080:32]\n",
    "\n",
    "    k = 0\n",
    "    l = 0\n",
    "\n",
    "    for i in range(len(label)):\n",
    "        k = k + (output[i] - label[i])*(output[i] - label[i]) #square difference \n",
    "\n",
    "        #a good guess\n",
    "        if (output[i] > 5 and label[i] > 5):\n",
    "            l = l + 1\n",
    "        elif (output[i] < 5 and label[i] <5):\n",
    "            l = l + 1\n",
    "\n",
    "    print (\"l2 error:\", k/len(label), \"classification accuracy:\", l / len(label),l, len(label))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for subjects in subjectList:\n",
    "    FFT_Processing (subjects, channel, band, window_size, step_size, sample_rate)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.Segment of preprocessed data\n",
    "* training dataset: 75 %\n",
    "* validation dataset: 12.5%\n",
    "* testing dataset: 12.5%\n",
    "\n",
    "Agrithom pool:\n",
    "* Support Vector Machine (which kernal?)\n",
    "* Ada-Boost\n",
    "\n",
    "Best practice could be refered to this paper: \n",
    "\n",
    "@article{alarcao2017emotions,\n",
    "  title={Emotions recognition using EEG signals: A survey},\n",
    "  author={Alarcao, Soraia M and Fonseca, Manuel J},\n",
    "  journal={IEEE Transactions on Affective Computing},\n",
    "  year={2017},\n",
    "  publisher={IEEE}\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training dataset: (468480, 70) (468480, 4)\n",
      "testing dataset: (78080, 70) (78080, 4)\n",
      "validation dataset: (78080, 70) (78080, 4)\n"
     ]
    }
   ],
   "source": [
    "#for subjects in subjectList:\n",
    "data_training = []\n",
    "label_training = []\n",
    "data_testing = []\n",
    "label_testing = []\n",
    "data_validation = []\n",
    "label_validation = []\n",
    "\n",
    "for subjects in subjectList:\n",
    "\n",
    "    with open('out\\s' + subjects + '.npy', 'rb') as file:\n",
    "        sub = np.load(file)\n",
    "        for i in range (0,sub.shape[0]):\n",
    "            if i % 8 == 0:\n",
    "                data_testing.append(sub[i][0])\n",
    "                label_testing.append(sub[i][1])\n",
    "            elif i % 8 == 1:\n",
    "                data_validation.append(sub[i][0])\n",
    "                label_validation.append(sub[i][1])\n",
    "            else:\n",
    "                data_training.append(sub[i][0])\n",
    "                label_training.append(sub[i][1])\n",
    "\n",
    "np.save('out\\data_training', np.array(data_training), allow_pickle=True, fix_imports=True)\n",
    "np.save('out\\label_training', np.array(label_training), allow_pickle=True, fix_imports=True)\n",
    "print(\"training dataset:\", np.array(data_training).shape, np.array(label_training).shape)\n",
    "\n",
    "np.save('out\\data_testing', np.array(data_testing), allow_pickle=True, fix_imports=True)\n",
    "np.save('out\\label_testing', np.array(label_testing), allow_pickle=True, fix_imports=True)\n",
    "print(\"testing dataset:\", np.array(data_testing).shape, np.array(label_testing).shape)\n",
    "\n",
    "np.save('out\\data_validation', np.array(data_validation), allow_pickle=True, fix_imports=True)\n",
    "np.save('out\\label_validation', np.array(label_validation), allow_pickle=True, fix_imports=True)\n",
    "print(\"validation dataset:\", np.array(data_validation).shape, np.array(label_validation).shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.Regression\n",
    "### 0. Loading Training and Testing dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('out\\data_training.npy', 'rb') as fileTrain:\n",
    "    X  = np.load(fileTrain)\n",
    "    \n",
    "with open('out\\label_training.npy', 'rb') as fileTrainL:\n",
    "    Y  = np.load(fileTrainL)\n",
    "    \n",
    "X = normalize(X)\n",
    "Z = np.ravel(Y[:, [1]])\n",
    "\n",
    "Arousal_Train = np.ravel(Y[:, [0]])\n",
    "Valence_Train = np.ravel(Y[:, [1]])\n",
    "Domain_Train = np.ravel(Y[:, [2]])\n",
    "Like_Train = np.ravel(Y[:, [3]])\n",
    "\n",
    "\n",
    "\n",
    "with open('out\\data_validation.npy', 'rb') as fileTrain:\n",
    "    M  = np.load(fileTrain)\n",
    "    \n",
    "with open('out\\label_validation.npy', 'rb') as fileTrainL:\n",
    "    N  = np.load(fileTrainL)\n",
    "\n",
    "M = normalize(M)\n",
    "L = np.ravel(N[:, [1]])\n",
    "\n",
    "Arousal_Test = np.ravel(N[:, [0]])\n",
    "Valence_Test = np.ravel(N[:, [1]])\n",
    "Domain_Test = np.ravel(N[:, [2]])\n",
    "Like_Test = np.ravel(N[:, [3]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "### 1. Support Vector Regression\n",
    "* default setting, l1 error: 1.621761042477756 classification error: 0.6057377049180328 1478 2440"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',\n",
       "  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = svm.SVR()\n",
    "clf.fit(X[0:468480:32], Z[0:468480:32])  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Random Forest Regression\n",
    "* n_estimators = 10, sample rate = 1/32, l1 error: 1.137919672131145 classification accuracy: 0.7774590163934426 1897 2440\n",
    "* n_estimators = 100, sample rate = 1/32, l1 error: 1.1029040163934432 classification accuracy: 0.8147540983606557 1988 2440\n",
    "* n_estimators = 100, min_samples_leaf=10, sample rate = 1/32, l1 error: 1.274458098574928 classification accuracy: 0.7622950819672131 1860 2440\n",
    "* n_estimators = 100, min_samples_leaf=50, sample rate = 1/32, l1 error: 1.4575897309409926 classification accuracy: 0.6823770491803278 1665 2440\n",
    "\n",
    "* n_estimators = 250, sample rate = 1/32, l1 error: 1.0905590819672137 classification accuracy: 0.830327868852459 2026 2440\n",
    "* n_estimators = 750, sample rate = 1/32, l1 error: 1.0953162021857932 classification accuracy: 0.8340163934426229 2035 2440\n",
    "* n_estimators = 750, sample rate = 1/8, l1 error: l1 error: 1.066982950819674 classification accuracy: 0.8217213114754098 2005 2440\n",
    "* __n_estimators = 512, sample rate = 1/32, l1 error: 1.092375304175206 classification accuracy: 0.8364754098360656 2041 2440\n",
    "__\n",
    "\n"
   ]
  },
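  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A hedged sketch of the kind of sweep that could produce the comparisons above, assuming each setting is trained on the same 1/32 subsample and scored with `testing`; the cells below train only the chosen n_estimators = 512 models.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch: compare RandomForestRegressor sizes on the valence target,\n",
    "# reusing the 1/32 subsample and the testing() helper defined above.\n",
    "for n_estimators in (10, 100, 250, 512, 750):\n",
    "    reg = RandomForestRegressor(n_estimators=n_estimators, n_jobs=6)\n",
    "    reg.fit(X[0:468480:32], Valence_Train[0:468480:32])\n",
    "    print('n_estimators =', n_estimators)\n",
    "    testing(M, Valence_Test, reg)"
   ]
  },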
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l2 error: 1.876775658972537 classification accuracy: 0.8290983606557377 2023 2440\n"
     ]
    }
   ],
   "source": [
    "Val_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n",
    "Val_R.fit(X[0:468480:32], Valence_Train[0:468480:32])\n",
    "testing (M, Valence_Test, Val_R)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l2 error: 2.0764509040715233 classification accuracy: 0.8266393442622951 2017 2440\n"
     ]
    }
   ],
   "source": [
    "Aro_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n",
    "Aro_R.fit(X[0:468480:32], Arousal_Train[0:468480:32])\n",
    "testing (M, Arousal_Test, Aro_R)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l2 error: 1.813647083229937 classification accuracy: 0.8184426229508197 1997 2440\n"
     ]
    }
   ],
   "source": [
    "Dom_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n",
    "Dom_R.fit(X[0:468480:32], Domain_Train[0:468480:32])\n",
    "testing (M, Domain_Test, Dom_R)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l2 error: 2.489005384276336 classification accuracy: 0.8512295081967213 2077 2440\n"
     ]
    }
   ],
   "source": [
    "Lik_R = RandomForestRegressor(n_estimators=512, n_jobs=6)\n",
    "Lik_R.fit(X[0:468480:32], Like_Train[0:468480:32])\n",
    "testing (M, Like_Test, Lik_R)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. AdaBoost Regression\n",
    "* n = 50, lr = 1.0: l2 error: 3.8454054839726695 classification accuracy: 0.6147540983606558 1500 2440\n",
    "* n = 50, lr = 1.0, square: l2 error: 4.015289218608164 classification accuracy: 0.5913934426229508 1443 2440\n",
    "* n = 500, lr = 1.0: l2 error: 3.8861651269012594 classification accuracy: 0.6155737704918033 1502 2440\n",
    "*\n",
    "*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AdaBoostRegressor(base_estimator=None, learning_rate=0.01, loss='linear',\n",
       "         n_estimators=5000, random_state=None)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = AdaBoostRegressor(n_estimators=5000, learning_rate=0.01)\n",
    "clf.fit(X[0:468480:32], Z[0:468480:32])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Calculating accuracy and loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l2 error: 1.8832017200301692 classification accuracy: 0.8348360655737705 2037 2440\n"
     ]
    }
   ],
   "source": [
    "output = Val_R.predict(M[0:78080:32])\n",
    "label = L[0:78080:32]\n",
    "\n",
    "k = 0\n",
    "l = 0\n",
    "\n",
    "for i in range(len(label)):\n",
    "    k = k + (output[i] - label[i])*(output[i] - label[i]) #square difference \n",
    "    \n",
    "    #a good guess\n",
    "    if (output[i] > 5 and label[i] > 5):\n",
    "        l = l + 1\n",
    "    elif (output[i] < 5 and label[i] <5):\n",
    "        l = l + 1\n",
    "\n",
    "print (\"l2 error:\", k/len(label), \"classification accuracy:\", l / len(label),l, len(label))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. ANN\n",
    "* 500 epoch 0.005 128 - 256 - 256 - 128 loss = 3.1\n",
    "* 3000 epoch 0.0001 256-512-512-256 Epoch: 3196 - Training Cost: 1.8372873067855835  Testing Cost: 2.231332540512085\n"
   ]
  },
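  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, a minimal tf.keras (TensorFlow 2.x) sketch of the same four-hidden-layer MLP regressor; the cells below use the original TensorFlow 1.x low-level API. The helper name `build_mlp` and its default layer sizes are assumptions mirroring the 512-1024-1024-512 configuration used below.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only: tf.keras equivalent of the MLP regressor built below (assumes TF 2.x)\n",
    "import tensorflow as tf\n",
    "\n",
    "def build_mlp(number_of_inputs=70, layer_nodes=(512, 1024, 1024, 512), learning_rate=0.0001):\n",
    "    model = tf.keras.Sequential()\n",
    "    model.add(tf.keras.layers.InputLayer(input_shape=(number_of_inputs,)))\n",
    "    for n in layer_nodes:\n",
    "        # ReLU hidden layers with Xavier/Glorot initialization, as in the TF 1.x cell\n",
    "        model.add(tf.keras.layers.Dense(n, activation='relu', kernel_initializer='glorot_uniform'))\n",
    "    model.add(tf.keras.layers.Dense(1))  # single regression output (valence)\n",
    "    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss='mse')\n",
    "    return model\n",
    "\n",
    "# Example usage (after the data-preparation cell below has run):\n",
    "# model = build_mlp()\n",
    "# model.fit(X_scaled_training, Y_scaled_training, epochs=5000,\n",
    "#           validation_data=(X_scaled_testing, Y_scaled_testing))"
   ]
  },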
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull out columns for X (data to train with) and Y (value to predict)\n",
    "X_training = X[0:468480:32]\n",
    "Y_training = Z[0:468480:32]\n",
    "\n",
    "# Pull out columns for X (data to train with) and Y (value to predict)\n",
    "X_testing = M[0:78080:32]\n",
    "Y_testing = L[0:78080:32]\n",
    "\n",
    "# DO Scale both the training inputs and outputs\n",
    "X_scaled_training = pd.DataFrame (data = X_training).values\n",
    "Y_scaled_training = pd.DataFrame (data = Y_training).values\n",
    "\n",
    "# It's very important that the training and test data are scaled with the same scaler.\n",
    "X_scaled_testing = pd.DataFrame (data = X_testing).values\n",
    "Y_scaled_testing = pd.DataFrame (data = Y_testing).values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn off TensorFlow warning messages in program output\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n",
    "\n",
    "# Define model parameters\n",
    "t = time.time()\n",
    "learning_rate = 0.0001\n",
    "training_epochs = 5000\n",
    "display_step = 1\n",
    "\n",
    "# Define how many inputs and outputs are in our neural network\n",
    "number_of_inputs = 70\n",
    "number_of_outputs = 1\n",
    "\n",
    "# Define how many neurons we want in each layer of our neural network\n",
    "layer_1_nodes = 512\n",
    "layer_2_nodes = 1024\n",
    "layer_3_nodes = 1024\n",
    "layer_4_nodes = 512\n",
    "\n",
    "# Section One: Define the layers of the neural network itself\n",
    "RUN_NAME = str(int(round(t * 1000))) + '_' + str(layer_1_nodes) + '_' + str(layer_2_nodes) + '_' + str(layer_3_nodes) + '_' + str(layer_4_nodes) + '_' + str(learning_rate) + '_' + str(training_epochs) + '_' + 'Val'\n",
    "\n",
    "\n",
    "# Input Layer\n",
    "with tf.variable_scope('input'):\n",
    "    X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))\n",
    "\n",
    "# Layer 1\n",
    "with tf.variable_scope('layer_1'):\n",
    "    weights = tf.get_variable(\"weights1\", shape=[number_of_inputs, layer_1_nodes], initializer=tf.contrib.layers.xavier_initializer())\n",
    "    biases = tf.get_variable(name=\"biases1\", shape=[layer_1_nodes], initializer=tf.zeros_initializer())\n",
    "    layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)\n",
    "\n",
    "# Layer 2\n",
    "with tf.variable_scope('layer_2'):\n",
    "    weights = tf.get_variable(\"weights2\", shape=[layer_1_nodes, layer_2_nodes], initializer=tf.contrib.layers.xavier_initializer())\n",
    "    biases = tf.get_variable(name=\"biases2\", shape=[layer_2_nodes], initializer=tf.zeros_initializer())\n",
    "    layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)\n",
    "\n",
    "# Layer 3\n",
    "with tf.variable_scope('layer_3'):\n",
    "    weights = tf.get_variable(\"weights3\", shape=[layer_2_nodes, layer_3_nodes], initializer=tf.contrib.layers.xavier_initializer())\n",
    "    biases = tf.get_variable(name=\"biases3\", shape=[layer_3_nodes], initializer=tf.zeros_initializer())\n",
    "    layer_3_output = tf.nn.relu(tf.matmul(layer_2_output, weights) + biases)\n",
    "\n",
    "# Layer 4\n",
    "with tf.variable_scope('layer_4'):\n",
    "    weights = tf.get_variable(\"weights4\", shape=[layer_3_nodes, layer_4_nodes], initializer=tf.contrib.layers.xavier_initializer())\n",
    "    biases = tf.get_variable(name=\"biases4\", shape=[layer_4_nodes], initializer=tf.zeros_initializer())\n",
    "    layer_4_output = tf.nn.relu(tf.matmul(layer_3_output, weights) + biases)\n",
    "\n",
    "# Output Layer\n",
    "with tf.variable_scope('output'):\n",
    "    weights = tf.get_variable(\"weights5\", shape=[layer_4_nodes, number_of_outputs], initializer=tf.contrib.layers.xavier_initializer())\n",
    "    biases = tf.get_variable(name=\"biases5\", shape=[number_of_outputs], initializer=tf.zeros_initializer())\n",
    "    prediction = tf.matmul(layer_4_output, weights) + biases\n",
    "\n",
    "# Section Two: Define the cost function of the neural network that will be optimized during training\n",
    "\n",
    "with tf.variable_scope('cost'):\n",
    "    Y = tf.placeholder(tf.float32, shape=(None, 1))\n",
    "    cost = tf.reduce_mean(tf.squared_difference(prediction, Y))\n",
    "\n",
    "# Section Three: Define the optimizer function that will be run to optimize the neural network\n",
    "\n",
    "with tf.variable_scope('train'):\n",
    "    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)\n",
    "\n",
    "# Create a summary operation to log the progress of the network\n",
    "with tf.variable_scope('logging'):\n",
    "    tf.summary.scalar('current_cost', cost)\n",
    "    summary = tf.summary.merge_all()\n",
    "\n",
    "saver = tf.train.Saver()\n",
    "\n",
    "# Initialize a session so that we can run TensorFlow operations\n",
    "with tf.Session() as session:\n",
    "\n",
    "    # Run the global variable initializer to initialize all variables and layers of the neural network\n",
    "    session.run(tf.global_variables_initializer())\n",
    "\n",
    "    # Create log file writers to record training progress.\n",
    "    # We'll store training and testing log data separately.\n",
    "    training_writer = tf.summary.FileWriter(\"./{}/logs/training\".format(RUN_NAME), session.graph)\n",
    "    testing_writer = tf.summary.FileWriter(\"./{}/logs/testing\".format(RUN_NAME), session.graph)\n",
    "\n",
    "    # Run the optimizer over and over to train the network.\n",
    "    # One epoch is one full run through the training data set.\n",
    "    for epoch in range(training_epochs):\n",
    "\n",
    "        # Feed in the training data and do one step of neural network training\n",
    "        session.run(optimizer, feed_dict={X: X_scaled_training, Y: Y_scaled_training})\n",
    "\n",
    "        # Every few training steps, log our progress\n",
    "        if epoch % display_step == 0:\n",
    "            # Get the current accuracy scores by running the \"cost\" operation on the training and test data sets\n",
    "            training_cost, training_summary = session.run([cost, summary], feed_dict={X: X_scaled_training, Y:Y_scaled_training})\n",
    "            testing_cost, testing_summary = session.run([cost, summary], feed_dict={X: X_scaled_testing, Y:Y_scaled_testing})\n",
    "\n",
    "            # Write the current training status to the log files (Which we can view with TensorBoard)\n",
    "            training_writer.add_summary(training_summary, epoch)\n",
    "            testing_writer.add_summary(testing_summary, epoch)\n",
    "\n",
    "            # Print the current training status to the screen\n",
    "            print(\"Epoch: {} - Training Cost: {}  Testing Cost: {}\".format(epoch, training_cost, testing_cost))\n",
    "\n",
    "    # Training is now complete!\n",
    "\n",
    "    # Get the final accuracy scores by running the \"cost\" operation on the training and test data sets\n",
    "    final_training_cost = session.run(cost, feed_dict={X: X_scaled_training, Y: Y_scaled_training})\n",
    "    final_testing_cost = session.run(cost, feed_dict={X: X_scaled_testing, Y: Y_scaled_testing})\n",
    "\n",
    "    print(\"Final Training cost: {}\".format(final_training_cost))\n",
    "    print(\"Final Testing cost: {}\".format(final_testing_cost))\n",
    "\n",
    "    save_path = saver.save(session, \"./{}/logs/trained_model.ckpt\".format(RUN_NAME))\n",
    "    print(\"Model saved: {}\".format(save_path))\n",
    "\n",
    "    '''\n",
    "    # Now that the neural network is trained, let's use it to make predictions for our test data.\n",
    "    # Pass in the X testing data and run the \"prediciton\" operation\n",
    "    Y_predicted_scaled = session.run(prediction, feed_dict={X: X_scaled_testing})\n",
    "    # Unscale the data back to it's original units (dollars)\n",
    "    Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)\n",
    "    real_earnings = test_data_df['total_earnings'].values[0]\n",
    "    predicted_earnings = Y_predicted[0][0]\n",
    "    print(\"The actual earnings of Game #1 were ${}\".format(real_earnings))\n",
    "    print(\"Our neural network predicted earnings of ${}\".format(predicted_earnings))\n",
    "    \n",
    "'''\n",
    "    model_builder = tf.saved_model.builder.SavedModelBuilder(\"./{}/exported_model\".format(RUN_NAME))\n",
    "\n",
    "    inputs = {\n",
    "        'input': tf.saved_model.utils.build_tensor_info(X)\n",
    "        }\n",
    "    outputs = {\n",
    "        'earnings': tf.saved_model.utils.build_tensor_info(prediction)\n",
    "        }\n",
    "\n",
    "    signature_def = tf.saved_model.signature_def_utils.build_signature_def(\n",
    "        inputs=inputs,\n",
    "        outputs=outputs,\n",
    "        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME\n",
    "    )\n",
    "\n",
    "    model_builder.add_meta_graph_and_variables(\n",
    "        session,\n",
    "        tags=[tf.saved_model.tag_constants.SERVING],\n",
    "        signature_def_map={\n",
    "            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def\n",
    "        }\n",
    "    )\n",
    "\n",
    "    model_builder.save()\n",
    "    print('model saved')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}