backwardPropagation.py
import math
import os
import re
import string
from collections import Counter, OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from bs4 import BeautifulSoup
from nltk import bigrams
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_selection import RFE
from sklearn.metrics import (accuracy_score, confusion_matrix,
                             precision_score, recall_score)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from tensorflow.python.framework import ops

from tf_utils import (load_dataset, convert_to_one_hot, create_placeholders,
                      initialize_parameters, forward_propagation,
                      compute_cost, random_mini_batches)
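
# NOTE (assumption): tf_utils is taken to be the helper module from the
# Coursera "Improving Deep Neural Networks" course, providing roughly:
#   create_placeholders(n_x, n_y)         -> X, Y placeholders of shape (n_x, None), (n_y, None)
#   initialize_parameters()               -> dict of weight/bias variables for a 3-layer net
#   forward_propagation(X, parameters)    -> Z3, the output layer's linear activations
#   compute_cost(Z3, Y)                   -> mean softmax cross-entropy cost
#   random_mini_batches(X, Y, size, seed) -> list of (minibatch_X, minibatch_Y) tuples
# The module must sit next to this script or be on the PYTHONPATH.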


def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=2000, minibatch_size=32, print_cost=True):
    """Train a neural network in TensorFlow and return the learned parameters.

    X_train, X_test have shape (input size, number of examples);
    Y_train, Y_test are one-hot labels of shape (number of classes, number of examples).
    """
    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)      # to keep consistent results
    seed = 3                   # to keep consistent results
    (n_x, m) = X_train.shape   # n_x: input size, m: number of examples in the train set
    n_y = Y_train.shape[0]     # n_y: output size
    costs = []                 # to keep track of the cost
    # Create placeholders of shape (n_x, None) and (n_y, None)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: add the cost function to the tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: define the tensorflow optimizer; use an AdamOptimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()
    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.                            # defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1                            # reshuffle differently (but reproducibly) each epoch
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: the line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost";
                # the feed_dict contains the minibatch for (X, Y).
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={X: minibatch_X, Y: minibatch_Y})

                epoch_cost += minibatch_cost / num_minibatches
            # Print the cost every 100 epochs and record it every 5 epochs
            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)
        # Plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs (per fives)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()
        # Save the trained parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions: argmax over axis 0 picks the
        # predicted class per example, since the data is laid out as (classes, m)
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the train and test sets
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
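

# Example usage: a minimal sketch, assuming load_dataset() returns image
# arrays of shape (m, 64, 64, 3) with integer labels over 6 classes, as the
# SIGNS helpers in the course's tf_utils module do. Adjust the reshaping and
# the class count to match your own dataset.
if __name__ == "__main__":
    X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset()

    # Flatten each example into a column vector and scale pixels to [0, 1]
    X_train = X_train_orig.reshape(X_train_orig.shape[0], -1).T / 255.
    X_test = X_test_orig.reshape(X_test_orig.shape[0], -1).T / 255.

    # One-hot encode the labels (assumed 6 classes)
    Y_train = convert_to_one_hot(Y_train_orig, 6)
    Y_test = convert_to_one_hot(Y_test_orig, 6)

    parameters = model(X_train, Y_train, X_test, Y_test)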