
Projects/NCS1/Evaluation.py
############################################################################################
#
# Project:       Peter Moss Acute Myeloid & Lymphoblastic Leukemia AI Research Project
# Repository:    ALL Detection System 2019
# Project:       ALL Detection System 2019 NCS1 Classifier
#
# Author:        Adam Milton-Barker (AdamMiltonBarker.com)
# Contributors:
# Title:         Evaluation Class
# Description:   Evaluation class for the ALL Detection System 2019 NCS1 Classifier.
# License:       MIT License
# Last Modified: 2020-07-16
#
############################################################################################

import cv2, json, matplotlib, os, sys, time

import Classes.inception_preprocessing

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tkinter as tk
import pylab as pl

from tensorflow.python.framework import graph_util
from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
from tensorflow.python.platform import tf_logging as logging

from Classes.Helpers import Helpers
from Classes.Data import Data
from Classes.inception_v3 import inception_v3, inception_v3_arg_scope

matplotlib.use("Agg")
plt.style.use('ggplot')
slim = tf.contrib.slim

# config = tf.ConfigProto(intra_op_parallelism_threads=12, inter_op_parallelism_threads=2,
#                        allow_soft_placement=True,  device_count={'CPU': 12})

#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
#os.environ["OMP_NUM_THREADS"] = "12"
#os.environ["KMP_BLOCKTIME"] = "30"
#os.environ["KMP_SETTINGS"] = "1"
#os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0"

class Evaluation():
    """ Evaluation Class

    Evaluates the ALL Detection System 2019 NCS1 Classifier.
    """

    def __init__(self):
        """ Initializes the Evaluation Class """

        self.Helpers = Helpers("Evaluator")
        self.confs = self.Helpers.confs
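
        # For reference, this class only reads the "Classifier" section of the
        # configuration loaded by Helpers. A minimal sketch of the expected keys,
        # with purely illustrative values, might look like:
        #
        # {
        #     "Classifier": {
        #         "LogDir": "Model/Logs",
        #         "LogDirEval": "Model/LogsEval",
        #         "DatasetDir": "Model/Data",
        #         "Labels": "labels.txt",
        #         "TFRecordPattern": "ALL_%s_*.tfrecord",
        #         "NumClasses": 2,
        #         "BatchTestSize": 10,
        #         "ImageSize": 299,
        #         "EpochsTest": 1
        #     }
        # }
        #
        # The key names above are the ones referenced throughout this file; the values
        # and file paths are assumptions, not the project's actual settings.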

        self.labelsToName = {}

        self.checkpoint_file = tf.train.latest_checkpoint(
            self.confs["Classifier"]["LogDir"])

        # Open the labels file
        self.labels = open(
            self.confs["Classifier"]["DatasetDir"] + "/" + self.confs["Classifier"]["Labels"], 'r')

        # Create a dictionary mapping each label index to its string name
        for line in self.labels:
            label, string_name = line.split(':')
            string_name = string_name[:-1]  # Remove newline
            self.labelsToName[int(label)] = string_name

        # Create a dictionary describing the dataset. This is required by the Dataset class later.
        self.items_to_descriptions = {
            'image': 'A 3-channel RGB colour image.',
            'label': 'A label index, starting from zero.'
        }

        self.Helpers.logger.info(
            "Evaluator class initialization complete.")

    # ============== DATASET LOADING ======================
    # Creates a Dataset class that reads the TFRecord shards so that examples can be fed into a queue in parallel.
    def getSplit(self, split_name):
        '''
            Obtains the split - training or validation - and wraps the decoder and dataset
            information in a single Dataset class so the examples can be fed into a queue later on.
            The file_pattern is what locates the TFRecord files.

            INPUTS:
                - split_name(str): 'train' or 'validation'. Used to get the correct data split of tfrecord files

            OUTPUTS:
                - dataset (Dataset): A Dataset class object whose components can be read for easier batch creation later.
        '''

        # First check whether the split_name is train or validation
        if split_name not in ['train', 'validation']:

            raise ValueError(
                'The split_name %s is not recognized. Please input either train or validation as the split_name' % (split_name))

        # Create the full path for a general file_pattern to locate the tfrecord files
        file_pattern_path = os.path.join(
            self.confs["Classifier"]["DatasetDir"], self.confs["Classifier"]["TFRecordPattern"] % (split_name))

        # Count the total number of examples in all of these shards
        num_samples = 0
        file_pattern_for_counting = 'ALL_' + split_name
        tfrecords_to_count = [os.path.join(self.confs["Classifier"]["DatasetDir"], file) for file in os.listdir(
            self.confs["Classifier"]["DatasetDir"]) if file.startswith(file_pattern_for_counting)]

        # print(tfrecords_to_count)
        for tfrecord_file in tfrecords_to_count:

            for record in tf.python_io.tf_record_iterator(tfrecord_file):

                num_samples += 1

        # Create a reader, which must be a TFRecord reader in this case
        reader = tf.TFRecordReader

        # Create the keys_to_features dictionary for the decoder
        keys_to_features = {
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
            'image/class/label': tf.FixedLenFeature(
                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
        }

        # Create the items_to_handlers dictionary for the decoder.
        items_to_handlers = {
            'image': slim.tfexample_decoder.Image(),
            'label': slim.tfexample_decoder.Tensor('image/class/label'),
        }

        # Create the decoder
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Create the labels_to_name dictionary
        labels_to_name_dict = self.labelsToName

        # Actually create the dataset
        dataset = slim.dataset.Dataset(
            data_sources=file_pattern_path,
            decoder=decoder,
            reader=reader,
            num_readers=4,
            num_samples=num_samples,
            num_classes=self.confs["Classifier"]["NumClasses"],
            labels_to_name=labels_to_name_dict,
            items_to_descriptions=self.items_to_descriptions)

        return dataset

    def loadBatch(self, dataset, is_training=True):
        '''
            Loads a batch of images and labels from the dataset. The batch size and image
            size are taken from the "Classifier" configuration (BatchTestSize and ImageSize).

            INPUTS:
                - dataset(Dataset): a Dataset class object created by the getSplit function
                - is_training(bool): determines whether to perform training or evaluation preprocessing

            OUTPUTS:
                - images(Tensor): a Tensor of shape (batch_size, height, width, channels) containing one batch of preprocessed images
                - raw_images(Tensor): the same batch without preprocessing, only resized, for visualization
                - labels(Tensor): the batch's labels with the shape (batch_size,) (requires one_hot_encoding later).
        '''

        # First create the data_provider object
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=24 + 3 *
            self.confs["Classifier"]["BatchTestSize"],
            common_queue_min=24)

        # Obtain the raw image using the get method
        raw_image, label = data_provider.get(['image', 'label'])

        # Perform the correct preprocessing for this image depending on whether it is training or evaluating
        image = Classes.inception_preprocessing.preprocess_image(
            raw_image, self.confs["Classifier"]["ImageSize"], self.confs["Classifier"]["ImageSize"], is_training)

        # As for the raw images, just resize them so they can be batched
        raw_image = tf.image.resize_image_with_crop_or_pad(
            raw_image, self.confs["Classifier"]["ImageSize"], self.confs["Classifier"]["ImageSize"])

        # Batch up the images by enqueuing the tensors internally in a FIFO queue and dequeuing many elements with tf.train.batch.
        images, raw_images, labels = tf.train.batch(
            [image, raw_image, label],
            batch_size=self.confs["Classifier"]["BatchTestSize"],
            num_threads=4,
            capacity=4 * self.confs["Classifier"]["BatchTestSize"],
            allow_smaller_final_batch=True)

        return images, raw_images, labels


Evaluation = Evaluation()
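# Note: this module-level instance rebinds the name Evaluation from the class to the
# instance that run() below uses; the class itself is no longer bound to a module-level
# name after this line.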


def run():

    # Create LogDirEval for evaluation information
    if not os.path.exists(Evaluation.confs["Classifier"]["LogDirEval"]):
        os.mkdir(Evaluation.confs["Classifier"]["LogDirEval"])

    # Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:

        tf.logging.set_verbosity(tf.logging.INFO)

        # Get the dataset first and load one batch of validation images and labels tensors. Set is_training to False so as to use the evaluation preprocessing
        dataset = Evaluation.getSplit('validation')
        images, raw_images, labels = Evaluation.loadBatch(dataset, is_training=False)

        # Create some information about the evaluation steps (integer division so the
        # epoch boundary checks below behave correctly)
        num_batches_per_epoch = dataset.num_samples // \
            Evaluation.confs["Classifier"]["BatchTestSize"]
        num_steps_per_epoch = num_batches_per_epoch

        # Now create the inference model but set is_training=False
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(
                images, num_classes=dataset.num_classes, is_training=False)

        # Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Performs the equivalent of tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=one_hot_labels, logits=logits)
        # Obtain the regularization losses as well
        total_loss = tf.losses.get_total_loss()

        # Get all the variables to restore from the checkpoint file and create the saver function to restore them
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, Evaluation.checkpoint_file)

        # Just define the metrics to track, without the loss
        probabilities = end_points['Predictions']
        predictions = tf.argmax(probabilities, 1)

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update)

        # Create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        # There is no apply_gradient method here, so the global_step is increased manually
        global_step_op = tf.assign(global_step, global_step + 1)

        # Create an evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Simply takes in a session, runs the metrics op and logs some information.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            # Log some information
            logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                         global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        # Define some scalar quantities to monitor
        tf.summary.scalar("Validation_Accuracy", accuracy)
        tf.summary.scalar("Validation_losses/Total_Loss", total_loss)
        my_summary_op = tf.summary.merge_all()

        # Get the supervisor
        sv = tf.train.Supervisor(
            logdir=Evaluation.confs["Classifier"]["LogDirEval"], summary_op=None, init_fn=restore_fn)

        # Now we are ready to run in one session
        with sv.managed_session() as sess:
            for step in range(int(num_batches_per_epoch * Evaluation.confs["Classifier"]["EpochsTest"])):
                # Print vital information at the start of every epoch as always
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s', step // num_batches_per_epoch + 1,
                                 Evaluation.confs["Classifier"]["EpochsTest"])
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                # Compute summaries every 10 steps and continue evaluating
                if step % 10 == 0:
                    eval_step(sess, metrics_op=metrics_op,
                              global_step=sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                # Otherwise just run as per normal
                else:
                    eval_step(sess, metrics_op=metrics_op,
                              global_step=sv.global_step)

            # At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            # Now we want to visualize the last batch's images just to see what our model has predicted
            raw_images, labels, predictions, probabilities = sess.run(
                [raw_images, labels, predictions, probabilities])
            # The final batch may be smaller than 10 because allow_smaller_final_batch=True
            for i in range(min(10, len(raw_images))):
                image, label, prediction, probability = raw_images[
                    i], labels[i], predictions[i], probabilities[i]
                prediction_name, label_name = dataset.labels_to_name[
                    prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
                    prediction_name, label_name, probability[prediction])
                img_plot = plt.imshow(image)

                # Set up the plot and hide axes
                # plt.title(text)
                # img_plot.axes.get_yaxis().set_ticks([])
                # img_plot.axes.get_xaxis().set_ticks([])
                # plt.show()

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
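

# When this file is executed directly, evaluation runs over the validation split. It
# assumes the classifier has already been trained (checkpoints present in LogDir) and
# that the TFRecords and labels file exist in DatasetDir; e.g. run "python3 Evaluation.py"
# from the Projects/NCS1 directory (the invocation path is illustrative).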

if __name__ == '__main__':
    run()