--- a
+++ b/Projects/NCS1/Evaluation.py
@@ -0,0 +1,335 @@
+############################################################################################
+#
+# Project:       Peter Moss Acute Myeloid & Lymphoblastic Leukemia AI Research Project
+# Repository:    ALL Detection System 2019
+#
+# Author:        Adam Milton-Barker (AdamMiltonBarker.com)
+# Contributors:
+# Title:         Evaluation Class
+# Description:   Evaluation class for the ALL Detection System 2019 NCS1 Classifier.
+# License:       MIT License
+# Last Modified: 2020-07-16
+#
+############################################################################################
+
+import matplotlib
+matplotlib.use("Agg")  # Select the Agg backend before pyplot is imported
+
+import os, time
+
+import Classes.inception_preprocessing
+
+import tensorflow as tf
+import matplotlib.pyplot as plt
+
+from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
+from tensorflow.python.platform import tf_logging as logging
+
+from Classes.Helpers import Helpers
+from Classes.inception_v3 import inception_v3, inception_v3_arg_scope
+
+plt.style.use('ggplot')
+slim = tf.contrib.slim
+
+# config = tf.ConfigProto(intra_op_parallelism_threads=12, inter_op_parallelism_threads=2,
+#                         allow_soft_placement=True, device_count={'CPU': 12})
+
+# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+# os.environ["OMP_NUM_THREADS"] = "12"
+# os.environ["KMP_BLOCKTIME"] = "30"
+# os.environ["KMP_SETTINGS"] = "1"
+# os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0"
+
+
+class Evaluation():
+    """ Evaluation Class
+
+    Evaluates the ALL Detection System 2019 NCS1 Classifier.
+    """
+
+    def __init__(self):
+        """ Initializes the Evaluation class. """
+
+        self.Helpers = Helpers("Evaluator")
+        self.confs = self.Helpers.confs
+
+        self.labelsToName = {}
+
+        self.checkpoint_file = tf.train.latest_checkpoint(
+            self.confs["Classifier"]["LogDir"])
+
+        # Open the labels file
+        self.labels = open(
+            self.confs["Classifier"]["DatasetDir"] + "/" + self.confs["Classifier"]["Labels"], 'r')
+
+        # Create a dictionary mapping each integer label to its string name
+        for line in self.labels:
+            label, string_name = line.split(':')
+            string_name = string_name[:-1]  # Remove the trailing newline
+            self.labelsToName[int(label)] = string_name
+
+        # Descriptions of the dataset items, required by the Dataset class later
+        self.items_to_descriptions = {
+            'image': 'A 3-channel RGB image.',
+            'label': 'A label that starts from zero.'
+        }
+
+        self.Helpers.logger.info(
+            "Evaluator class initialization complete.")
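+
+    # For reference, the labels file parsed in __init__ holds one "index:name"
+    # pair per line; an illustrative (hypothetical) example for this dataset:
+    #
+    #   0:Negative
+    #   1:Positive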
+
+    # ============== DATASET LOADING ======================
+    # Creates a Dataset class that reads many TFRecord files and feeds the
+    # examples into a queue in parallel.
+    def getSplit(self, split_name):
+        ''' Obtains the split - training or validation - to create a Dataset class
+        for feeding the examples into a queue later on. This function sets up the
+        decoder and dataset information all in one Dataset class so that the brute
+        work can be avoided later. The file_pattern is very important for locating
+        the files.
+
+        INPUTS:
+        - split_name(str): 'train' or 'validation'. Used to get the correct data split of TFRecord files.
+
+        OUTPUTS:
+        - dataset(Dataset): A Dataset class object whose components can be read for easier batch creation later.
+        '''
+
+        # First check whether the split_name is train or validation
+        if split_name not in ['train', 'validation']:
+            raise ValueError(
+                'The split_name %s is not recognized. Please input either train or validation as the split_name' % (split_name))
+
+        # Create the full path for a general file_pattern to locate the TFRecord files
+        file_pattern_path = os.path.join(
+            self.confs["Classifier"]["DatasetDir"], self.confs["Classifier"]["TFRecordPattern"] % (split_name))
+
+        # Count the total number of examples in all of the shards
+        num_samples = 0
+        file_pattern_for_counting = 'ALL_' + split_name
+        tfrecords_to_count = [os.path.join(self.confs["Classifier"]["DatasetDir"], file) for file in os.listdir(
+            self.confs["Classifier"]["DatasetDir"]) if file.startswith(file_pattern_for_counting)]
+
+        for tfrecord_file in tfrecords_to_count:
+            for record in tf.python_io.tf_record_iterator(tfrecord_file):
+                num_samples += 1
+
+        # Create a reader, which must be a TFRecord reader in this case
+        reader = tf.TFRecordReader
+
+        # Create the keys_to_features dictionary for the decoder
+        keys_to_features = {
+            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
+            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
+            'image/class/label': tf.FixedLenFeature(
+                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
+        }
+
+        # Create the items_to_handlers dictionary for the decoder
+        items_to_handlers = {
+            'image': slim.tfexample_decoder.Image(),
+            'label': slim.tfexample_decoder.Tensor('image/class/label'),
+        }
+
+        # Create the decoder
+        decoder = slim.tfexample_decoder.TFExampleDecoder(
+            keys_to_features, items_to_handlers)
+
+        # Actually create the dataset
+        dataset = slim.dataset.Dataset(
+            data_sources=file_pattern_path,
+            decoder=decoder,
+            reader=reader,
+            num_readers=4,
+            num_samples=num_samples,
+            num_classes=self.confs["Classifier"]["NumClasses"],
+            labels_to_name=self.labelsToName,
+            items_to_descriptions=self.items_to_descriptions)
+
+        return dataset
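+
+    # Illustrative (hypothetical) shard names that the counting logic above
+    # would match for the validation split, assuming the dataset was written
+    # as two TFRecord shards:
+    #
+    #   ALL_validation_00000-of-00002.tfrecord
+    #   ALL_validation_00001-of-00002.tfrecord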
+
+    def loadBatch(self, dataset, is_training=True):
+        ''' Loads a batch for evaluation. The batch size and image dimensions
+        are read from the Classifier configuration.
+
+        INPUTS:
+        - dataset(Dataset): a Dataset class object created by the getSplit function
+        - is_training(bool): determines whether to perform training or evaluation preprocessing
+
+        OUTPUTS:
+        - images(Tensor): a Tensor of shape (batch_size, height, width, channels) containing one batch of preprocessed images
+        - raw_images(Tensor): the same batch, only resized, for visualization
+        - labels(Tensor): the batch's labels with shape (batch_size,) (requires one-hot encoding)
+        '''
+
+        # First create the data_provider object
+        data_provider = slim.dataset_data_provider.DatasetDataProvider(
+            dataset,
+            common_queue_capacity=24 + 3 * self.confs["Classifier"]["BatchTestSize"],
+            common_queue_min=24)
+
+        # Obtain the raw image using the get method
+        raw_image, label = data_provider.get(['image', 'label'])
+
+        # Perform the correct preprocessing for this image, depending on whether
+        # it is for training or evaluation
+        image = Classes.inception_preprocessing.preprocess_image(
+            raw_image, self.confs["Classifier"]["ImageSize"], self.confs["Classifier"]["ImageSize"], is_training)
+
+        # As for the raw images, just do a simple reshape so they can be batched
+        raw_image = tf.image.resize_image_with_crop_or_pad(
+            raw_image, self.confs["Classifier"]["ImageSize"], self.confs["Classifier"]["ImageSize"])
+
+        # Batch up the images by enqueueing the tensors internally in a FIFO
+        # queue and dequeueing many elements with tf.train.batch
+        images, raw_images, labels = tf.train.batch(
+            [image, raw_image, label],
+            batch_size=self.confs["Classifier"]["BatchTestSize"],
+            num_threads=4,
+            capacity=4 * self.confs["Classifier"]["BatchTestSize"],
+            allow_smaller_final_batch=True)
+
+        return images, raw_images, labels
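+
+    # Shape sketch for the tensors returned above, using illustrative values
+    # (ImageSize=299, BatchTestSize=36; the real values come from the configuration):
+    #
+    #   images:     (36, 299, 299, 3)  preprocessed batch
+    #   raw_images: (36, 299, 299, 3)  only resized, for visualization
+    #   labels:     (36,)              integer class indices
+    #
+    # With allow_smaller_final_batch=True the last batch may hold fewer examples.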
+
+Evaluator = Evaluation()
+
+
+def run():
+
+    # Create LogDirEval for evaluation information
+    if not os.path.exists(Evaluator.confs["Classifier"]["LogDirEval"]):
+        os.mkdir(Evaluator.confs["Classifier"]["LogDirEval"])
+
+    # Construct the graph from scratch again
+    with tf.Graph().as_default() as graph:
+
+        tf.logging.set_verbosity(tf.logging.INFO)
+
+        # Get the dataset and load one batch of validation images and labels
+        # tensors. is_training is False so the evaluation preprocessing is used.
+        dataset = Evaluator.getSplit('validation')
+        images, raw_images, labels = Evaluator.loadBatch(
+            dataset, is_training=False)
+
+        # Create some information about the evaluation steps; integer division
+        # keeps the epoch boundary check below exact under Python 3
+        num_batches_per_epoch = int(
+            dataset.num_samples / Evaluator.confs["Classifier"]["BatchTestSize"])
+        num_steps_per_epoch = num_batches_per_epoch
+
+        # Now create the inference model, with is_training=False
+        with slim.arg_scope(inception_v3_arg_scope()):
+            logits, end_points = inception_v3(
+                images, num_classes=dataset.num_classes, is_training=False)
+
+        # Perform one-hot encoding of the labels
+        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
+
+        # Performs the equivalent of tf.nn.sparse_softmax_cross_entropy_with_logits
+        # but enhanced with checks
+        loss = tf.losses.softmax_cross_entropy(
+            onehot_labels=one_hot_labels, logits=logits)
+        # Obtain the regularization losses as well
+        total_loss = tf.losses.get_total_loss()
+
+        # Get all the variables to restore from the checkpoint file and create
+        # the saver function to restore them
+        variables_to_restore = slim.get_variables_to_restore()
+        saver = tf.train.Saver(variables_to_restore)
+
+        def restore_fn(sess):
+            return saver.restore(sess, Evaluator.checkpoint_file)
+
+        # Define the metrics to track, without the loss
+        probabilities = end_points['Predictions']
+        predictions = tf.argmax(probabilities, 1)
+
+        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
+            predictions, labels)
+        metrics_op = tf.group(accuracy_update)
+
+        # Create the global step and an increment op for monitoring; there is no
+        # apply_gradients call here, so the global step is increased manually
+        global_step = get_or_create_global_step()
+        global_step_op = tf.assign(global_step, global_step + 1)
+
+        def eval_step(sess, metrics_op, global_step):
+            ''' Takes in a session, runs the metrics op and logs some information. '''
+
+            start_time = time.time()
+            _, global_step_count, accuracy_value = sess.run(
+                [metrics_op, global_step_op, accuracy])
+            time_elapsed = time.time() - start_time
+
+            # Log some information
+            logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
+                         global_step_count, accuracy_value, time_elapsed)
+
+            return accuracy_value
+
+        # Define some scalar quantities to monitor
+        tf.summary.scalar("Validation_Accuracy", accuracy)
+        tf.summary.scalar("Validation_losses/Total_Loss", total_loss)
+        my_summary_op = tf.summary.merge_all()
+
+        # Get the supervisor
+        sv = tf.train.Supervisor(
+            logdir=Evaluator.confs["Classifier"]["LogDirEval"], summary_op=None, init_fn=restore_fn)
+
+        # Now everything is ready to run in one session
+        with sv.managed_session() as sess:
+            for step in range(int(num_batches_per_epoch * Evaluator.confs["Classifier"]["EpochsTest"])):
+
+                # Print vital information at the start of every epoch
+                if step % num_batches_per_epoch == 0:
+                    logging.info('Epoch: %s/%s', step // num_batches_per_epoch + 1,
+                                 Evaluator.confs["Classifier"]["EpochsTest"])
+                    logging.info('Current Streaming Accuracy: %.4f',
+                                 sess.run(accuracy))
+
+                # Compute summaries every 10 steps and continue evaluating
+                if step % 10 == 0:
+                    eval_step(sess, metrics_op=metrics_op,
+                              global_step=sv.global_step)
+                    summaries = sess.run(my_summary_op)
+                    sv.summary_computed(sess, summaries)
+
+                # Otherwise just run as per normal
+                else:
+                    eval_step(sess, metrics_op=metrics_op,
+                              global_step=sv.global_step)
+
+            # At the end of all the evaluation, show the final accuracy
+            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))
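+
+            # Note: streaming_accuracy maintains a running total/count pair that
+            # accuracy_update adds to on every eval_step call, so the value
+            # logged above is the accuracy over all batches evaluated, not just
+            # the final batch.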
+
+            # Now visualize the last batch's images to see what the model predicted
+            raw_images, labels, predictions, probabilities = sess.run(
+                [raw_images, labels, predictions, probabilities])
+
+            # The final batch may hold fewer than 10 examples because
+            # allow_smaller_final_batch is True
+            for i in range(min(10, len(raw_images))):
+                image, label, prediction, probability = raw_images[
+                    i], labels[i], predictions[i], probabilities[i]
+                prediction_name, label_name = dataset.labels_to_name[
+                    prediction], dataset.labels_to_name[label]
+                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
+                    prediction_name, label_name, probability[prediction])
+                img_plot = plt.imshow(image)
+
+                # Set up the plot and hide the axes
+                # plt.title(text)
+                # img_plot.axes.get_yaxis().set_ticks([])
+                # img_plot.axes.get_xaxis().set_ticks([])
+                # plt.show()
+
+            logging.info(
+                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.')
+            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
+
+
+if __name__ == '__main__':
+    run()
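+
+# A minimal usage sketch (assumes a TensorFlow 1.x environment with
+# tf.contrib.slim available, the Classes package importable, and a trained
+# checkpoint in the configured LogDir):
+#
+#   $ python Evaluation.py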