all-classifiers-2019 / Git / Diff of /Projects/Caffe/allCNN/Classes/CaffeHelpers.py

Models:

WandaB/

all-classifiers-2019

Downloads: 1

Diff of /Projects/Caffe/allCNN/Classes/CaffeHelpers.py [000000] .. [acd362]

Switch to unified view

 b/Projects/Caffe/allCNN/Classes/CaffeHelpers.py
+############################################################################################
+#
+# The MIT License (MIT)
+#
+# Peter Moss Acute Myeloid/Lymphoblastic Leukemia AI Research Project
+# Copyright (C) 2018 Adam Milton-Barker (AdamMiltonBarker.com)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# Title:         Caffe Acute Lymphoblastic Leukemia CNN Caffe Helpers
+# Description:   Common Caffe tools used by the Caffe Acute Lymphoblastic Leukemia CNN
+# Configuration: Required/Confs.json
+# Last Modified: 2019-03-10
+# References:    Based on: ACUTE LEUKEMIA CLASSIFICATION USING CONVOLUTION NEURAL NETWORK
+#                IN CLINICAL DECISION SUPPORT SYSTEM
+#                https://airccj.org/CSCP/vol7/csit77505.pdf
+#
+############################################################################################
+import os, sys, random, cv2, lmdb
+sys.path.append('/home/upsquared/caffe/python')
+from caffe.proto import caffe_pb2
+class CaffeHelpers():
+    def __init__(self, confs, helpers, logFile):
+        """
+        Sets up all default requirements and placeholders needed for the
+        Caffe Acute Lymphoblastic Leukemia CNN Helpers.
+        """
+        self.Helpers = helpers
+        self.confs = confs
+        self.logFile = logFile
+        self.labels = None
+        self.trainLMDB = None
+        self.trainData = []
+        self.trainData0 = []
+        self.trainData1 = []
+        self.valData = []
+        self.valData0 = []
+        self.valData1 = []
+        self.classNames = []
+        self.imSize = (self.confs["Settings"]["Classifier"]["Input"]["imageWidth"],
+                       self.confs["Settings"]["Classifier"]["Input"]["imageHeight"])
+        self.negativeTrainAmnt = self.confs["Settings"]["Classifier"]["Data"]["negativeTrainAmnt"]
+        self.positiveTrainAmnt = self.confs["Settings"]["Classifier"]["Data"]["positiveTrainAmnt"]
+        self.negativeTestAmnt = self.confs["Settings"]["Classifier"]["Data"]["negativeTestAmnt"]
+        self.positiveTestAmnt = self.confs["Settings"]["Classifier"]["Data"]["positiveTestAmnt"]
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "CaffeHelpers initiated")
+    def deleteLMDB(self):
+        """
+        Deletes existing LMDB files.
+        """
+        os.system('rm -rf  ' + self.confs["Settings"]["Classifier"]["LMDB"]["train"])
+        os.system('rm -rf  ' + self.confs["Settings"]["Classifier"]["LMDB"]["validation"])
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Existing LMDB deleted")
+    def sortLabels(self):
+        """
+        Sorts the training / validation data and labels.
+        """
+        self.labels = open(self.confs["Settings"]["Classifier"]["Data"]["labels"], "w")
+        for dirName in os.listdir(self.confs["Settings"]["Classifier"]["Model"]["dirData"]):
+            if dirName == ".ipynb_checkpoints":
+                continue
+            path = os.path.join(self.confs["Settings"]["Classifier"]["Model"]["dirData"], dirName)
+            if os.path.isdir(path):
+                self.classNames.append(path)
+                self.labels.write(dirName+"\n")
+        self.labels.close()
+        self.Helpers.logMessage(self.logFile, "allCNN",  "Status", str(len(self.classNames)) + " label(s) created")
+    def appendDataPaths(self, directory, filename):
+        """
+        Appends training data paths.
+        """
+        filePath = os.path.join(directory, filename)
+        cDirectory = os.path.basename(os.path.normpath(directory))
+        image = cv2.imread(filePath)
+        image = cv2.resize(image, self.imSize)
+        cv2.imwrite(filePath, image)
+        if cDirectory is self.confs["Settings"]["Classifier"]["Data"]["negativeDir"]:
+            self.trainData0.append(filePath)
+        elif cDirectory is self.confs["Settings"]["Classifier"]["Data"]["positiveDir"]:
+            self.trainData1.append(filePath)
+    def isValidFile(self, filename):
+        """
+        Checks that input file type is allowed.
+        """
+        return filename.endswith(tuple(self.confs["Settings"]["Classifier"]["Data"]["validFiles"]))
+    def sortTrainingData(self):
+        """
+        Sorts the training / validation data
+        """
+        for directory in self.classNames:
+            for filename in os.listdir(directory):
+                if self.isValidFile(filename):
+                    self.appendDataPaths(directory, filename)
+                else:
+                    continue
+    def recreatePaperData(self):
+        """
+        Recreates the dataset sizes specified in the paper.
+        """
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Total data size: " + str(len(self.trainData0) + len(self.trainData1)) + " (" + str(len(self.trainData0)) + " + " + str(len(self.trainData1)) + ")")
+        msg = "Recreating negative training size of " + str(self.negativeTrainAmnt)
+        msg += " with negative testing size of " + str(self.negativeTestAmnt)
+        msg += " and positive training size of " + str(self.positiveTrainAmnt)
+        msg += " with positive testing size of " + str(self.positiveTestAmnt)
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", msg)
+        random.shuffle(self.trainData0)
+        random.shuffle(self.trainData1)
+        trainingData0 = self.trainData0[0:self.negativeTrainAmnt]
+        trainingData1 = self.trainData1[0:self.positiveTrainAmnt]
+        self.Helpers.logMessage(self.logFile, "allCNN", "Data", "Negative training data created, size: " + str(len(trainingData0)))
+        self.Helpers.logMessage(self.logFile, "allCNN", "Data", "Positive training data created, size: " + str(len(trainingData1)))
+        for i in range(0, len(trainingData0)):
+            self.trainData.append(trainingData0[i])
+        for i in range(0, len(trainingData1)):
+            self.trainData.append(trainingData1[i])
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Paper training data created. " + str(len(trainingData0)) + " x Negative & " + str(len(trainingData1)) + " x Positive ")
+        valData0 = self.trainData0[self.negativeTrainAmnt:]
+        valData1 = self.trainData1[self.positiveTrainAmnt:]
+        self.Helpers.logMessage(self.logFile, "allCNN", "Data", "Negative validation data created, size: " + str(len(valData0)))
+        self.Helpers.logMessage(self.logFile, "allCNN", "Data", "Positive validation data created, size: " + str(len(valData1)))
+        for i in range(0, len(valData0)):
+            self.valData.append(valData0[i])
+        for i in range(0, len(valData1)):
+            self.valData.append(valData1[i])
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Paper validation data created. " + str(len(valData0)) + " x Negative & " + str(len(valData1)) + " x Positive ")
+        msg = "Recreated negative training size of " + str(len(trainingData0))
+        msg += " with negative testing size of " + str(len(trainingData1))
+        msg += " and positive training size of " + str(len(valData0))
+        msg += " with positive testing size of " + str(len(valData1))
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", msg)
+    def createDatum(self, imageData, label):
+        """
+        Generates a Datum object including label.
+        """
+        datum = caffe_pb2.Datum()
+        datum.channels = imageData.shape[2]
+        datum.height = imageData.shape[0]
+        datum.width = imageData.shape[1]
+        datum.data = imageData.tobytes()
+        datum.label = int(label)
+        return datum
+    def transform(self, img):
+        """
+        Transforms image using histogram equalization and resizing.
+        """
+        img[:, :, 0] = cv2.equalizeHist(img[:, :, 0])
+        img[:, :, 1] = cv2.equalizeHist(img[:, :, 1])
+        img[:, :, 2] = cv2.equalizeHist(img[:, :, 2])
+        return cv2.resize(img,
+                          self.imSize,
+                          interpolation = cv2.INTER_CUBIC)
+    def createAllDatum(self, rlmdb, data, dType):
+        """
+        Generates all Datum objects.
+        """
+        if dType is "Training":
+            dataPath = self.trainData
+        elif dType is "Validation":
+            dataPath = self.valData
+        with rlmdb.begin(write=True) as i:
+            count = 0
+            for data in dataPath:
+                i.put(
+                    '{:08}'.format(count).encode('ascii'),
+                    self.createDatum(
+                        cv2.resize(
+                            self.transform(
+                                cv2.imread(data, cv2.IMREAD_COLOR)
+                            ),
+                            (self.confs["Settings"]["Classifier"]["Input"]["imageHeight"], self.confs["Settings"]["Classifier"]["Input"]["imageWidth"])),
+                        os.path.basename(os.path.dirname(data))
+                    ).SerializeToString())
+                count = count + 1
+        rlmdb.close()
+        self.Helpers.logMessage(self.logFile,  "allCNN", "Status", dType + " data count: " + str(count))
+    def createTrainingLMDB(self):
+        """
+        Creates training LMDB database.
+        """
+        random.shuffle(self.trainData)
+        self.createAllDatum(lmdb.open(self.confs["Settings"]["Classifier"]["LMDB"]["train"], map_size=int(1e12)), self.trainData, "Training")
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Training LDBM created")
+    def createValidationLMDB(self):
+        """
+        Creates validation LMDB database.
+        """
+        random.shuffle(self.valData)
+        self.createAllDatum(lmdb.open(self.confs["Settings"]["Classifier"]["LMDB"]["validation"], map_size=int(1e12)), self.trainData, "Validation")
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Validation LDBM created")
+    def computeMean(self):
+        """
+        Computes the mean.
+        """
+        os.system('/home/upsquared/caffe/build/tools/compute_image_mean -backend=lmdb  ' + self.confs["Settings"]["Classifier"]["LMDB"]["train"] + ' ' + self.confs["Settings"]["Classifier"]["Caffe"]["proto"])
+        self.Helpers.logMessage(self.logFile, "allCNN", "Status", "Mean computed")