Diff of /Stats/TrainingMethod.py [000000] .. [b4a150]

Switch to side-by-side view

--- a
+++ b/Stats/TrainingMethod.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+#
+# Copyright 2017 University of Westminster. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""It applies the training functions
+"""
+
+from typing import Dict, List, TypeVar, Any
+from Stats._LogisticRegression import _LogisticRegression
+from Stats._LogisticRegressionCV import _LogisticRegressionCV
+from Stats._MixedLinearModel import _MixedLinearModel
+from Stats._RandomForestClassifier import _RandomForestClassifier
+from Stats._GradientBoostingClassifier import _GradientBoostingClassifier
+from Stats._DecisionTreeClassifier import _DecisionTreeClassifier
+from Stats._KNeighborsClassifier import _KNeighborsClassifier
+from Stats._NaiveBayes import _NaiveBayes
+from Stats._NeuralNetwork import _NeuralNetwork
+from ReadersWriters.ReadersWriters import ReadersWriters
+from Configs.CONSTANTS import CONSTANTS
+from sklearn.model_selection import cross_val_score
+import numpy as np
+import sys
+import logging
+
+PandasDataFrame = TypeVar('DataFrame')
+NumpyNDArray = TypeVar('ndarray')
+CollectionsOrderedDict = TypeVar('OrderedDict')
+
+__author__ = "Mohsen Mesgarpour"
+__copyright__ = "Copyright 2016, https://github.com/mesgarpour"
+__credits__ = ["Mohsen Mesgarpour"]
+__license__ = "GPL"
+__version__ = "1.1"
+__maintainer__ = "Mohsen Mesgarpour"
+__email__ = "mohsen.mesgarpour@gmail.com"
+__status__ = "Release"
+
+
+class TrainingMethod:
+    def __init__(self,
+                 method_name: str,
+                 path: str=None,
+                 title: str=None):
+        """Initialise the objects and constants.
+        :param method_name: the training method that will be used
+        (options: {'lr': Logistic Regression, 'lr_cv': Logistic Regression with Cross-Validation,
+        'mlm': Mixed Linear Model, 'rfc': Random Forest Classifier, 'gbc': Gradient Boosting Classifier,
+        'dtc' Decision Tree Classifier, 'knc': K-Nearest Neighbors Classifier, 'nb': Multinomial Naive Bayes,
+        'nn': Multi-Layer Perceptron (MLP) Neural Network}).
+        :param path: the directory path of the saved trained model file, using this application (if applicable).
+        :param title: the file name of the saved trained model file, using this application
+        """
+        self.__logger = logging.getLogger(CONSTANTS.app_name)
+        self.__logger.debug(__name__)
+
+        self.__readers_writers = ReadersWriters()
+        self.__method = None
+        self.method_name = method_name
+        self.model_labels = None
+        self.model_train = None
+        self.model_predict = dict()
+        self.model_cross_validate = None
+        if method_name is not None:
+            self.__init__method(method_name)
+        else:
+            self.load(path, title)
+
+    def __init__method(self,
+                       method_name: str,
+                       model_labels: List=None,
+                       model_train: Any=None,
+                       model_predict: Dict=None,
+                       model_cross_validate: NumpyNDArray=None):
+        """Initialise the selected training method.
+        :param method_name: the training method that will be used
+        (options: {'lr': Logistic Regression, 'lr_cv': Logistic Regression with Cross-Validation,
+        'mlm': Mixed Linear Model, 'rfc': Random Forest Classifier, 'gbc': Gradient Boosting Classifier,
+        'dtc' Decision Tree Classifier, 'knc': K-Nearest Neighbors Classifier, 'nb': Multinomial Naive Bayes,
+        'nn': Multi-Layer Perceptron (MLP) Neural Network}).
+        :param model_labels: the features names to be inputted into the model.
+        Note: the order of features will be preserved internally.
+        :param model_train: the training model.
+        :param model_predict: the prediction outputs.
+        :param model_cross_validate: the cross-validation model.
+        """
+        self.__logger.debug("Initialise the training method.")
+        if method_name == "lr":
+            self.__method = _LogisticRegression()
+        elif method_name == "lr_cv":
+            self.__method = _LogisticRegressionCV()
+        elif method_name == "mlm":
+            self.__method = _MixedLinearModel()
+        elif method_name == "rfc":
+            self.__method = _RandomForestClassifier()
+        elif method_name == "gbc":
+            self.__method = _GradientBoostingClassifier()
+        elif method_name == "dtc":
+            self.__method = _DecisionTreeClassifier()
+        elif method_name == "knc":
+            self.__method = _KNeighborsClassifier()
+        elif method_name == "nb":
+            self.__method = _NaiveBayes()
+        elif method_name == "nn":
+            self.__method = _NeuralNetwork()
+        else:
+            self.__logger.error(__name__ + " - Invalid training method: " + str(method_name))
+            sys.exit()
+
+        self.model_labels = model_labels
+        self.model_train = model_train
+        self.model_predict = dict() if model_predict is None else model_predict
+        self.model_cross_validate = model_cross_validate
+
+    def train(self,
+              features_indep_df: PandasDataFrame,
+              feature_target: List,
+              **kwargs: Any) -> Any:
+        """Perform the training, using the selected method.
+        :param features_indep_df: the independent features, which are inputted into the model.
+        :param feature_target: the target feature, which is being estimated.
+        :param kwargs: the training method's argument.
+        :return: the trained model.
+        """
+        self.__logger.debug("Train.")
+        self.model_labels = list(features_indep_df.columns.values)
+        self.model_train = self.__method.train(
+            features_indep_df[self.model_labels], feature_target, self.model_labels, **kwargs)
+        return self.model_train
+
+    def plot(self) -> Any:
+        """Plot the tree diagram.
+        :return: the model graph.
+        """
+        self.__logger.debug("Plot.")
+        return self.__method.plot(self.model_train, self.model_labels, ["True", "False"])
+
+    def train_summaries(self) -> Any:
+        """ Produce the training summary.
+        :return: the training summary.
+        """
+        self.__logger.debug("Summarise training model.")
+        return self.__method.train_summaries(self.model_train)
+
+    def predict(self,
+                features_indep_df: PandasDataFrame,
+                sample_name: str) -> PandasDataFrame:
+        """Predict probability of labels, using the training model.
+        :param features_indep_df: the independent features, which are inputted into the model.
+        :param sample_name: the sample to predict(e.g. 'train', 'test', 'validate').
+        :return: the predicted probabilities, and the predicted labels.
+        """
+        self.__logger.debug("Predict.")
+        self.model_predict[sample_name] = self.__method.predict(self.model_train, features_indep_df[self.model_labels])
+        return self.model_predict[sample_name]
+
+    def predict_summaries(self,
+                          feature_target: List,
+                          sample_name: str) -> CollectionsOrderedDict:
+        """roduce summary statistics for the prediction performance.
+        :param feature_target: the target feature, which is being estimated.
+        :param sample_name: the sample to predict(e.g. 'train', 'test', 'validate').
+        :return: the prediction summaries.
+        """
+        self.__logger.debug("Summarise predictions.")
+        self.model_predict[sample_name]['target'] = feature_target
+        return self.__method.predict_summaries(self.model_predict[sample_name], feature_target)
+
+    def predict_summaries_risk_bands(self,
+                                     feature_target: List,
+                                     sample_name: str,
+                                     cutoffs: List=np.arange(0, 1.05, 0.05)) -> CollectionsOrderedDict:
+        """Produce a summary statistics table for a range of cut-off points.
+        :param feature_target: the target feature, which is being estimated.
+        :param sample_name: the sample to predict(e.g. 'train', 'test', 'validate').
+        :param cutoffs: a list of risk cut-off points.
+        :return: the summary statistics table for the cut-off points.
+        """
+        self.__logger.debug("Summarise predictions.")
+        self.model_predict[sample_name]['target'] = feature_target
+        return self.__method.predict_summaries_cutoffs_table(
+            self.model_predict[sample_name]['score'], feature_target, cutoffs)
+
+    def cross_validate(self,
+                       features_indep_df: PandasDataFrame,
+                       feature_target: List,
+                       scoring: str="neg_mean_squared_error",
+                       cv: int=10) -> Any:
+        """Evaluate the model by performing cross-validation.
+        :param features_indep_df: the independent features, which are inputted into the model.
+        :param feature_target: the target feature, which is being estimated.
+        :param scoring: the scoring method (default: 'neg_mean_squared_error').
+        :param cv: the cross-validation splitting strategy (optional).
+        :return: the cross-validation summary
+        """
+        self.__logger.info("Cross-Validate")
+
+        self.model_cross_validate = cross_val_score(
+            self.model_train, features_indep_df[self.model_labels], feature_target, scoring=scoring, cv=cv)
+        return self.model_cross_validate
+
+    def cross_validate_summaries(self) -> Any:
+        """Produce a summary of the applied cross-validation
+        :return: the cross-validation summary
+        """
+        return self.model_cross_validate
+
+    def save_model(self,
+                   path: str,
+                   title: str):
+        """Save (pickle) the training model, as well as predictions and cross-validations.
+        Note: summaries statistics won't not saved.
+        :param path: the directory path of the saved trained model file, using this application (if applicable).
+        :param title: the file name of the saved trained model file, using this application.
+        """
+        self.__logger.info("Saving model")
+        objects = dict()
+        objects['method_name'] = self.method_name
+        objects['model_labels'] = self.model_labels
+        objects['model_train'] = self.model_train
+        objects['model_predict'] = self.model_predict
+        objects['model_cross_validate'] = self.model_cross_validate
+        self.__readers_writers.save_serialised(path, title, objects=objects)
+
+    def save_model_compressed(self,
+                              path: str,
+                              title: str):
+        """Save (pickle) & compressthe training model, as well as predictions and cross-validations.
+        Note: summaries statistics won't not saved.
+        :param path: the directory path of the saved trained model file, using this application (if applicable).
+        :param title: the file name of the saved trained model file, using this application.
+        """
+        self.__logger.debug("Save model.")
+        objects = dict()
+        objects['method_name'] = self.method_name
+        objects['model_labels'] = self.model_labels
+        objects['model_train'] = self.model_train
+        objects['model_predict'] = self.model_predict
+        objects['model_cross_validate'] = self.model_cross_validate
+        self.__readers_writers.save_serialised_compressed(path, title, objects=objects)
+
+    def load(self,
+             path: str,
+             title: str):
+        """Load (unpickle) the model, which was saved using this application.
+        :param path: the directory path of the saved trained model file, using this application (if applicable).
+        :param title: the file name of the saved trained model file, using this application
+        """
+        self.__logger.debug("Load model.")
+        objects = self.__readers_writers.load_serialised(path, title)
+        try:
+            self.__init__method(method_name=objects['method_name'],
+                                model_labels=objects['model_labels'],
+                                model_train=objects['model_train'],
+                                model_predict=objects['model_predict'],
+                                model_cross_validate=objects['model_cross_validate'])
+        except():
+            self.__logger.error(__name__ + " - Invalid field(s) in the model file: " + path)
+            sys.exit()