Diff of /Stats/TransformThread.py [000000] .. [b4a150]

Switch to side-by-side view

--- a
+++ b/Stats/TransformThread.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+#
+# Copyright 2017 University of Westminster. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""" It applies a set of transformation functions using independent threads for each feature.
+"""
+
+from typing import TypeVar, Any
+from scipy import stats
+from sklearn import preprocessing
+from Stats.YeoJohnson import YeoJohnson
+import numpy as np
+
+PandasDataFrame = TypeVar('DataFrame')  # placeholder type used only in annotations of dataframe arguments
+
+__author__ = "Mohsen Mesgarpour"
+__copyright__ = "Copyright 2016, https://github.com/mesgarpour"  # NOTE(review): header comment says 2017, University of Westminster -- confirm
+__credits__ = ["Mohsen Mesgarpour"]
+__license__ = "GPL"  # NOTE(review): header comment licenses this file under Apache 2.0 -- confirm which applies
+__version__ = "1.1"
+__maintainer__ = "Mohsen Mesgarpour"
+__email__ = "mohsen.mesgarpour@gmail.com"
+__status__ = "Release"
+
+
+class TransformThread:
+    # todo: optimise threading further
+
+    def __init__(self,
+                 **kwargs: Any):
+        """Initialise the objects and constants.
+        :param kwargs: the input arguments for the selected transform function.
+        """
+        self.__kwargs = kwargs
+
+    def transform_scale_arr(self,
+                            dt: PandasDataFrame,
+                            method_args: Any,
+                            name: str):
+        """Standardize a dataset along any axis.
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (kwargs: with_mean=True)
+        :param name: the name of the feature to be transformed.
+        """
+        method_args[name] = None
+        dt[name] = preprocessing.scale(dt[name], **self.__kwargs)
+
+    def transform_robust_scale_arr(self,
+                                   dt: PandasDataFrame,
+                                   method_args: Any,
+                                   name: str):
+        """Standardize a dataset along any axis.
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (kwargs: axis=0, with_centering=True, with_scaling=True)
+        :param name: the name of the feature to be transformed.
+        """
+        method_args[name] = None
+        dt[name] = preprocessing.robust_scale(dt[name], **self.__kwargs)
+
+    def transform_max_abs_scalar_arr(self,
+                                     dt: PandasDataFrame,
+                                     method_args: Any,
+                                     name: str):
+        """Scale each feature by its maximum absolute value.
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (it is a placeholder no argument is available).
+        :param name: the name of the feature to be transformed.
+        """
+        if name in method_args[name] and "scale" in method_args[name].keys():
+            scale = method_args[name]["scale"]
+        else:
+            scale = preprocessing.MaxAbsScaler(**self.__kwargs)
+            method_args[name] = {"scale": scale}
+
+        arr = scale.fit_transform(dt[name])
+        arr = np.array(scale.transform(arr)) + 1
+        dt[name], summaries = stats.boxcox(arr)
+
+    def transform_normalizer_arr(self,
+                                 dt: PandasDataFrame,
+                                 method_args: Any,
+                                 name: str):
+        """Normalize samples individually to unit norm.
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (kwargs: norm='l2')
+        :param name: the name of the feature to be transformed.
+        """
+        if name in method_args[name] and "scale" in method_args[name].keys():
+            scale = method_args[name]["scale"]
+        else:
+            scale = preprocessing.Normalizer(**self.__kwargs)
+            method_args[name] = {"scale": scale}
+
+        arr = scale.fit_transform(dt[name])
+        dt[name] = scale.transform(arr)
+
+    def transform_kernel_centerer_arr(self,
+                                      dt: PandasDataFrame,
+                                      method_args: Any,
+                                      name: str):
+        """Center a kernel matrix
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (it is a placeholder no argument is available).
+        :param name: the name of the feature to be transformed.
+        """
+        if name in method_args[name] and "scale" in method_args[name].keys():
+            scale = method_args[name]["scale"]
+        else:
+            scale = preprocessing.KernelCenterer()
+            method_args[name] = {"scale": scale}
+
+        arr = scale.fit_transform(dt[name])
+        dt[name] = scale.transform(arr)
+
+    def transform_yeo_johnson_arr(self,
+                                  dt: PandasDataFrame,
+                                  method_args: Any,
+                                  name: str):
+        """Apply the Ye-Johnson transformation.
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (kwargs: lmbda=-0.5, derivative=0, epsilon=np.finfo(np.float).eps, inverse=False).
+        :param name: the name of the feature to be transformed.
+        """
+        method_args[name] = None
+        yeo_johnson = YeoJohnson()
+        dt[name] = yeo_johnson.fit(dt[name], **self.__kwargs)
+
+    def transform_box_cox_arr(self,
+                              dt: PandasDataFrame,
+                              method_args: Any,
+                              name: str):
+        """Apply the Box-Cox transformation.
+        :param dt: the dataframe of features.
+        :param method_args: other input arguments
+        (kwargs: lmbda=None, alpha=None).
+        :param name: the name of the feature to be transformed.
+        """
+        if name in method_args[name] and "scale" in method_args[name].keys():
+            scale = method_args[name]["scale"]
+        else:
+            scale, _ = stats.boxcox(dt[name], **self.__kwargs)
+            method_args[name] = {"scale": scale}
+
+        arr = scale.fit_transform(dt[name])
+        dt[name] = scale.transform(arr)