a b/Stats/TransformThread.py
1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
#
4
# Copyright 2017 University of Westminster. All Rights Reserved.
5
#
6
# Licensed under the Apache License, Version 2.0 (the "License");
7
# you may not use this file except in compliance with the License.
8
# You may obtain a copy of the License at
9
#
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
#
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
# ==============================================================================
18
""" It applies a set of transformation functions using independent threads for each feature.
19
"""
20
21
from typing import TypeVar, Any
22
from scipy import stats
23
from sklearn import preprocessing
24
from Stats.YeoJohnson import YeoJohnson
25
import numpy as np
26
27
# Stand-in type used in this module's annotations for the `dt` arguments
# (presumably pandas DataFrames; pandas itself is not imported here).
# NOTE(review): the TypeVar is created with the name 'DataFrame' but bound to
# the variable PandasDataFrame; static type checkers flag that mismatch --
# confirm whether it is intentional before renaming.
PandasDataFrame = TypeVar('DataFrame')
28
29
# Module authorship and release metadata.
# NOTE(review): the file header above states the Apache License 2.0, while
# __license__ below says "GPL" -- confirm which one applies.
__author__ = "Mohsen Mesgarpour"
30
__copyright__ = "Copyright 2016, https://github.com/mesgarpour"
31
__credits__ = ["Mohsen Mesgarpour"]
32
__license__ = "GPL"
33
__version__ = "1.1"
34
__maintainer__ = "Mohsen Mesgarpour"
35
__email__ = "mohsen.mesgarpour@gmail.com"
36
__status__ = "Release"
37
38
39
class TransformThread:
    """Per-feature transformation routines.

    Every ``transform_*`` method has the same shape: it reads ``dt[name]``,
    writes the transformed values back to ``dt[name]`` and records in
    ``method_args[name]`` the fitted state produced by the transformation
    (``None`` when there is none). Nothing is returned. The class itself starts
    no threads; each call handles exactly one feature.
    """
    # todo: optimise threading further

    def __init__(self,
                 **kwargs: Any):
        """Initialise the objects and constants.
        :param kwargs: the input arguments for the selected transform function.
        """
        self.__kwargs = kwargs

    @staticmethod
    def _cached_scale(method_args: Any,
                      name: str) -> Any:
        """Return the state cached for a feature by an earlier call.
        :param method_args: the mapping of feature names to their cached state.
        :param name: the name of the feature.
        :return: ``method_args[name]["scale"]`` when it exists, otherwise None.
        """
        try:
            return method_args[name]["scale"]
        except (KeyError, TypeError):
            # KeyError: the feature has no entry, or its entry has no "scale".
            # TypeError: the entry is not subscriptable by key, e.g. the None
            # written by the stateless transforms of this class.
            return None

    def _apply_scaler(self,
                      dt: PandasDataFrame,
                      method_args: Any,
                      name: str,
                      factory: Any,
                      as_column: bool = False) -> None:
        """Transform ``dt[name]`` with a cached scaler or a newly fitted one.
        :param dt: the dataframe of features.
        :param method_args: the mapping of feature names to their cached state.
        :param name: the name of the feature to be transformed.
        :param factory: a zero-argument callable that builds an unfitted scaler.
        :param as_column: when True, a 1-D feature is passed to the scaler as a
        single column of shape (n_samples, 1) and the result is flattened back.
        """
        scale = self._cached_scale(method_args, name)
        # Only an object that can transform is reused; anything else cached
        # under "scale" (e.g. a Box-Cox lambda) is ignored and replaced.
        reuse = hasattr(scale, "transform")

        values = dt[name]
        flatten = False
        if as_column:
            arr = np.asarray(values)
            if arr.ndim == 1:
                values, flatten = arr.reshape(-1, 1), True

        if reuse:
            # A cached scaler is already fitted: transform only, never refit.
            result = scale.transform(values)
        else:
            scale = factory()
            # fit_transform already returns the transformed data, so no
            # further call to transform() is needed.
            result = scale.fit_transform(values)
            # Store after fitting, so that a mapping which copies its values
            # keeps the fitted scaler rather than the unfitted one.
            method_args[name] = {"scale": scale}

        if flatten:
            result = np.asarray(result).ravel()
        dt[name] = result

    def transform_scale_arr(self,
                            dt: PandasDataFrame,
                            method_args: Any,
                            name: str):
        """Standardize a dataset along any axis.
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (kwargs: with_mean=True)
        :param name: the name of the feature to be transformed.
        """
        # No fitted state is kept for this transformation.
        method_args[name] = None
        dt[name] = preprocessing.scale(dt[name], **self.__kwargs)

    def transform_robust_scale_arr(self,
                                   dt: PandasDataFrame,
                                   method_args: Any,
                                   name: str):
        """Scale the feature with scikit-learn's ``robust_scale``.
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (kwargs: axis=0, with_centering=True, with_scaling=True)
        :param name: the name of the feature to be transformed.
        """
        # No fitted state is kept for this transformation.
        method_args[name] = None
        dt[name] = preprocessing.robust_scale(dt[name], **self.__kwargs)

    def transform_max_abs_scalar_arr(self,
                                     dt: PandasDataFrame,
                                     method_args: Any,
                                     name: str):
        """Scale each feature by its maximum absolute value.

        The fitted ``MaxAbsScaler`` is stored in ``method_args[name]["scale"]``
        and reused (transform only) when the feature is processed again.
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (it is a placeholder no argument is available).
        :param name: the name of the feature to be transformed.
        """
        self._apply_scaler(dt, method_args, name,
                           lambda: preprocessing.MaxAbsScaler(**self.__kwargs),
                           as_column=True)

    def transform_normalizer_arr(self,
                                 dt: PandasDataFrame,
                                 method_args: Any,
                                 name: str):
        """Normalize samples individually to unit norm.

        The ``Normalizer`` is stored in ``method_args[name]["scale"]`` and
        reused when the feature is processed again.
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (kwargs: norm='l2')
        :param name: the name of the feature to be transformed.
        """
        # NOTE(review): dt[name] is handed to the Normalizer unchanged; whether
        # a single feature should be treated as one sample or as one column
        # cannot be determined from this file -- confirm against the caller.
        self._apply_scaler(dt, method_args, name,
                           lambda: preprocessing.Normalizer(**self.__kwargs))

    def transform_kernel_centerer_arr(self,
                                      dt: PandasDataFrame,
                                      method_args: Any,
                                      name: str):
        """Center a kernel matrix.

        The fitted ``KernelCenterer`` is stored in
        ``method_args[name]["scale"]`` and reused (transform only) when the
        feature is processed again.
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (it is a placeholder no argument is available).
        :param name: the name of the feature to be transformed.
        """
        # NOTE(review): dt[name] is handed to the KernelCenterer unchanged;
        # presumably it must already be a kernel matrix -- confirm.
        self._apply_scaler(dt, method_args, name,
                           lambda: preprocessing.KernelCenterer())

    def transform_yeo_johnson_arr(self,
                                  dt: PandasDataFrame,
                                  method_args: Any,
                                  name: str):
        """Apply the Yeo-Johnson transformation.
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (kwargs: lmbda=-0.5, derivative=0, epsilon=np.finfo(np.float).eps, inverse=False).
        :param name: the name of the feature to be transformed.
        """
        # No fitted state is kept for this transformation.
        method_args[name] = None
        yeo_johnson = YeoJohnson()
        dt[name] = yeo_johnson.fit(dt[name], **self.__kwargs)

    def transform_box_cox_arr(self,
                              dt: PandasDataFrame,
                              method_args: Any,
                              name: str):
        """Apply the Box-Cox transformation.

        The lambda that was used is stored in ``method_args[name]["scale"]``.
        When a numeric lambda is already cached there, it is reused instead of
        being estimated again (it then takes precedence over ``lmbda`` in kwargs).
        :param dt: the dataframe of features.
        :param method_args: other input arguments
        (kwargs: lmbda=None, alpha=None).
        :param name: the name of the feature to be transformed.
        """
        params = dict(self.__kwargs)
        cached = self._cached_scale(method_args, name)
        # Only a number can be a lambda; a scaler object cached by another
        # transformation is ignored.
        if isinstance(cached, (int, float, np.number)):
            params["lmbda"] = cached

        outcome = stats.boxcox(dt[name], **params)
        if isinstance(outcome, tuple):
            # Lambda was estimated: (transformed, lambda[, confidence interval]).
            transformed, lmbda = outcome[0], outcome[1]
        else:
            # Lambda was supplied: only the transformed values come back.
            transformed, lmbda = outcome, params.get("lmbda")

        method_args[name] = {"scale": lmbda}
        dt[name] = transformed