Switch to unified view

a b/ensembling/WeightedMean.py
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Sat Aug 15 14:12:12 2015.
4
5
@author: rc, alex
6
"""
7
import numpy as np
8
from collections import OrderedDict
9
from sklearn.base import BaseEstimator, ClassifierMixin
10
from sklearn.metrics import roc_auc_score
11
from hyperopt import fmin, tpe, hp
12
13
from progressbar import Bar, ETA, Percentage, ProgressBar, RotatingMarker
14
15
16
class WeightedMeanClassifier(BaseEstimator, ClassifierMixin):

    """Weighted mean classifier with AUC optimization.

    Blends the predictions of several models with per-model weights. The
    weights are searched with hyperopt (TPE) so as to maximise the ROC AUC
    averaged over the output columns.

    The input matrix ``X`` is sliced as ``X[:, col::6]``: for each of the 6
    output columns, every 6th column of ``X`` starting at ``col`` is taken as
    the set of per-model predictions for that output. The number of columns
    selected this way must therefore match ``len(ensemble)``.

    Parameters
    ----------
    ensemble : iterable of str
        Model names, in the same order as their columns appear in ``X``.
        The names are used as hyperopt parameter labels.
    step : float
        Quantisation step of the weight search (weights lie in [0, 3]).
    max_evals : int
        Number of hyperopt evaluations.
    mean : str
        One of "simple", "arithmetic", "geometric" or "power".
    verbose : bool
        If true, show a progress bar while fitting and print the best
        weights found.
    """

    # Number of output columns interleaved in X (and present in y).
    _N_OUTPUTS = 6
    _MEANS = ('simple', 'arithmetic', 'geometric', 'power')
    _BAD_MEAN_MSG = ('Mean should be either "simple", "arithmetic", '
                     '"geometric" or "power"')

    def __init__(self, ensemble, step=0.025, max_evals=100, mean='arithmetic',
                 verbose=True):
        """Store the settings and build the hyperopt search space."""
        self.ensemble = ensemble
        self.step = step
        self.max_evals = max_evals
        self.mean = mean
        self.count = -1
        self.verbose = verbose
        self.pbar = None

        # One quantised uniform weight in [0, 3] per model.
        self.param_space = OrderedDict()
        for model in ensemble:
            self.param_space[model] = hp.quniform(model, 0, 3, self.step)

        # hyperopt hands the sampled weights back as a mapping keyed by model
        # name, whereas the columns of the input data follow the order of
        # `ensemble`. Remember each model's position so that the weights can
        # be put back in column order before they are applied.
        self.sorting = dict()
        for i, m in enumerate(self.ensemble):
            self.sorting[m] = i

    def fit(self, X, y):
        """Search the blending weights that maximise the mean ROC AUC.

        Nothing is optimised when ``mean == 'simple'``; ``best_params`` is
        then left as ``None``.

        Returns
        -------
        self
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``mean`` is not one of the supported kinds.
        """
        if self.mean not in self._MEANS:
            raise ValueError('%s, got %r' % (self._BAD_MEAN_MSG, self.mean))

        self.best_params = None
        # Restart the call counter so the progress bar also works on refit.
        self.count = -1

        if self.mean == 'simple':
            return self

        if self.verbose:
            widgets = ['Training : ', Percentage(), ' ',
                       Bar(marker=RotatingMarker()), ' ', ETA(), ' ']
            # calcMean is called once per output column for every hyperopt
            # evaluation, and the bar is advanced per calcMean call.
            self.pbar = ProgressBar(
                widgets=widgets, maxval=self.max_evals * self._N_OUTPUTS)
            self.pbar.start()

        def objective(w):
            # hyperopt minimises, hence the negated mean AUC.
            return -np.mean([
                roc_auc_score(
                    y[:, col],
                    self.calcMean(X[:, col::self._N_OUTPUTS], w,
                                  training=True))
                for col in range(self._N_OUTPUTS)])

        self.best_params = fmin(objective, self.param_space,
                                algo=tpe.suggest, max_evals=self.max_evals)

        if self.verbose:
            self.pbar.finish()
            print(self.best_params)
        return self

    def predict_proba(self, X):
        """Get predictions.

        Returns an array with one row per sample and one column per output,
        each column being the blended prediction for that output.
        """
        return np.c_[[self.calcMean(X[:, col::self._N_OUTPUTS],
                                    self.best_params)
                      for col in range(self._N_OUTPUTS)]].transpose()

    def calcMean(self, X, w, training=False):
        """Calculate the mean of the columns of X according to weights.

        Parameters
        ----------
        X : 2-D array
            One column per model, in the order of ``ensemble``.
        w : mapping
            Weight of each model, keyed by model name. Ignored when
            ``mean == 'simple'``.
        training : bool
            True when called from the optimisation loop; only then is the
            progress bar advanced.

        Returns
        -------
        1-D array with one blended value per row of X.

        Raises
        ------
        ValueError
            If ``mean`` is not one of the supported kinds.
        """
        self.count += 1
        pbar = getattr(self, 'pbar', None)
        if (training and self.verbose and pbar is not None
                and self.count <= self.max_evals * self._N_OUTPUTS
                and not self.count % 10):
            # Refresh only every 10th call to keep the overhead low.
            pbar.update(self.count)

        if self.mean == 'simple':
            return np.sum(X, axis=1)/X.shape[1]

        if self.mean not in self._MEANS:
            raise ValueError('%s, got %r' % (self._BAD_MEAN_MSG, self.mean))

        # Put the weights in the same order as the columns of X.
        w = [w[k] for k in sorted(self.sorting, key=self.sorting.get)]
        if self.mean == 'arithmetic':
            return np.sum(X * w, axis=1)/np.sum(w)
        if self.mean == 'geometric':
            return np.exp(np.sum(np.log(X) * w, axis=1)/np.sum(w))
        # 'power': logistic function of the sum of the columns, each raised
        # to its weight.
        return 1/(1+np.exp(-np.sum(X ** w, axis=1)))