--- a
+++ b/mix_of_exp.py
@@ -0,0 +1,158 @@
+import random
+
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+
+from utils import plot_predictions
+
+
+class MixtureOfExperts(BaseEstimator, ClassifierMixin):
+
+    def __init__(self, experts, gate, max_iter=50):
+        self.experts = experts
+        self.gate = gate
+        self.max_iter = max_iter
+
+    def fit(self, X, y):
+        # y is expected to be an N x K one-hot (indicator) label matrix.
+        show_plots = False
+
+        self.num_experts_ = len(self.experts)
+        self.num_classes_ = y.shape[1]
+        self.__initialize(X, y)
+
+        # Run EM until the objective stops improving or max_iter is reached.
+        obj_vals = []
+        while len(obj_vals) <= 1 or (abs(obj_vals[-2] - obj_vals[-1]) > 1e-4
+                                     and len(obj_vals) < self.max_iter):
+            expert_weights = self.__E_step(X, y)
+            obj_val = self.__M_step(X, y, expert_weights)
+            obj_vals.append(obj_val)
+            print(obj_val)
+            if show_plots:
+                plot_predictions(X, y, self.gate, "Gate predictions")
+                for i in range(self.num_experts_):
+                    plot_predictions(X, y, self.experts[i],
+                                     "Expert #" + str(i) + " predictions")
+                plot_predictions(X, y, self)
+        return self
+
+    def predict(self, X):
+        return np.argmax(self.predict_proba(X), axis=1)
+
+    def score(self, X, y, sample_weight=None):
+        """
+        Description: evaluates the EM auxiliary objective (expected complete-data
+                     log-likelihood), sum_ij h_ij * log(g_j(x_i) * p(y_i | x_i, j)),
+                     where h_ij are the normalized expert responsibilities
+        Input:  X - data matrix
+                y - label matrix
+                sample_weight - optional vector of per-sample importance weights
+        Output: objective value of the data
+        """
+        weighted_expert_accuracy = self.__weighted_expert_accuracy(X, y)
+        expert_weights = self.__get_expert_weights(weighted_expert_accuracy)
+        log_prob = np.multiply(np.log(np.clip(weighted_expert_accuracy, 1e-18, None)),
+                               expert_weights)
+        if sample_weight is not None:
+            log_prob = np.multiply(log_prob, sample_weight)
+        return np.sum(log_prob)
+
+    def predict_proba(self, X):
+        """
+        Description: returns the probability that each row of X belongs to each class
+        Input:  X - data matrix
+        Output: N x K probability matrix, so that sum(*, axis=1) = 1
+        """
+        expert_predictions = self.__predict_experts(X)       # N x K x M
+        gate_proba = self.gate.predict_proba(X)              # N x M
+        # Weight each expert's class probabilities by the gate and sum over experts.
+        gated_expert_accuracy = expert_predictions * gate_proba[:, np.newaxis, :]
+        return np.sum(gated_expert_accuracy, axis=2)
+
+    def __predict_experts(self, X):
+        """
+        Description: finds the predicted probability of each class, for each point,
+                     from each expert
+        Input:  X - data matrix
+        Output: N x K x M matrix where sum(*, axis=1) = 1
+        """
+        predictions = np.zeros((X.shape[0], self.num_classes_, self.num_experts_))
+        for expert_index, expert in enumerate(self.experts):
+            predictions[:, :, expert_index] = expert.predict_proba(X)
+        return predictions
+
+    def __initialize(self, X, y):
+        """
+        Description: initializes the experts and the gate using random partitions
+                     of the data and random gate targets
+        Input:  X - data matrix
+                y - label matrix
+        Output: None
+        """
+        for expert in self.experts:
+            # Fit each expert on a random subset so the experts start out different.
+            subset_size = int(X.shape[0] * (1. - 1. / self.num_experts_))
+            idx = np.array(random.sample(range(X.shape[0]), subset_size))
+            expert.fit(X[idx], y[idx])
+
+        random_init = np.random.rand(X.shape[0], self.num_experts_)
+        self.gate.fit(X, random_init)
+
+    def __weighted_expert_accuracy(self, X, y):
+        """
+        Description: returns the matrix A_ij = g_j(x_i) * P(y_i | x_i, j)
+        Input:  X - data matrix
+                y - label matrix
+        Output: N x M matrix of gated expert predictions as described above
+        """
+        expert_predictions = self.__predict_experts(X)        # N x K x M
+        # Keep only the probability each expert assigned to the true class.
+        expert_accuracy = np.multiply(expert_predictions, y[:, :, np.newaxis])
+        gate_proba = self.gate.predict_proba(X)               # N x M
+        gated_expert_accuracy = expert_accuracy * gate_proba[:, np.newaxis, :]
+        return np.sum(gated_expert_accuracy, axis=1)
+
+    def __get_expert_weights(self, weighted_expert_accuracy):
+        # Normalize over experts so each row sums to one (posterior responsibilities).
+        return np.divide(weighted_expert_accuracy,
+                         np.sum(weighted_expert_accuracy, axis=1)[:, np.newaxis])
+
+    def __E_step(self, X, y):
+        """
+        Description: finds the contribution of each expert to the final prediction
+        Input:  X - data matrix
+                y - label matrix
+        Output: N x M matrix of responsibilities for each point and each expert
+        """
+        weighted_expert_accuracy = self.__weighted_expert_accuracy(X, y)
+        return self.__get_expert_weights(weighted_expert_accuracy)
+
+    def __M_step(self, X, y, expert_weights):
+        """
+        Description: fits the experts and the gate according to the E-step weights
+                     and returns the new objective function value
+        Input:  X - data matrix
+                y - label matrix
+                expert_weights - responsibilities obtained from the E-step
+        Output: new objective function value
+        """
+        # The gate is trained to reproduce the responsibilities.
+        self.gate.fit(X, expert_weights)
+
+        # Each expert is refit with its responsibilities used as sample weights.
+        for expert_index in range(self.num_experts_):
+            fw = expert_weights[:, expert_index]
+            self.experts[expert_index].fit(X, y, fw)
+
+        return self.score(X, y)
+
+    def get_params(self, deep=True):
+        return {'experts': self.experts,
+                'gate': self.gate,
+                'max_iter': self.max_iter}
+
+    def set_params(self, **parameters):
+        for parameter, value in parameters.items():
+            setattr(self, parameter, value)
+        return self
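
Usage note (not part of the diff above): MixtureOfExperts expects a one-hot N x K label matrix, experts whose fit(X, y_onehot, sample_weight) accepts that matrix plus per-sample weights, and a gate whose fit(X, responsibilities) accepts an N x M matrix of soft targets, since that is how the class calls them. The sketch below is a hypothetical way to satisfy those interfaces with scikit-learn's LogisticRegression; OneHotExpert, SoftGate, and the make_moons data are illustrative assumptions, not code from this PR, and it assumes mix_of_exp.py and the repo's utils module are importable.

# Hypothetical usage sketch for the class added in this diff (assumptions noted above).
import numpy as np
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression

from mix_of_exp import MixtureOfExperts


class OneHotExpert:
    """Wraps LogisticRegression so fit() accepts a one-hot label matrix and sample weights."""

    def __init__(self):
        self.clf = LogisticRegression()

    def fit(self, X, y_onehot, sample_weight=None):
        self.clf.fit(X, np.argmax(y_onehot, axis=1), sample_weight=sample_weight)
        return self

    def predict_proba(self, X):
        return self.clf.predict_proba(X)


class SoftGate:
    """Logistic gate trained on soft responsibilities via weighted sample replication."""

    def __init__(self):
        self.clf = LogisticRegression()

    def fit(self, X, responsibilities):
        n, m = responsibilities.shape
        # Replicate each point once per expert, weighted by that expert's
        # responsibility, so the gate maximizes the weighted cross-entropy.
        X_rep = np.repeat(X, m, axis=0)
        y_rep = np.tile(np.arange(m), n)
        w_rep = responsibilities.reshape(-1)
        self.clf.fit(X_rep, y_rep, sample_weight=w_rep)
        return self

    def predict_proba(self, X):
        return self.clf.predict_proba(X)


if __name__ == "__main__":
    X, y = make_moons(n_samples=200, noise=0.2, random_state=0)
    y_onehot = np.eye(2)[y]  # N x K indicator label matrix expected by fit()
    moe = MixtureOfExperts(experts=[OneHotExpert(), OneHotExpert()],
                           gate=SoftGate(), max_iter=20)
    moe.fit(X, y_onehot)
    print("training accuracy:", np.mean(moe.predict(X) == y))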