NLP_CRT / Git / [8d2107] /mix_of

Models:
philipB/
NLP_CRT
Downloads: 1
[8d2107]: / mix_of_exp.py
History
Download this file
206 lines (155 with data), 6.7 kB

import sklearn as skl
import numpy as np
import random
from utils import plot_predictions
from sklearn.base import BaseEstimator, ClassifierMixin
from multiprocessing import Pool, Process
#from main import plot_predictions

class MixtureOfExperts(BaseEstimator, ClassifierMixin):

    """
    description: mixture-of-experts classifier trained with an EM-style loop.
                 A gate model gives every sample one weight per expert, and the
                 predicted class probabilities are the gate-weighted sum of the
                 experts' class probabilities.
    input:  experts - list of M estimators; each one is used through
                      fit(X, y), fit(X, y, sample_weights) and predict_proba(X)
            gate - estimator used through fit(X, W) and predict_proba(X), where
                   W is an N x M matrix of per-expert weights
            max_iter - upper bound on the number of EM iterations (at least two
                       iterations are always run, see fit)
    """

    def __init__(self, experts, gate, max_iter = 50):

        self.experts = experts
        self.gate = gate
        self.max_iter = max_iter

    def fit(self, X, y):

        """
        description: initializes the experts and the gate, then alternates E and
                     M steps until the objective changes by at most 1e-4 between
                     two consecutive iterations or max_iter iterations were run.
                     The objective value is printed after every iteration.
        input:  X - data matrix with N rows
                y - N x K label matrix; it is multiplied element-wise with the
                    experts' class probabilities, so presumably one-hot
                    (TODO confirm against callers)
        output: self
        """

        # Debugging switch: set to True to plot the gate, every expert and the
        # full mixture after each iteration.
        show_plots = False

        self.num_experts_ = len(self.experts)
        self.num_classes_ = y.shape[1]
        self.__initialize(X, y)
        obj_vals = []
        # Two objective values are needed before convergence can be measured,
        # hence the unconditional first two passes.
        while len(obj_vals) <= 1 or (abs(obj_vals[-2] - obj_vals[-1]) > 1e-4 and len(obj_vals) < self.max_iter):
            expert_weights = self.__E_step(X, y)
            obj_val = self.__M_step(X, y, expert_weights)
            obj_vals.append(obj_val)
            # Parenthesised form prints the same text on Python 2 and Python 3.
            print(obj_val)
            if show_plots:
                plot_predictions(X, y, self.gate, "Gate predictions")
                for i in range(self.num_experts_):
                    plot_predictions(X, y, self.experts[i], "Expert #" + str(i) + " predictions")
                plot_predictions(X, y, self)
        return self

    def predict(self, X):

        """
        description: returns the index of the most probable class of each sample
        input:  X - data matrix
        output: length-N vector of class indices (argmax of predict_proba)
        """

        return np.argmax(self.predict_proba(X), axis = 1)

    def score(self, X, y, sample_weight = None):

        """
        description: evaluates the EM objective
                     sum_i w_i * sum_j h_ij * log( g_j(x_i) * P(y_i | x_i, j) ),
                     where h_ij is g_j(x_i) * P(y_i | x_i, j) normalised over
                     the experts j. The argument of the log is clipped to
                     [1e-18, 100] so a zero probability does not produce -inf.
                     Note that this replaces the accuracy-based score of
                     ClassifierMixin.
        input:  X - data matrix
                y - label matrix
                sample_weight - optional importance of each sample; a length-N
                                vector is applied per sample (per row)
        output: objective value (scalar)
        """

        weighted_expert_accuracy = self.__weighted_expert_accuracy(X, y)
        expert_weights = self.__get_expert_weights(weighted_expert_accuracy)
        log_prob = np.multiply(np.log(np.clip(weighted_expert_accuracy, 1e-18, 100)), expert_weights)
        # "is not None": comparing an array with "!=" is element-wise and cannot
        # be used as an if-condition.
        if sample_weight is not None:
            weights = np.asarray(sample_weight, dtype = float)
            if weights.ndim == 1:
                # Turn the vector into a column so that it scales the rows
                # (samples) of the N x M matrix, not its columns (experts).
                weights = weights[:, np.newaxis]
            log_prob = np.multiply(log_prob, weights)

        return np.sum(log_prob)

    def predict_proba(self, X):

        """
        description: returns the probability that X belongs in each class
        input:  X - data matrix
        output: N x K matrix, the gate-weighted sum of the experts' class
                probabilities; rows sum to 1 provided the gate's and the
                experts' predict_proba rows do
        """

        expert_predictions = self.__predict_experts(X)
        gated_expert_predictions = np.multiply(expert_predictions, self.__gate_proba_broadcast(X))
        return np.sum(gated_expert_predictions, axis = 2)

    def __gate_proba_broadcast(self, X):

        """
        description: evaluates the gate and adds a class axis so the result
                     broadcasts against N x K x M expert predictions
        input:  X - data matrix
        output: N x 1 x M array of gate probabilities
        """

        gate_proba = np.asarray(self.gate.predict_proba(X), dtype = float)
        return gate_proba[:, np.newaxis, :]

    def __predict_experts(self, X):

        """
        description: finds the predicted probability for each point, of each
                     class, for each expert
        input:  X - data matrix
        output: N x K x M array; slice [:, :, j] is expert j's predict_proba(X)
                (so each slice sums to 1 along axis 1 if the expert returns
                normalised probabilities)
        """

        predictions = np.zeros((X.shape[0], self.num_classes_, self.num_experts_))
        for expert_index, expert in enumerate(self.experts):
            predictions[:, :, expert_index] = expert.predict_proba(X)
        return predictions

    def __initialize(self, X, y):

        """
        description: initializes every expert on its own random subset of the
                     data and the gate on random per-expert weights
        input:  X - data matrix
                y - label matrix
        output: None
        """

        num_samples = X.shape[0]
        # Each expert sees a fraction (1 - 1/M) of the samples. The division is
        # done in floating point on purpose: "1 / M" is integer division on
        # Python 2 and would always give a fraction of 1. A single expert would
        # get an empty subset from that formula, so it trains on everything.
        if self.num_experts_ > 1:
            fraction = 1. - 1. / self.num_experts_
        else:
            fraction = 1.
        subset_size = int(num_samples * fraction)

        for expert in self.experts:
            idx = np.array(random.sample(range(num_samples), subset_size), dtype = int)
            expert.fit(X[idx], y[idx])

        random_init = np.random.rand(num_samples, self.num_experts_)

        self.gate.fit(X, random_init)

    def __weighted_expert_accuracy(self, X, y):

        """
        description: returns matrix A_ij = g_j(x_i) * sum_k y_ik * P(k | x_i, j),
                     which is g_j(x_i) * P(y_i | x_i, j) when y is one-hot
        input:  X - data matrix
                y - N x K label matrix
        output: N x M matrix as described above
        """

        expert_predictions = self.__predict_experts(X)
        # Mask out the probabilities of the classes that are not the label.
        expert_accuracy = np.multiply(expert_predictions, y[:, :, np.newaxis])
        gated_expert_accuracy = np.multiply(expert_accuracy, self.__gate_proba_broadcast(X))
        # Collapse the class axis.
        return np.sum(gated_expert_accuracy, axis = 1)

    def __get_expert_weights(self, weighted_expert_accuracy):

        """
        description: normalises every row of the input so that it sums to 1
        input:  weighted_expert_accuracy - N x M matrix
        output: N x M matrix of per-sample expert weights; a row that sums to 0
                is divided by 0 and is not handled specially
        """

        row_sums = np.sum(weighted_expert_accuracy, axis = 1)[:, np.newaxis]
        return np.divide(weighted_expert_accuracy, row_sums)

    def __E_step(self, X, y):

        """
        description: finds the contribution of each expert to final prediction
        input:  X - data matrix
                y - label matrix
        output: N x M matrix with the weight of each expert for each point
        """

        weighted_expert_accuracy = self.__weighted_expert_accuracy(X, y)
        return self.__get_expert_weights(weighted_expert_accuracy)

    def __M_step(self, X, y, expert_weights):

        """
        description: fits experts and gate according to weights and returns new
                     obj function value
        input:  X - data matrix
                y - label matrix
                expert_weights - N x M weights obtained from E-step
        output: new obj function value (see score)
        """

        # The gate learns to reproduce the per-expert weights ...
        self.gate.fit(X, expert_weights)

        # ... and every expert is refit on all samples, each sample weighted by
        # that expert's own column (passed as third positional argument).
        for expert_index in range(self.num_experts_):
            fw = expert_weights[:, expert_index]
            self.experts[expert_index].fit(X, y, fw)

        return self.score(X, y)

    def get_params(self, deep = True):

        """
        description: returns the constructor parameters of this estimator
        input:  deep - accepted for interface compatibility; not used
        output: dict with keys 'experts', 'gate' and 'max_iter'
        """

        return {'experts' : self.experts,
                'gate' : self.gate,
                'max_iter' : self.max_iter}

    def set_params(self, **parameters):

        """
        description: sets each given parameter as an attribute of this estimator
        input:  parameters - attribute name / value pairs
        output: self
        """

        for parameter, value in parameters.items():
            # Built-in setattr; instances have no "setattr" method of their own.
            setattr(self, parameter, value)
        return self