import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
import math
class NeuralNetwork(BaseEstimator, ClassifierMixin):
def __init__(self, layers, obj_fun, regularization = 0., init_size = 1e-1, include_offset = True,
restarts = 10, step_size = 1e-1, learning_schedule = "bold driver", max_iter = 30000, criterion = 1e-6):
self.layers = layers
self.obj_fun = obj_fun
self.regularization = regularization
self.init_size = init_size
self.include_offset = include_offset
self.restarts = restarts
self.step_size = step_size
self.learning_schedule = learning_schedule
self.max_iter = max_iter
self.criterion = criterion
#layers: list of (size, transform_function_1, transform_function_2...) tuples
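    #e.g. layers = [(10, 'tanh'), (None, 'softmax')] gives one 10-unit tanh hidden
    #layer plus a softmax output whose size fit overwrites to match y; only each
    #tuple's size and first transform name are read by this class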
    #test_split / val_split give the fraction of the data kept for training; the
    #remainder becomes the test / validation holdout (1. disables the holdout)
    def fit(self, X_all, y_all, sample_weight = None, test_split = 1., val_split = .8):
        show_plots = False
        if test_split < 1:
            if sample_weight is None:
                X, X_test, y, y_test = train_test_split(X_all, y_all, train_size = test_split)
                sample_weight_test = None
            else:
                X, X_test, y, y_test, sample_weight, sample_weight_test = train_test_split(X_all, y_all, sample_weight, train_size = test_split)
        else:
            X, y, sample_weight = (X_all, y_all, sample_weight)
self.input_dim_ = X.shape[1]
self.output_dim_ = y.shape[1]
self.num_layers_ = len(self.layers)
        self.layers[-1] = tuple([self.output_dim_] + list(self.layers[-1][1:])) #force the output layer width to match y
        opt_weights = None
        opt_value = -1e100
if show_plots:
plt.figure()
        while opt_weights is None:
for i in range(self.restarts):
self.__init_weights()
obj_vals = []
val_scores = []
                if val_split < 1:
                    if sample_weight is None:
                        X_train, X_val, y_train, y_val = train_test_split(X, y, train_size = val_split)
                        sample_weight_train = None
                        sample_weight_val = None
                    else:
                        X_train, X_val, y_train, y_val, sample_weight_train, sample_weight_val = train_test_split(X, y, sample_weight, train_size = val_split)
                else:
                    X_train, y_train, sample_weight_train = (X, y, sample_weight)
best_val_value = -1e100
best_val_weights = None
#print "Weights: ", [x.shape for x in self.weights_]
step_size = self.step_size
while len(obj_vals) <= 1 or (abs(obj_vals[-1] - obj_vals[-2]) > self.criterion and len(obj_vals) <= self.max_iter):
#forward prop
inputs, weighted_inputs, activations = self.__forward(X_train)
                    obj_val = self.__eval_obj_fun(y_train, activations[-1], sample_weight_train, regularization = self.regularization)
obj_vals += [obj_val]
#backward prop
gradients = self.__backward(inputs, weighted_inputs, activations, y_train, sample_weight_train)
if len(obj_vals) == 1:
step_size = self.__get_step_size(len(obj_vals), step_size, -1e100, obj_vals[-1])
else:
step_size = self.__get_step_size(len(obj_vals), step_size, obj_vals[-2], obj_vals[-1])
self.update_weights(gradients, step_size)
if val_split < 1: #early termination with validation holdout
val_score = self.score(X_val, y_val, sample_weight_val)
val_scores += [val_score]
if val_score > best_val_value:
best_val_weights = [w.copy() for w in self.weights_]
best_val_value = val_score
else:
best_val_value = obj_vals[-1]
if len(obj_vals) >= self.max_iter:
print "OVERFLOW"
if val_split < 1:
self.weights_ = best_val_weights
if test_split < 1:
test_score = self.score(X_test, y_test, sample_weight_test)
else:
test_score = best_val_value
if opt_value < test_score:
opt_value = test_score
opt_weights = self.weights_
if show_plots:
plt.plot([math.log10(-1.*x) for x in obj_vals], color = 'green', label = "log Train")
plt.plot([math.log10(-1.*x) for x in val_scores], color = 'red', label = "log Val")
        if show_plots:
            plt.legend()
            plt.show()
self.weights_ = opt_weights
return self
def predict(self, X):
return np.argmax(self.predict_proba(X), axis = 1)
def predict_proba(self, X):
return self.__forward(X)[2][-1]
    #each weight matrix has shape (dim_before + 1, dim_after); the +1 row holds the
    #offsets and is dropped when include_offset is False
def __init_weights(self):
self.weights_ = []
offset_size = int(self.include_offset)
for layer_index in range(self.num_layers_):
if layer_index == 0:
before_dim = self.input_dim_
else:
before_dim = self.layers[layer_index - 1][0]
after_dim = self.layers[layer_index][0]
            self.weights_ += [(np.random.rand(before_dim + offset_size, after_dim) - .5)*self.init_size] #uniform in [-init_size/2, init_size/2)
def __forward(self, X):
inputs = []
activations = []
weighted_inputs = []
current_input = self.__add_offset(X)
        if self.weights_ is None:
            raise ValueError("Weights have not been initialized; call fit first.")
for layer_index in range(self.num_layers_):
weighted_input = np.dot(current_input, self.weights_[layer_index])
current_activation = self.__transform_function(weighted_input, layer_index)
inputs += [current_input]
weighted_inputs += [weighted_input]
activations += [current_activation]
current_input = self.__add_offset(current_activation)
return (inputs, weighted_inputs, activations)
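    #Backprop recursion, matching __forward's conventions (a sketch of the math, not
    #extra behavior): with z_l = [1, a_(l-1)] W_l and a_l = f_l(z_l),
    #  dObj/dz_l = dObj/da_l * f_l'(z_l)            (elementwise)
    #  dObj/dW_l = inputs_l^T dObj/dz_l - d(penalty)/dW_l
    #  dObj/da_(l-1) = dObj/dz_l W_l^T, offset column dropped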
def __backward(self, inputs, weighted_inputs, activations, y, sample_weight = None, err_gradient = None):
gradients = []
        if err_gradient is None:
            overall_gradient = self.__obj_fun_gradient(y, activations[-1], sample_weight) #N x categories
        else:
            overall_gradient = err_gradient
        for layer_index in range(self.num_layers_ - 1, -1, -1):
            activations_gradient = self.__gradient_function(weighted_inputs[layer_index], activations[layer_index], layer_index)
            layer_gradient = np.multiply(overall_gradient, activations_gradient) #dObj/dz for this layer
            reg_gradient = self.regularization * self.weights_[layer_index]
            if self.include_offset:
                reg_gradient[0, :] = 0. #the penalty excludes offsets, so its gradient must too
            gradients = [np.dot(inputs[layer_index].transpose(), layer_gradient) - reg_gradient] + gradients
            #propagate dObj/dz (not dObj/da) back through the weights, then drop the offset column
            overall_gradient = self.__remove_offset(np.dot(layer_gradient, self.weights_[layer_index].transpose()))
        return gradients
def update_weights(self, gradients, step_size):
for layer_index in range(self.num_layers_):
self.weights_[layer_index] += step_size * gradients[layer_index]
def score(self, X, y, sample_weight = None):
y_hat = self.predict_proba(X)
return self.__eval_obj_fun(y, y_hat, sample_weight)
    def accuracy(self, X, y):
        y_hat = self.predict(X) #predict already returns class indices
        return 1. * np.sum(np.argmax(y, axis = 1) == y_hat) / y.shape[0]
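    #All objectives below are written as scores to be maximized (negated losses), so
    #update_weights takes gradient *ascent* steps: 'maxent'/'logistic' is element-wise
    #cross-entropy, 'lsq'/'least squares' is negated squared error, and 'mll' is the
    #multinomial log-likelihood.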
def __eval_obj_fun(self, y, y_hat, sample_weight = None, regularization = 0.):
if self.obj_fun in ['maxent', 'logistic']:
eps = 1e-10
y_hat = np.clip(y_hat, eps, 1. - eps)
err_matrix = np.multiply(y, np.log(y_hat)) + np.multiply(1. - y, np.log(1. - y_hat))
elif self.obj_fun in ['lsq', 'least squares']:
err_matrix = -1. * np.square(y - y_hat)
elif self.obj_fun in ['mll']:
eps = 1e-10
y_hat = np.clip(y_hat, eps, 1.)
err_matrix = np.multiply(y, np.log(y_hat))
else:
raise ValueError("Objective function '" + self.obj_fun + "' is not supported.")
        if sample_weight is not None:
            err_matrix = err_matrix * np.asarray(sample_weight)[:, np.newaxis] #row-wise weights without building an N x N diag
        #the L2 penalty skips the offsets, which live in row 0 of each weight matrix
        if self.include_offset:
            penalty = np.sum([np.sum(np.square(w[1:, :])) for w in self.weights_])
        else:
            penalty = np.sum([np.sum(np.square(w)) for w in self.weights_])
        return np.sum(err_matrix) - regularization * penalty
def __obj_fun_gradient(self, y, y_hat, sample_weight = None):
if self.obj_fun in ['maxent', 'logistic']:
eps = 1e-10
y_hat = np.clip(y_hat, eps, 1. - eps)
grad = np.divide(y, y_hat) - np.divide(1. - y, 1. - y_hat)
            if sample_weight is not None:
                grad = grad * np.asarray(sample_weight)[:, np.newaxis]
            return grad
elif self.obj_fun in ['lsq', 'least squares']:
grad = y_hat - y
            if sample_weight is not None:
                grad = grad * np.asarray(sample_weight)[:, np.newaxis] #(N,) must be lifted to (N, 1) to weight rows
            return grad
elif self.obj_fun in ['mll']:
eps = 1e-10
y_hat = np.clip(y_hat, eps, 1. - eps)
grad = np.divide(y, y_hat)
            if sample_weight is not None:
                grad = grad * np.asarray(sample_weight)[:, np.newaxis]
            return grad
else:
raise ValueError("Objective function '" + self.obj_fun + "' is not supported.")
def __transform_function(self, X, layer_index):
funct = self.layers[layer_index][1]
        if funct in ['logistic']: #1 / (1 + exp(-x))
            X_transformed = 1. / (1. + np.exp(np.clip(-1.*X, -1e100, 50))) #clip keeps exp from overflowing
elif funct in ['tanh']: #tanh(x)
X_transformed = np.tanh(X)
elif funct in ['rectifier', 'hinge']: #max(0, x)
X_transformed = np.clip(X, 0, 1e100)
elif funct in ['softmax', 'multinomial']:
exp_X = np.exp(np.clip(X, -1e100, 50))
X_transformed = np.divide(exp_X, np.sum(exp_X, axis = 1)[:, np.newaxis])
elif funct in ['linear', 'none', None]:
X_transformed = X
else:
raise ValueError("Transform function '" + funct + "' is not supported.")
return X_transformed
def __gradient_function(self, X_weighted, Z, layer_index):
funct = self.layers[layer_index][1]
        if funct in ['logistic']: #sigma'(x) = sigma(x)(1 - sigma(x)) = Z^2 * exp(-x)
            Z_grad = np.multiply(np.square(Z), np.exp(np.clip(-1.*X_weighted, -1e100, 50)))
elif funct in ['tanh']: #tanh(x)
Z_grad = 1. - np.square(np.tanh(X_weighted))
elif funct in ['rectifier', 'hinge']: #max(0, x)
Z_grad = Z.copy()
Z_grad[np.nonzero(Z_grad)] = 1.
        elif funct in ['softmax', 'multinomial']:
            Z_grad = np.multiply(Z, 1. - Z) #diagonal of the softmax Jacobian; cross terms are dropped
        elif funct in ['linear', 'none', None]:
            Z_grad = np.ones(Z.shape)
else:
raise ValueError("Transform function '" + funct + "' is not supported.")
return Z_grad
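    #The offset (bias) is modeled as a constant-1 column prepended to each layer's
    #input, so row 0 of each weight matrix holds that layer's offsets.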
def __add_offset(self, X):
if self.include_offset:
results = np.empty((X.shape[0], X.shape[1] + 1))
results[:, 0] = 1
results[:, 1:] = X
return results
else:
return X.copy()
def __remove_offset(self, X):
if self.include_offset:
return X[:, 1:]
else:
return X.copy()
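    #'fixed' is a base rate decayed as 1/sqrt(t); 'bold driver' grows the step
    #slightly after every improvement in the objective and cuts it roughly in half
    #after any worsening.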
    def __get_step_size(self, t = None, last_step = None, last_obj_val = None, obj_val = None):
        if self.learning_schedule == 'fixed':
            return self.step_size/(t + 1.)**.5
        elif self.learning_schedule == 'bold driver':
            growth_rate = 1.02
            if last_obj_val < obj_val: #the objective improved
                return last_step * growth_rate
            else:
                return last_step / growth_rate * .5
        else:
            raise ValueError("Learning schedule '" + self.learning_schedule + "' is not supported.")
def get_params(self, deep = True):
return {'layers' : self.layers,
'obj_fun' : self.obj_fun,
'regularization' : self.regularization,
'init_size' : self.init_size,
'include_offset' : self.include_offset,
'restarts' : self.restarts,
'step_size' : self.step_size,
'learning_schedule' : self.learning_schedule,
'max_iter' : self.max_iter,
'criterion' : self.criterion}
def set_params(self, **parameters):
for parameter, value in parameters.items():
            setattr(self, parameter, value)
return self
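#Multinomial logistic regression expressed as a degenerate network: a single
#softmax layer trained on the cross-entropy ('maxent') objective.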
class NeuralLogistic(NeuralNetwork):
    def __init__(self, **kwargs):
        layers = [(None, 'softmax')] #output width is filled in by fit
        obj_fun = 'maxent'
        NeuralNetwork.__init__(self, layers, obj_fun, **kwargs) #forward any overrides instead of dropping them
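#Example usage: a minimal sketch on synthetic two-blob data. The blob layout,
#hidden width, and hyperparameters below are illustrative assumptions, not part
#of the model code. Note that fit expects one-hot labels, since it reads the
#output width from y.shape[1].
if __name__ == "__main__":
    np.random.seed(0)
    X = np.vstack([np.random.randn(100, 2) + 2., np.random.randn(100, 2) - 2.]) #two Gaussian blobs
    y = np.zeros((200, 2))
    y[:100, 0] = 1. #one-hot labels: first blob -> class 0
    y[100:, 1] = 1. #second blob -> class 1
    clf = NeuralNetwork(layers = [(8, 'tanh'), (None, 'softmax')], obj_fun = 'maxent',
                        restarts = 2, max_iter = 2000)
    clf.fit(X, y)
    print("Train accuracy:", clf.accuracy(X, y))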