import numpy as np
import tensorflow as tf
import pandas as pd
from time import time
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score
from tensorflow.contrib.layers.python.layers import batch_norm
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from yellowfin import YFOptimizer
from numpy.random import seed
seed(2020)
from tensorflow import set_random_seed
set_random_seed(2020)
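# seed NumPy and TensorFlow up front so weight initialization and data shuffling are reproducible across runs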
class DeepFM(BaseEstimator, TransformerMixin):
def __init__(self,
feature_size,
field_size,
embedding_size=32,
dropout_fm=[1.0, 1.0],
deep_layers=[32, 32],
dropout_deep=[1.0,1.0,1.0,],
deep_layer_activation=tf.nn.relu,
epoch=10,
batch_size=50,
learning_rate=0.0001,
optimizer="adam",
batch_norm=0.6,  # any truthy value enables batch normalization in _init_graph
batch_norm_decay=0.90,
verbose=False,
random_seed=2016,
use_fm=True,
use_deep=True,
loss_type="logloss",
eval_metric=roc_auc_score,
l2_reg=0.001,
greater_is_better=True):
assert (use_fm or use_deep)
assert loss_type in ["logloss", "mse"], "loss_type can be either 'logloss' for classification task or 'mse' for regression task"
self.feature_size = feature_size
self.field_size = field_size
self.embedding_size = embedding_size
self.dropout_fm = dropout_fm
self.deep_layers = deep_layers
self.dropout_dep = dropout_deep
self.deep_layers_activation = deep_layer_activation
self.use_fm = use_fm
self.use_deep = use_deep
self.l2_reg = l2_reg
self.epoch = epoch
self.batch_size = batch_size
self.learning_rate = learning_rate
self.optimizer_type = optimizer
self.batch_norm = batch_norm
self.batch_norm_decay = batch_norm_decay
self.verbose = verbose
self.random_seed = random_seed
self.loss_type = loss_type
self.eval_metric = eval_metric
self.greater_is_better = greater_is_better
self.train_result,self.valid_result,self.test_result = [],[],[]
self._init_graph()
def _init_graph(self):
self.graph = tf.Graph()
with self.graph.as_default():
tf.set_random_seed(self.random_seed)
#input data
with tf.name_scope('inputs'):
self.feat_index = tf.placeholder(tf.int32,shape=[None,None],name='feat_index')
self.feat_value = tf.placeholder(tf.float32,shape=[None,None], name='feat_value')
self.label = tf.placeholder(tf.float32,shape=[None,1],name='label')
self.dropout_keep_fm = tf.placeholder(tf.float32,shape=[None],name='dropout_keep_fm')
self.dropout_keep_deep = tf.placeholder(tf.float32, shape=[None], name='dropout_keep_deep')
self.train_phase = tf.placeholder(tf.bool,name='train_phase')
#weight
self.weights = self._initialize_weights()
# model
self.embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'],self.feat_index) # N * F * K
feat_value = tf.reshape(self.feat_value,shape=[-1,self.field_size,1])
self.embeddings = tf.multiply(self.embeddings,feat_value)
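# scale each field's embedding by its feature value: the resulting v_i * x_i tensors
# (N * F * K) feed both the FM second-order term and the deep component below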
# first order term
self.y_first_order = tf.nn.embedding_lookup(self.weights['feature_bias'],self.feat_index)
self.y_first_order = tf.reduce_sum(tf.multiply(self.y_first_order,feat_value),2)
self.y_first_order = tf.nn.dropout(self.y_first_order,self.dropout_keep_fm[0])
# second order term
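# The second-order term uses the standard FM identity
#   sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * [ (sum_i v_i x_i)^2 - sum_i (v_i x_i)^2 ],
# applied element-wise over the K embedding dimensions, which avoids forming
# all O(F^2) pairwise products explicitly.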
# sum-square-part
self.summed_features_emb = tf.reduce_sum(self.embeddings,1) # None * K
self.summed_features_emb_square = tf.square(self.summed_features_emb) # None * K
# square-sum part
self.squared_features_emb = tf.square(self.embeddings)
self.squared_sum_features_emb = tf.reduce_sum(self.squared_features_emb, 1) # None * K
#second order
self.y_second_order = 0.5 * tf.subtract(self.summed_features_emb_square,self.squared_sum_features_emb)
self.y_second_order = tf.nn.dropout(self.y_second_order,self.dropout_keep_fm[1])
# Deep component
self.y_deep = tf.reshape(self.embeddings,shape=[-1,self.field_size * self.embedding_size])
self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[0])
#deep_layers_activation
for i in range(0,len(self.deep_layers)):
self.y_deep = tf.add(tf.matmul(self.y_deep,self.weights["layer_%d" %i]), self.weights["bias_%d"%i])
if self.batch_norm:
self.y_deep = self.batch_norm_layer(self.y_deep, train_phase=self.train_phase, scope_bn="bn_%d" % i) # None * layer[i]
#print("norm yes")
self.y_deep = self.deep_layers_activation(self.y_deep)
self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[i+1])
#----DeepFM---------
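# combine the FM first-order (F dims), FM second-order (K dims) and deep (last hidden layer)
# outputs, then apply a single linear projection to produce the prediction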
if self.use_fm and self.use_deep:
concat_input = tf.concat([self.y_first_order, self.y_second_order, self.y_deep], axis=1)
elif self.use_fm:
concat_input = tf.concat([self.y_first_order, self.y_second_order], axis=1)
elif self.use_deep:
concat_input = self.y_deep
self.out = tf.add(tf.matmul(concat_input,self.weights['concat_projection']),self.weights['concat_bias'])
# loss
with tf.name_scope('loss'):
if self.loss_type == "logloss":
self.out = tf.nn.sigmoid(self.out)
self.loss = tf.losses.log_loss(self.label, self.out)
elif self.loss_type == "mse":
self.loss = tf.nn.l2_loss(tf.subtract(self.label, self.out))
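# note: tf.nn.l2_loss returns 0.5 * the sum of squared errors (not a batch mean),
# so this "mse" loss scales with batch size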
# l2 regularization on weights
if self.l2_reg > 0:
self.loss += tf.contrib.layers.l2_regularizer(
self.l2_reg)(self.weights["concat_projection"])
if self.use_deep:
for i in range(len(self.deep_layers)):
self.loss += tf.contrib.layers.l2_regularizer(
self.l2_reg)(self.weights["layer_%d" % i])
tf.summary.scalar('loss', self.loss)
with tf.name_scope('train_optimizer'):
if self.optimizer_type == "adam":
self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999,epsilon=1e-8).minimize(self.loss)
elif self.optimizer_type == "adagrad":
self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, initial_accumulator_value=1e-8).minimize(self.loss)
elif self.optimizer_type == "gd":
self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
elif self.optimizer_type == "momentum":
self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)
elif self.optimizer_type == "yellowfin":
self.optimizer = YFOptimizer(learning_rate=self.learning_rate, momentum=0.0).minimize(self.loss)
#init
self.saver = tf.train.Saver()
init = tf.global_variables_initializer()
self.sess = tf.Session()
self.merged = tf.summary.merge_all()
# keep train and test summaries in separate subdirectories so their event files do not interleave
self.train_writer = tf.summary.FileWriter("logs/20210506_lasso_xgboost/train", self.sess.graph)
self.test_writer = tf.summary.FileWriter("logs/20210506_lasso_xgboost/test", self.sess.graph)
self.sess.run(init)
# number of params
total_parameters = 0
for variable in self.weights.values():
shape = variable.get_shape()
variable_parameters = 1
for dim in shape:
variable_parameters *= dim.value
total_parameters += variable_parameters
if self.verbose > 0:
print("#params: %d" % total_parameters)
def _initialize_weights(self):
weights = dict()
with tf.name_scope('layer'):
#embeddings
with tf.name_scope('feature_embeddings'):
weights['feature_embeddings'] = tf.Variable(tf.random_normal([self.feature_size,self.embedding_size],0.0,0.01),name='feature_embeddings')
tf.summary.histogram('feature_embeddings', weights['feature_embeddings'])
with tf.name_scope('_feature_bias'):
weights['feature_bias'] = tf.Variable(tf.random_normal([self.feature_size,1],0.0,1.0),name='feature_bias')
tf.summary.histogram('feature_bias', weights['feature_bias'])
#deep layers
num_layer = len(self.deep_layers)
input_size = self.field_size * self.embedding_size
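# Glorot/Xavier-style initialization scale: std = sqrt(2 / (fan_in + fan_out))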
glorot = np.sqrt(2.0/(input_size + self.deep_layers[0]))
with tf.name_scope('layer_0'):
weights['layer_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(input_size,self.deep_layers[0])),dtype=np.float32,name="weights_layer_0")
tf.summary.histogram('layer_0', weights['layer_0'])
with tf.name_scope('bias_0'):
weights['bias_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[0])),dtype=np.float32,name="weights_bias_0")
tf.summary.histogram('bias_0', weights['bias_0'])
for i in range(1,num_layer):
layer_names="layer_%d" % i
glorot = np.sqrt(2.0 / (self.deep_layers[i - 1] + self.deep_layers[i]))
with tf.name_scope("layer_%d" % i):
weights["layer_%d" % i] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(self.deep_layers[i - 1], self.deep_layers[i])),name="layer_",dtype=np.float32) # layers[i-1] * layers[i]
tf.summary.histogram(layer_names+'layer' , weights["layer_%d" % i])
with tf.name_scope("bias_%d" % i):
weights["bias_%d" % i] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(1, self.deep_layers[i])),name="bias_",dtype=np.float32) # 1 * layer[i]
tf.summary.histogram(layer_names+'bias' , weights["bias_%d" % i])
# final concat projection layer
if self.use_fm and self.use_deep:
input_size = self.field_size + self.embedding_size + self.deep_layers[-1]
elif self.use_fm:
input_size = self.field_size + self.embedding_size
elif self.use_deep:
input_size = self.deep_layers[-1]
glorot = np.sqrt(2.0/(input_size + 1))
with tf.name_scope("weights_concat_projection"):
weights['concat_projection'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(input_size,1)),dtype=np.float32,name="concat_projection")
tf.summary.histogram('concat_projection' , weights['concat_projection'])
with tf.name_scope("weights_concat_bias"):
weights['concat_bias'] = tf.Variable(tf.constant(0.01),dtype=np.float32,name="concat_bias")
tf.summary.histogram('concat_bias' , weights['concat_bias'])
return weights
def batch_norm_layer(self, x, train_phase, scope_bn):
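# two batch-norm ops share variables through the same scope (reuse=True on the second):
# the training branch updates the moving statistics, the inference branch only reads them,
# and tf.cond selects between them via the train_phase placeholder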
bn_train = batch_norm(x, decay=self.batch_norm_decay, center=True, scale=True, updates_collections=None,
is_training=True, reuse=None, trainable=True, scope=scope_bn)
bn_inference = batch_norm(x, decay=self.batch_norm_decay, center=True, scale=True, updates_collections=None,
is_training=False, reuse=True, trainable=True, scope=scope_bn)
z = tf.cond(train_phase, lambda: bn_train, lambda: bn_inference)
return z
def get_batch(self,Xi,Xv,y,batch_size,index):
start = index * batch_size
end = (index + 1) * batch_size
end = end if end < len(y) else len(y)
return Xi[start:end],Xv[start:end],[[y_] for y_ in y[start:end]]
# shuffle three lists simultaneously
def shuffle_in_unison_scary(self, a, b, c):
rng_state = np.random.get_state()
np.random.shuffle(a)
np.random.set_state(rng_state)
np.random.shuffle(b)
np.random.set_state(rng_state)
np.random.shuffle(c)
def evaluate(self, Xi, Xv, y):
"""
:param Xi: list of list of feature indices of each sample in the dataset
:param Xv: list of list of feature values of each sample in the dataset
:param y: label of each sample in the dataset
:return: metric of the evaluation
"""
y_pred = self.predict(Xi, Xv)
return self.eval_metric(y, y_pred)  # e.g. ROC AUC for classification
def predict(self, Xi, Xv):
"""
:param Xi: list of list of feature indices of each sample in the dataset
:param Xv: list of list of feature values of each sample in the dataset
:return: predicted probability of each sample
"""
# dummy y
dummy_y = [1] * len(Xi)
batch_index = 0
Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index)
y_pred = None
while len(Xi_batch) > 0:
num_batch = len(y_batch)
feed_dict = {self.feat_index: Xi_batch,
self.feat_value: Xv_batch,
#self.label: y_batch,
self.dropout_keep_fm: [1.0] * len(self.dropout_fm),
self.dropout_keep_deep: [1.0] * len(self.dropout_dep),  # disable all dropout at inference time
self.train_phase: False}
batch_out = self.sess.run(self.out, feed_dict=feed_dict)
if batch_index == 0:
y_pred = np.reshape(batch_out, (num_batch,))
else:
y_pred = np.concatenate((y_pred, np.reshape(batch_out, (num_batch,))))
batch_index += 1
Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index)
return y_pred
def fit_on_batch(self,Xi,Xv,y):
feed_dict = {self.feat_index:Xi,
self.feat_value:Xv,
self.label:y,
self.dropout_keep_fm: self.dropout_fm,
self.dropout_keep_deep: self.dropout_dep,
self.train_phase:True}
loss,opt = self.sess.run([self.loss,self.optimizer],feed_dict=feed_dict)
rs = self.sess.run(self.merged,feed_dict = feed_dict)
return loss,rs
def fit(self, Xi_train, Xv_train, y_train,
Xi_valid=None, Xv_valid=None, y_valid=None,
Xi_test=None, Xv_test=None,y_test=None,
early_stopping=True, refit=False):
"""
:param Xi_train: [[ind1_1, ind1_2, ...], [ind2_1, ind2_2, ...], ..., [indi_1, indi_2, ..., indi_j, ...], ...]
indi_j is the feature index of feature field j of sample i in the training set
:param Xv_train: [[val1_1, val1_2, ...], [val2_1, val2_2, ...], ..., [vali_1, vali_2, ..., vali_j, ...], ...]
vali_j is the feature value of feature field j of sample i in the training set
vali_j can be either binary (1/0, for binary/categorical features) or float (e.g., 10.24, for numerical features)
:param y_train: label of each sample in the training set
:param Xi_valid: list of list of feature indices of each sample in the validation set
:param Xv_valid: list of list of feature values of each sample in the validation set
:param y_valid: label of each sample in the validation set
:param early_stopping: perform early stopping or not
:param refit: refit the model on the train+valid dataset or not
:return: None
"""
loss_batch = []
has_valid = Xv_valid is not None
# =============================================================================
# Xv_resampled, y_resampled1 = RandomOverSampler(random_state=42).fit_sample(pd.DataFrame(Xv_train), pd.DataFrame(y_train))
# Xi_resampled, y_resampled2 = RandomOverSampler(random_state=42).fit_sample(pd.DataFrame(Xi_train), pd.DataFrame(y_train))
# if all(y_resampled1 == y_resampled2) :
# print(Xv_resampled.shape)
# print(pd.DataFrame(Xv_train).shape)
# Xv_train = Xv_resampled.tolist()
# Xi_train = Xi_resampled.tolist()
# y_train = y_resampled1.tolist()
# =============================================================================
for epoch in range(self.epoch):
t1 = time()
self.shuffle_in_unison_scary(Xi_train, Xv_train, y_train)
total_batch = int(len(y_train) / self.batch_size)
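# note: samples beyond the last full batch are skipped for this epoch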
for i in range(total_batch):
Xi_batch, Xv_batch, y_batch = self.get_batch(Xi_train, Xv_train, y_train, self.batch_size, i)
loss,rs = self.fit_on_batch(Xi_batch, Xv_batch, y_batch)
loss_batch.append(loss)
# print('loss = %.4f' % loss)
self.train_writer.add_summary(rs,epoch)
# evaluate training and validation datasets
train_result = self.evaluate(Xi_train, Xv_train, y_train)
self.train_result.append(train_result)
if has_valid:
valid_result = self.evaluate(Xi_valid, Xv_valid, y_valid)
self.valid_result.append(valid_result)
# test_result = self.evaluate(Xi_test, Xv_test, y_test)
# self.test_result.append(test_result)
if self.verbose > 0 and epoch % self.verbose == 0:
if has_valid:
    print("[%d] train-result=%.4f, valid-result=%.4f [%.1f s]"
          % (epoch + 1, train_result, valid_result, time() - t1))
else:
    print("[%d] train-result=%.4f [%.1f s]"
          % (epoch + 1, train_result, time() - t1))
#early_stopping
if has_valid and early_stopping and self.training_termination(self.valid_result):
break
# fit a few more epoch on train+valid until result reaches the best_train_score
if has_valid and refit:
if self.greater_is_better:
best_valid_score = max(self.valid_result)
else:
best_valid_score = min(self.valid_result)
best_epoch = self.valid_result.index(best_valid_score)
best_train_score = self.train_result[best_epoch]
Xi_train = Xi_train + Xi_valid
Xv_train = Xv_train + Xv_valid
y_train = y_train + y_valid
for epoch in range(100):
self.shuffle_in_unison_scary(Xi_train, Xv_train, y_train)
total_batch = int(len(y_train) / self.batch_size)
for i in range(total_batch):
Xi_batch, Xv_batch, y_batch = self.get_batch(Xi_train, Xv_train, y_train,self.batch_size, i)
loss,rs = self.fit_on_batch(Xi_batch, Xv_batch, y_batch)
# print('loss2 = %.4f' % loss)
# check
train_result = self.evaluate(Xi_train, Xv_train, y_train)
if abs(train_result - best_train_score) < 0.001 or \
(self.greater_is_better and train_result > best_train_score) or \
((not self.greater_is_better) and train_result < best_train_score):
break
return loss_batch
def training_termination(self, valid_result):
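# stop once the validation metric has worsened for four consecutive epochs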
if len(valid_result) > 5:
if self.greater_is_better:
if valid_result[-1] < valid_result[-2] and \
valid_result[-2] < valid_result[-3] and \
valid_result[-3] < valid_result[-4] and \
valid_result[-4] < valid_result[-5]:
return True
else:
if valid_result[-1] > valid_result[-2] and \
valid_result[-2] > valid_result[-3] and \
valid_result[-3] > valid_result[-4] and \
valid_result[-4] > valid_result[-5]:
return True
return False
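# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original script).
# It assumes a toy dataset with `field_size` categorical fields whose indices
# have already been mapped into a global feature space of size `feature_size`;
# the Xi/Xv lists follow the format documented in fit(). All sizes and
# hyperparameters below are made up for demonstration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    n_samples, field_size, feature_size = 200, 5, 50

    # each sample: one active index per field, value 1.0 (one-hot style fields)
    Xi = np.random.randint(0, feature_size, size=(n_samples, field_size)).tolist()
    Xv = [[1.0] * field_size for _ in range(n_samples)]
    y = np.random.randint(0, 2, size=n_samples).tolist()

    split = int(0.8 * n_samples)
    model = DeepFM(feature_size=feature_size,
                   field_size=field_size,
                   embedding_size=8,
                   deep_layers=[16, 16],
                   epoch=2,
                   batch_size=32,
                   verbose=1)
    model.fit(Xi[:split], Xv[:split], y[:split],
              Xi_valid=Xi[split:], Xv_valid=Xv[split:], y_valid=y[split:],
              early_stopping=False, refit=False)
    print("validation AUC: %.4f" % model.evaluate(Xi[split:], Xv[split:], y[split:]))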