b/diff_sex/DeepFM.py

import numpy as np
import tensorflow as tf
import pandas as pd
from time import time
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score
from tensorflow.contrib.layers.python.layers import batch_norm
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from yellowfin import YFOptimizer
from numpy.random import seed
seed(2020)
from tensorflow import set_random_seed
set_random_seed(2020)

class DeepFM(BaseEstimator, TransformerMixin):

    def __init__(self,
                 feature_size,
                 field_size,
                 embedding_size=32,
                 dropout_fm=[1.0, 1.0],
                 deep_layers=[32, 32],
                 dropout_deep=[1.0, 1.0, 1.0],
                 deep_layer_activation=tf.nn.relu,
                 epoch=10,
                 batch_size=50,
                 learning_rate=0.0001,
                 optimizer="adam",
                 batch_norm=0.6,
                 batch_norm_decay=0.90,
                 verbose=False,
                 random_seed=2016,
                 use_fm=True,
                 use_deep=True,
                 loss_type="logloss",
                 eval_metric=roc_auc_score,
                 l2_reg=0.001,
                 greater_is_better=True):
        assert (use_fm or use_deep)
        assert loss_type in ["logloss", "mse"], \
            "loss_type can be either 'logloss' for classification task or 'mse' for regression task"

        self.feature_size = feature_size
        self.field_size = field_size
        self.embedding_size = embedding_size

        self.dropout_fm = dropout_fm
        self.deep_layers = deep_layers
        self.dropout_dep = dropout_deep
        self.deep_layers_activation = deep_layer_activation
        self.use_fm = use_fm
        self.use_deep = use_deep
        self.l2_reg = l2_reg

        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer

        self.batch_norm = batch_norm
        self.batch_norm_decay = batch_norm_decay

        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        self.greater_is_better = greater_is_better
        self.train_result, self.valid_result, self.test_result = [], [], []

        self._init_graph()

    def _init_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(self.random_seed)
            # input data
            with tf.name_scope('inputs'):
                self.feat_index = tf.placeholder(tf.int32, shape=[None, None], name='feat_index')
                self.feat_value = tf.placeholder(tf.float32, shape=[None, None], name='feat_value')
                self.label = tf.placeholder(tf.float32, shape=[None, 1], name='label')
                self.dropout_keep_fm = tf.placeholder(tf.float32, shape=[None], name='dropout_keep_fm')
                self.dropout_keep_deep = tf.placeholder(tf.float32, shape=[None], name='dropout_keep_deep')
                self.train_phase = tf.placeholder(tf.bool, name='train_phase')

            # weights
            self.weights = self._initialize_weights()

            # model
            self.embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'], self.feat_index)  # None * F * K
            feat_value = tf.reshape(self.feat_value, shape=[-1, self.field_size, 1])
            self.embeddings = tf.multiply(self.embeddings, feat_value)

            # first order term
            self.y_first_order = tf.nn.embedding_lookup(self.weights['feature_bias'], self.feat_index)
            self.y_first_order = tf.reduce_sum(tf.multiply(self.y_first_order, feat_value), 2)
            self.y_first_order = tf.nn.dropout(self.y_first_order, self.dropout_keep_fm[0])

            # second order term: 0.5 * ((sum_i v_i x_i)^2 - sum_i (v_i x_i)^2)
            # sum-square part
            self.summed_features_emb = tf.reduce_sum(self.embeddings, 1)  # None * K
            self.summed_features_emb_square = tf.square(self.summed_features_emb)  # None * K

            # square-sum part
            self.squared_features_emb = tf.square(self.embeddings)
            self.squared_sum_features_emb = tf.reduce_sum(self.squared_features_emb, 1)  # None * K

            # second order
            self.y_second_order = 0.5 * tf.subtract(self.summed_features_emb_square, self.squared_sum_features_emb)
            self.y_second_order = tf.nn.dropout(self.y_second_order, self.dropout_keep_fm[1])

            # Deep component
            self.y_deep = tf.reshape(self.embeddings, shape=[-1, self.field_size * self.embedding_size])
            self.y_deep = tf.nn.dropout(self.y_deep, self.dropout_keep_deep[0])

            # deep layers with optional batch normalization
            for i in range(0, len(self.deep_layers)):
                self.y_deep = tf.add(tf.matmul(self.y_deep, self.weights["layer_%d" % i]), self.weights["bias_%d" % i])
                if self.batch_norm:
                    self.y_deep = self.batch_norm_layer(self.y_deep, train_phase=self.train_phase, scope_bn="bn_%d" % i)  # None * layer[i]
                self.y_deep = self.deep_layers_activation(self.y_deep)
                self.y_deep = tf.nn.dropout(self.y_deep, self.dropout_keep_deep[i + 1])

            # ----DeepFM---------
            if self.use_fm and self.use_deep:
                concat_input = tf.concat([self.y_first_order, self.y_second_order, self.y_deep], axis=1)
            elif self.use_fm:
                concat_input = tf.concat([self.y_first_order, self.y_second_order], axis=1)
            elif self.use_deep:
                concat_input = self.y_deep

            self.out = tf.add(tf.matmul(concat_input, self.weights['concat_projection']), self.weights['concat_bias'])

            # loss
            with tf.name_scope('loss'):
                if self.loss_type == "logloss":
                    self.out = tf.nn.sigmoid(self.out)
                    self.loss = tf.losses.log_loss(self.label, self.out)
                elif self.loss_type == "mse":
                    self.loss = tf.nn.l2_loss(tf.subtract(self.label, self.out))
                # l2 regularization on weights
                if self.l2_reg > 0:
                    self.loss += tf.contrib.layers.l2_regularizer(
                        self.l2_reg)(self.weights["concat_projection"])
                    if self.use_deep:
                        for i in range(len(self.deep_layers)):
                            self.loss += tf.contrib.layers.l2_regularizer(
                                self.l2_reg)(self.weights["layer_%d" % i])
                tf.summary.scalar('loss', self.loss)

            with tf.name_scope('train_optimizer'):
                if self.optimizer_type == "adam":
                    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8).minimize(self.loss)
                elif self.optimizer_type == "adagrad":
                    self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, initial_accumulator_value=1e-8).minimize(self.loss)
                elif self.optimizer_type == "gd":
                    self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
                elif self.optimizer_type == "momentum":
                    self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)
                elif self.optimizer_type == "yellowfin":
                    self.optimizer = YFOptimizer(learning_rate=self.learning_rate, momentum=0.0).minimize(self.loss)

            # init
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.merged = tf.summary.merge_all()
            # note: both writers point at the same log directory
            self.train_writer = tf.summary.FileWriter("logs/20210506_lasso_xgboost", self.sess.graph)
            self.test_writer = tf.summary.FileWriter("logs/20210506_lasso_xgboost", self.sess.graph)
            self.sess.run(init)

            # number of params
            total_parameters = 0
            for variable in self.weights.values():
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            if self.verbose > 0:
                print("#params: %d" % total_parameters)

    def _initialize_weights(self):
        weights = dict()

        with tf.name_scope('layer'):

            # embeddings
            with tf.name_scope('feature_embeddings'):
                weights['feature_embeddings'] = tf.Variable(tf.random_normal([self.feature_size, self.embedding_size], 0.0, 0.01), name='feature_embeddings')
                tf.summary.histogram('feature_embeddings', weights['feature_embeddings'])

            with tf.name_scope('_feature_bias'):
                weights['feature_bias'] = tf.Variable(tf.random_normal([self.feature_size, 1], 0.0, 1.0), name='feature_bias')
                tf.summary.histogram('feature_bias', weights['feature_bias'])

            # deep layers
            num_layer = len(self.deep_layers)
            input_size = self.field_size * self.embedding_size
            glorot = np.sqrt(2.0 / (input_size + self.deep_layers[0]))

            with tf.name_scope('layer_0'):
                weights['layer_0'] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(input_size, self.deep_layers[0])), dtype=np.float32, name="weights_layer_0")
                tf.summary.histogram('layer_0', weights['layer_0'])
            with tf.name_scope('bias_0'):
                weights['bias_0'] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(1, self.deep_layers[0])), dtype=np.float32, name="weights_bias_0")
                tf.summary.histogram('bias_0', weights['bias_0'])

            for i in range(1, num_layer):
                layer_names = "layer_%d" % i
                glorot = np.sqrt(2.0 / (self.deep_layers[i - 1] + self.deep_layers[i]))
                with tf.name_scope("layer_%d" % i):
                    weights["layer_%d" % i] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(self.deep_layers[i - 1], self.deep_layers[i])), name="layer_", dtype=np.float32)  # layers[i-1] * layers[i]
                    tf.summary.histogram(layer_names + 'layer', weights["layer_%d" % i])
                with tf.name_scope("bias_%d" % i):
                    weights["bias_%d" % i] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(1, self.deep_layers[i])), name="bias_", dtype=np.float32)  # 1 * layer[i]
                    tf.summary.histogram(layer_names + 'bias', weights["bias_%d" % i])

            # final concat projection layer
            if self.use_fm and self.use_deep:
                input_size = self.field_size + self.embedding_size + self.deep_layers[-1]
            elif self.use_fm:
                input_size = self.field_size + self.embedding_size
            elif self.use_deep:
                input_size = self.deep_layers[-1]

            glorot = np.sqrt(2.0 / (input_size + 1))
            with tf.name_scope("weights_concat_projection"):
                weights['concat_projection'] = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(input_size, 1)), dtype=np.float32, name="concat_projection")
                tf.summary.histogram('concat_projection', weights['concat_projection'])
            with tf.name_scope("weights_concat_bias"):
                weights['concat_bias'] = tf.Variable(tf.constant(0.01), dtype=np.float32, name="concat_bias")
                tf.summary.histogram('concat_bias', weights['concat_bias'])

        return weights

    def batch_norm_layer(self, x, train_phase, scope_bn):
        # build a training-mode and an inference-mode BN op over the same scope
        # and switch between them with tf.cond on the train_phase placeholder
        bn_train = batch_norm(x, decay=self.batch_norm_decay, center=True, scale=True, updates_collections=None,
                              is_training=True, reuse=None, trainable=True, scope=scope_bn)
        bn_inference = batch_norm(x, decay=self.batch_norm_decay, center=True, scale=True, updates_collections=None,
                                  is_training=False, reuse=True, trainable=True, scope=scope_bn)
        z = tf.cond(train_phase, lambda: bn_train, lambda: bn_inference)
        return z

    def get_batch(self, Xi, Xv, y, batch_size, index):
        # slice out the index-th mini-batch; labels are wrapped as [[y]] to match the label placeholder shape
        start = index * batch_size
        end = (index + 1) * batch_size
        end = end if end < len(y) else len(y)
        return Xi[start:end], Xv[start:end], [[y_] for y_ in y[start:end]]

    # shuffle three lists simultaneously
    def shuffle_in_unison_scary(self, a, b, c):
        rng_state = np.random.get_state()
        np.random.shuffle(a)
        np.random.set_state(rng_state)
        np.random.shuffle(b)
        np.random.set_state(rng_state)
        np.random.shuffle(c)

    def evaluate(self, Xi, Xv, y):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :param y: label of each sample in the dataset
        :return: metric of the evaluation
        """
        y_pred = self.predict(Xi, Xv)
        return self.eval_metric(y, y_pred)  # ROC AUC by default

    def predict(self, Xi, Xv):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :return: predicted probability of each sample
        """
        # dummy y
        dummy_y = [1] * len(Xi)
        batch_index = 0
        Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index)
        y_pred = None
        while len(Xi_batch) > 0:
            num_batch = len(y_batch)
            feed_dict = {self.feat_index: Xi_batch,
                         self.feat_value: Xv_batch,
                         # self.label: y_batch,
                         self.dropout_keep_fm: [1.0] * len(self.dropout_fm),
                         self.dropout_keep_deep: [1.0] * len(self.dropout_dep),  # disable dropout at inference
                         self.train_phase: False}

            batch_out = self.sess.run(self.out, feed_dict=feed_dict)

            if batch_index == 0:
                y_pred = np.reshape(batch_out, (num_batch,))
            else:
                y_pred = np.concatenate((y_pred, np.reshape(batch_out, (num_batch,))))

            batch_index += 1
            Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index)

        return y_pred

    def fit_on_batch(self, Xi, Xv, y):
        feed_dict = {self.feat_index: Xi,
                     self.feat_value: Xv,
                     self.label: y,
                     self.dropout_keep_fm: self.dropout_fm,
                     self.dropout_keep_deep: self.dropout_dep,
                     self.train_phase: True}

        loss, opt = self.sess.run([self.loss, self.optimizer], feed_dict=feed_dict)
        # the merged summaries are evaluated in a second run on the same batch, after the update step
        rs = self.sess.run(self.merged, feed_dict=feed_dict)

        return loss, rs

    def fit(self, Xi_train, Xv_train, y_train,
            Xi_valid=None, Xv_valid=None, y_valid=None,
            Xi_test=None, Xv_test=None, y_test=None,
            early_stopping=True, refit=False):
        """
        :param Xi_train: [[ind1_1, ind1_2, ...], [ind2_1, ind2_2, ...], ..., [indi_1, indi_2, ..., indi_j, ...], ...]
                         indi_j is the feature index of feature field j of sample i in the training set
        :param Xv_train: [[val1_1, val1_2, ...], [val2_1, val2_2, ...], ..., [vali_1, vali_2, ..., vali_j, ...], ...]
                         vali_j is the feature value of feature field j of sample i in the training set
                         vali_j can be either binary (1/0, for binary/categorical features) or float (e.g., 10.24, for numerical features)
        :param y_train: label of each sample in the training set
        :param Xi_valid: list of list of feature indices of each sample in the validation set
        :param Xv_valid: list of list of feature values of each sample in the validation set
        :param y_valid: label of each sample in the validation set
        :param early_stopping: perform early stopping or not
        :param refit: refit the model on the train+valid dataset or not
        :return: list of per-batch training losses
        """
        loss_batch = []
        has_valid = Xv_valid is not None

        # =============================================================================
        # Xv_resampled, y_resampled1 = RandomOverSampler(random_state=42).fit_sample(pd.DataFrame(Xv_train), pd.DataFrame(y_train))
        # Xi_resampled, y_resampled2 = RandomOverSampler(random_state=42).fit_sample(pd.DataFrame(Xi_train), pd.DataFrame(y_train))
        # if all(y_resampled1 == y_resampled2):
        #     print(Xv_resampled.shape)
        #     print(pd.DataFrame(Xv_train).shape)
        #     Xv_train = Xv_resampled.tolist()
        #     Xi_train = Xi_resampled.tolist()
        #     y_train = y_resampled1.tolist()
        # =============================================================================

        for epoch in range(self.epoch):
            t1 = time()
            self.shuffle_in_unison_scary(Xi_train, Xv_train, y_train)
            total_batch = int(len(y_train) / self.batch_size)

            for i in range(total_batch):
                Xi_batch, Xv_batch, y_batch = self.get_batch(Xi_train, Xv_train, y_train, self.batch_size, i)
                loss, rs = self.fit_on_batch(Xi_batch, Xv_batch, y_batch)
                loss_batch.append(loss)
                # print('loss = %.4f' % loss)
                self.train_writer.add_summary(rs, epoch)

            # evaluate training and validation datasets
            train_result = self.evaluate(Xi_train, Xv_train, y_train)
            self.train_result.append(train_result)
            if has_valid:
                valid_result = self.evaluate(Xi_valid, Xv_valid, y_valid)
                self.valid_result.append(valid_result)

            # test_result = self.evaluate(Xi_test, Xv_test, y_test)
            # self.test_result.append(test_result)

            if self.verbose > 0 and epoch % self.verbose == 0:
                if has_valid:
                    print("[%d] train-result=%.4f, valid-result=%.4f [%.1f s]"
                          % (epoch + 1, train_result, valid_result, time() - t1))
                else:
                    print("[%d] train-result=%.4f [%.1f s]"
                          % (epoch + 1, train_result, time() - t1))
            # early stopping
            if has_valid and early_stopping and self.training_termination(self.valid_result):
                break

        # fit a few more epochs on train+valid until the result reaches best_train_score
        if has_valid and refit:
            if self.greater_is_better:
                best_valid_score = max(self.valid_result)
            else:
                best_valid_score = min(self.valid_result)
            best_epoch = self.valid_result.index(best_valid_score)
            best_train_score = self.train_result[best_epoch]
            Xi_train = Xi_train + Xi_valid
            Xv_train = Xv_train + Xv_valid
            y_train = y_train + y_valid
            for epoch in range(100):
                self.shuffle_in_unison_scary(Xi_train, Xv_train, y_train)
                total_batch = int(len(y_train) / self.batch_size)
                for i in range(total_batch):
                    Xi_batch, Xv_batch, y_batch = self.get_batch(Xi_train, Xv_train, y_train, self.batch_size, i)
                    loss, rs = self.fit_on_batch(Xi_batch, Xv_batch, y_batch)
                    # print('loss2 = %.4f' % loss)
                # check
                train_result = self.evaluate(Xi_train, Xv_train, y_train)
                if abs(train_result - best_train_score) < 0.001 or \
                        (self.greater_is_better and train_result > best_train_score) or \
                        ((not self.greater_is_better) and train_result < best_train_score):
                    break
        return loss_batch

    def training_termination(self, valid_result):
        # stop when the validation metric has worsened for four consecutive epochs
        if len(valid_result) > 5:
            if self.greater_is_better:
                if valid_result[-1] < valid_result[-2] and \
                        valid_result[-2] < valid_result[-3] and \
                        valid_result[-3] < valid_result[-4] and \
                        valid_result[-4] < valid_result[-5]:
                    return True
            else:
                if valid_result[-1] > valid_result[-2] and \
                        valid_result[-2] > valid_result[-3] and \
                        valid_result[-3] > valid_result[-4] and \
                        valid_result[-4] > valid_result[-5]:
                    return True
        return False
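
# A minimal, hypothetical usage sketch (not part of the original file): it assumes the
# field-index / field-value encoding described in the fit() and predict() docstrings,
# with made-up feature_size/field_size and toy data. How Xi/Xv are produced from raw
# features is up to the caller.
if __name__ == "__main__":
    # toy dataset: 4 samples, 3 fields; indices point into a global feature dictionary of size 10
    Xi = [[0, 3, 7], [1, 4, 8], [2, 5, 9], [0, 6, 7]]                              # feature indices per field
    Xv = [[1.0, 1.0, 0.5], [1.0, 1.0, 1.2], [1.0, 1.0, 0.3], [1.0, 1.0, 2.4]]      # feature values per field
    y = [1, 0, 1, 0]

    model = DeepFM(feature_size=10, field_size=3,
                   embedding_size=8, deep_layers=[16, 16],
                   dropout_deep=[1.0, 1.0, 1.0],
                   epoch=2, batch_size=2, verbose=1)
    model.fit(Xi, Xv, y, Xi_valid=Xi, Xv_valid=Xv, y_valid=y, early_stopping=False)
    print(model.predict(Xi, Xv))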
|
|
|