Diff of /src/model.py [000000] .. [fb2ce2]

Switch to unified view

a b/src/model.py
1
import numpy as np
2
3
import os
4
5
from datetime import datetime
6
7
from math import ceil, floor, log
8
9
import tensorflow as tf
10
import tensorflow.keras as keras
11
import keras as K
12
13
14
from data_loader import DataGenerator
15
16
import pandas as pd
17
18
19
def weighted_log_loss(y_true, y_pred):
    """
    Class-weighted binary cross-entropy over the 6 diagnosis outputs.

    Can be used as the loss function in model.compile(); the 'any' class
    (first position) carries double weight.
    """
    weights = np.array([2., 1., 1., 1., 1., 1.])

    # Clip to avoid log(0) at either extreme.
    epsilon = K.backend.epsilon()
    clipped = K.backend.clip(y_pred, epsilon, 1.0 - epsilon)

    pos_term = y_true * K.backend.log(clipped) * weights
    neg_term = (1.0 - y_true) * K.backend.log(1.0 - clipped) * weights

    return K.backend.mean(-(pos_term + neg_term), axis=-1)
35
36
37
def _normalized_weighted_average(arr, weights=None):
    """
    Keras-backend analogue of numpy.average() along axis 1.

    With no weights this is a plain mean; otherwise each column is scaled
    by its weight and the sum is normalized by the total weight.
    """
    if weights is None:
        return K.backend.mean(arr, axis=1)

    total = K.backend.sum(weights)
    # Column vector so dot() contracts over the class axis.
    weight_col = K.backend.expand_dims(weights, axis=1)
    return K.backend.sum(K.backend.dot(arr, weight_col), axis=1) / total
48
49
50
def weighted_loss(y_true, y_pred):
    """
    Will be used as the metric in model.compile().

    Same idea as weighted_log_loss() above but with normalized weights,
    which should closely track the official competition metric:
        https://www.kaggle.com/kambarakun/lb-probe-weights-n-of-positives-scoring
    i.e. sklearn.metrics.log_loss with sample weights.
    """
    weights = K.backend.variable([2., 1., 1., 1., 1., 1.])

    # Clip predictions away from 0/1 before taking logs.
    epsilon = K.backend.epsilon()
    clipped = K.backend.clip(y_pred, epsilon, 1.0 - epsilon)

    bce = -(y_true * K.backend.log(clipped)
            + (1.0 - y_true) * K.backend.log(1.0 - clipped))

    per_sample = _normalized_weighted_average(bce, weights)
    return K.backend.mean(per_sample)
75
76
class PredictionCheckpoint(K.callbacks.Callback):
    """
    Callback that snapshots the full model after every epoch so training
    can be resumed if the run crashes.
    """

    def on_epoch_end(self, epoch, logs=None):
        """
        Save each epoch file in case of crash.

        Note: `logs=None` instead of the original `logs={}` — a mutable
        default argument is shared across calls and is a known pitfall.
        """
        print("Saving checkpoint")
        self.model.save("epoch{}.hdf5".format(epoch))
84
85
class MyDeepModel:
    """
    Thin wrapper around a keras-applications backbone: attaches a pooled,
    dropout-regularised 6-unit sigmoid head and handles training with
    checkpointing, LR scheduling and weight (re)loading.
    """

    def __init__(self, engine, input_dims, batch_size=5, num_epochs=4, learning_rate=1e-3,
                 decay_rate=1.0, decay_steps=1, weights="imagenet", verbose=1, train_image_dir="", model_filename=""):
        # Network configuration.
        self.engine = engine
        self.input_dims = input_dims
        self.weights = weights
        # Training configuration.
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.verbose = verbose
        # Paths.
        self.model_filename = model_filename
        self.train_images_dir = train_image_dir
        self._build()

    def _build(self):
        """
        Instantiate the backbone without its classification top and
        compile the full model with the competition metric attached.
        """
        backbone = self.engine(include_top=False, weights=self.weights, input_shape=self.input_dims,
                               backend=K.backend, layers=K.layers,
                               models=K.models, utils=K.utils)

        pooled = K.layers.GlobalAveragePooling2D(name='avg_pool')(backbone.output)
        pooled = K.layers.Dropout(0.3)(pooled)
        predictions = K.layers.Dense(6, activation="sigmoid", name='dense_output')(pooled)

        self.model = K.models.Model(inputs=backbone.input, outputs=predictions)
        self.model.compile(loss="binary_crossentropy", optimizer=K.optimizers.Adam(),
                           metrics=["categorical_accuracy", "accuracy", weighted_loss])

    def get_model_filename(self):
        """Return the checkpoint path this model saves its best weights to."""
        return self.model_filename

    def fit_model(self, train_df, valid_df):
        """
        Train on train_df and validate on valid_df, checkpointing the best
        model and decaying the learning rate on a fixed step schedule.
        """
        def _lr_for(epoch):
            # Step decay: multiply by decay_rate every decay_steps epochs.
            return self.learning_rate * pow(self.decay_rate, floor(epoch / self.decay_steps))

        best_saver = K.callbacks.ModelCheckpoint(filepath=self.model_filename, verbose=1, save_best_only=True)
        lr_schedule = K.callbacks.LearningRateScheduler(_lr_for)

        train_gen = DataGenerator(
            train_df.index,
            train_df,
            self.batch_size,
            self.input_dims,
            self.train_images_dir
        )
        valid_gen = DataGenerator(
            valid_df.index,
            valid_df,
            self.batch_size,
            self.input_dims,
            self.train_images_dir
        )

        self.model.fit_generator(
            train_gen,
            epochs=self.num_epochs,
            verbose=self.verbose,
            validation_data=valid_gen,
            use_multiprocessing=True,
            workers=4,
            callbacks=[PredictionCheckpoint(), lr_schedule, best_saver]
        )

    def save(self, path):
        """Persist the full model (architecture + weights) to *path*."""
        self.model.save(path)

    def load(self, path):
        """Load previously saved weights into the built architecture."""
        self.model.load_weights(path)
157
158
159
def create_submission(model, data, test_df):
    """
    Predict over *data* and write a long-format Kaggle submission.

    test_df supplies the index (assumed to be named 'Image') and columns
    (assumed named 'Diagnosis'); each row of the result is an
    '<image>_<diagnosis>' ID with its predicted probability. The frame is
    written to submission.csv and also returned.
    """
    print("Creating predictions on test dataset")

    predictions = model.predict_generator(data, verbose=1)

    # Wide frame (one column per diagnosis) -> long frame (one row per pair).
    wide = pd.DataFrame(predictions, index=test_df.index, columns=test_df.columns)
    submission = wide.stack().reset_index()

    submission.insert(loc=0, column='ID',
                      value=submission['Image'].astype(str) + "_" + submission['Diagnosis'])
    submission = submission.drop(["Image", "Diagnosis"], axis=1)

    print("Saving submissions to submission.csv")
    submission.to_csv('submission.csv', index=False)

    return submission