Models/main-finetuning_model.py

#!/usr/bin/env python3
# encoding=utf-8

"""
@Author: Bruce Shuyue Jia
@Date: Nov 5, 2021

This code is for reference only - two-class classification.
"""

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras

print("TensorFlow version:", tf.__version__)

# Read Training Data - Target Domain
train_data = pd.read_csv('../target/T-traindata.csv', header=None)
train_data = np.array(train_data).astype('float32')

# Read Training Labels - Target Domain
train_labels = pd.read_csv('../target/T-trainlabel.csv', header=None)
train_labels = np.array(train_labels).astype('int32')  # tf.one_hot requires integer indices
train_labels = tf.one_hot(indices=train_labels, depth=2)
train_labels = np.squeeze(train_labels)

# Read Testing Data - Target Domain
test_data = pd.read_csv('../target/T-testdata.csv', header=None)
test_data = np.array(test_data).astype('float32')

# Read Testing Labels - Target Domain
test_labels = pd.read_csv('../target/T-testlabel.csv', header=None)
test_labels = np.array(test_labels).astype('int32')  # tf.one_hot requires integer indices
test_labels = tf.one_hot(indices=test_labels, depth=2)
test_labels = np.squeeze(test_labels)
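
# Label shape walkthrough (assuming N rows of integer labels in {0, 1}):
#   raw CSV              -> (N, 1)
#   tf.one_hot(depth=2)  -> (N, 1, 2)
#   np.squeeze           -> (N, 2), matching the 2-unit softmax output below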


class CategoricalTP(tf.keras.metrics.Metric):
    """Counts predictions whose argmax matches the label's argmax.

    Note: despite the name, this accumulates the number of correct
    predictions over both classes, not class-1 true positives only.
    """

    def __init__(self, name='categorical_tp', **kwargs):
        super(CategoricalTP, self).__init__(name=name, **kwargs)
        self.tp = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred, axis=-1)
        y_true = tf.argmax(y_true, axis=-1)
        values = tf.equal(tf.cast(y_pred, 'int32'), tf.cast(y_true, 'int32'))
        values = tf.cast(values, 'float32')
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, 'float32')
            values = tf.multiply(values, sample_weight)

        self.tp.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.tp

    def reset_states(self):
        self.tp.assign(0.)
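
# A minimal sanity check of the metric above (hypothetical values):
#   m = CategoricalTP()
#   m.update_state(tf.constant([[1., 0.], [0., 1.]]),
#                  tf.constant([[0.9, 0.1], [0.2, 0.8]]))
#   m.result()  # -> 2.0, since both argmax predictions match their labels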


# Transformer Model
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.5):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

    def call(self, inputs, training=None):
        # Self-attention with residual connection and post-layer normalization
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward network with a second residual connection
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out = self.layernorm2(out1 + ffn_output)

        return out
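
# Data flow through the block (shapes assume a (batch, maxlen, embed_dim) input):
#   attn = MHA(x, x)                    # (batch, maxlen, embed_dim)
#   out1 = LayerNorm(x + attn)          # first residual, post-norm
#   out  = LayerNorm(out1 + FFN(out1))  # second residual, shape preserved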


class TokenAndPositionEmbedding(layers.Layer):
    """Reshapes a flat time series into (maxlen, embed_dim) tokens and adds a
    learned position embedding. There is no token-ID embedding here, since the
    inputs are already real-valued features."""

    def __init__(self, maxlen, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
        self.maxlen = maxlen
        self.embed_dim = embed_dim

    def call(self, x):
        positions = tf.range(start=0, limit=self.maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = tf.reshape(x, [-1, self.maxlen, self.embed_dim])
        out = x + positions

        return out
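
# Example (hypothetical numbers): with maxlen=3 and embed_dim=97, a flat
# (batch, 291) input is reshaped to (batch, 3, 97), and the learned (3, 97)
# position table is broadcast-added across the batch.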

maxlen = 3      # Number of tokens (time steps) per sample
embed_dim = 97  # Embedding size for each token
num_heads = 8   # Number of attention heads
ff_dim = 64     # Hidden layer size in feed forward network inside transformer
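# Each CSV row is therefore expected to hold maxlen * embed_dim = 291 features.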


def get_model():
    # Build the transformer encoder model.
    # Input: a flattened time series of length maxlen * embed_dim
    inputs = layers.Input(shape=(maxlen * embed_dim,))
    embedding_layer = TokenAndPositionEmbedding(maxlen, embed_dim)
    x = embedding_layer(inputs)

    # Encoder Architecture: two stacked transformer blocks
    transformer_block_1 = TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim)
    transformer_block_2 = TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim)
    x = transformer_block_1(x)
    x = transformer_block_2(x)

    # Output: pool over the token axis, then classify
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(64, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(2, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    return model
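
# Smoke test for the architecture (hypothetical, not part of the pipeline):
#   dummy = np.zeros((1, maxlen * embed_dim), dtype='float32')
#   get_model()(dummy).shape  # -> (1, 2)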


# Load the pre-trained model
pretrained_model = get_model()
pretrained_model.load_weights('../transformer/model_weight')

# Make sure the original model will not be updated,
# i.e., freeze the model parameters
pretrained_model.trainable = False


# Show the model architecture
pretrained_model.summary()
print('\n\n\n\n')

# Remove the last three layers (the Dense(64) head, its Dropout, and the softmax classifier)
num_layer = len(pretrained_model.layers)
pretrained_part = tf.keras.models.Sequential(pretrained_model.layers[0:(num_layer - 3)])
pretrained_part.summary()
print('\n\n\n\n')
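
# After the slice, pretrained_part ends at the frozen GlobalMaxPooling1D/Dropout
# stage and outputs (batch, embed_dim) features for the new trainable head below.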


# Add some new layers for fine-tuning
add_layers = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation='softmax')
])


# Build a new model: frozen pre-trained encoder + trainable classification head
rebuild_model = tf.keras.models.Sequential([
    pretrained_part,
    add_layers
])

rebuild_model.build((None, maxlen * embed_dim))  # input shape includes the batch dimension
rebuild_model.summary()
print('\n\n\n\n')

# Train and evaluate the new model
model = rebuild_model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy", CategoricalTP()])

history = model.fit(
    train_data, train_labels, batch_size=64, epochs=100, validation_data=(test_data, test_labels)
)
model.evaluate(test_data, test_labels, verbose=2)