Models/main-Transformer.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
@Author: Bruce Shuyue Jia
@Date: Jan 30, 2021
"""

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Read Training Data
train_data = pd.read_csv('training_set.csv', header=None)
train_data = np.array(train_data).astype('float32')

# Read Training Labels
train_labels = pd.read_csv('training_label.csv', header=None)
train_labels = np.array(train_labels).astype('float32')
train_labels = np.squeeze(train_labels)

# Read Testing Data
test_data = pd.read_csv('test_set.csv', header=None)
test_data = np.array(test_data).astype('float32')

# Read Testing Labels
test_labels = pd.read_csv('test_label.csv', header=None)
test_labels = np.array(test_labels).astype('float32')
test_labels = np.squeeze(test_labels)

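# Sanity check on the loaded data (a minimal sketch, assuming each CSV row is
# one flattened sample of maxlen * embed_dim = 3 * 97 = 291 values and the
# labels are binary 0/1; adjust if your data layout differs).
assert train_data.shape[1] == 3 * 97, "Expected 291 features per sample"
assert set(np.unique(train_labels)) <= {0.0, 1.0}, "Expected binary labels"
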
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.5):
        super(TransformerBlock, self).__init__()
        # Multi-head self-attention across the maxlen time points
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        # Position-wise feed-forward network
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        # Self-attention: query and value are both the input sequence
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        # Residual connection + layer normalization
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward sublayer with its own residual connection
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out = self.layernorm2(out1 + ffn_output)
        return out

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        # The inputs are already continuous feature vectors, so no token
        # embedding is needed; only a learned positional embedding is added.
        self.maxlen = maxlen
        self.embed_dim = embed_dim
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        positions = tf.range(start=0, limit=self.maxlen, delta=1)
        positions = self.pos_emb(positions)
        # Un-flatten each sample from (maxlen * embed_dim,) to (maxlen, embed_dim)
        x = tf.reshape(x, [-1, self.maxlen, self.embed_dim])
        out = x + positions
        return out

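# Quick shape check for the embedding layer (a minimal sketch; the dummy
# all-zeros batch below is an assumption for illustration, not part of the
# original pipeline). A flattened (2, 291) batch should come out as (2, 3, 97).
_demo_out = TokenAndPositionEmbedding(3, 97)(tf.zeros([2, 3 * 97]))
print('Embedding demo output shape:', _demo_out.shape)  # (2, 3, 97)
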
maxlen = 3       # Only consider 3 input time points
embed_dim = 97   # Features of each time point
num_heads = 8    # Number of attention heads
ff_dim = 64      # Hidden layer size in the feed-forward network inside the Transformer

# Input time series: each sample is a flattened (maxlen * embed_dim) vector
inputs = layers.Input(shape=(maxlen * embed_dim,))
embedding_layer = TokenAndPositionEmbedding(maxlen, embed_dim)
x = embedding_layer(inputs)

# Encoder Architecture: two stacked Transformer blocks
transformer_block_1 = TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim)
transformer_block_2 = TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim)
x = transformer_block_1(x)
x = transformer_block_2(x)

# Output head: pool over the time dimension, then classify
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

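# Optional: print the architecture to confirm tensor shapes before training
# (e.g., the embedding output should be (None, 3, 97)); not in the original script.
model.summary()
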
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss="binary_crossentropy",
              metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Recall()])

history = model.fit(
    train_data, train_labels, batch_size=128, epochs=1000, validation_data=(test_data, test_labels)
)
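
# Post-training evaluation (a minimal sketch, not part of the original script).
# model.evaluate returns the loss followed by the metrics in the order they
# were passed to compile(): precision, binary accuracy, recall.
results = model.evaluate(test_data, test_labels, batch_size=128, verbose=0)
for name, value in zip(model.metrics_names, results):
    print(f'{name}: {value:.4f}')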