b/R/custom_layers.R
#' Aggregation layer
#'
#' Aggregates the output of time-distributed representations using the sum, max and/or mean function.
#'
#' @param load_r6 Whether to load the R6 layer class.
#' @param method At least one of `"sum"`, `"max"` or `"mean"`.
#' @param multi_in Whether to aggregate for a model with multiple inputs (and shared weights).
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_aggregate_time_dist_wrapper()
#' }
#' @returns A keras layer applying pooling operation(s).
#' @export
layer_aggregate_time_dist_wrapper <- function(load_r6 = FALSE, method = "sum", multi_in = FALSE) {

  layer_aggregate_time_dist <- keras::new_layer_class(
    "layer_aggregate_time_dist",

    initialize = function(method, multi_in = FALSE, ...) {
      super$initialize(...)
      self$method <- method
      self$axis <- ifelse(multi_in, 0L, 1L)
      self$multi_in <- multi_in
    },

    call = function(inputs, mask = NULL) {
      out <- list()
      if ("sum" %in% self$method) {
        out <- c(out, tensorflow::tf$math$reduce_sum(inputs, axis = self$axis))
      }
      if ("mean" %in% self$method) {
        out <- c(out, tensorflow::tf$math$reduce_mean(inputs, axis = self$axis))
      }
      if ("max" %in% self$method) {
        out <- c(out, tensorflow::tf$math$reduce_max(inputs, axis = self$axis))
      }

      if (length(out) > 1) {
        out <- tensorflow::tf$concat(out, axis = -1L)
      } else {
        out <- out[[1]]
      }

      out
    },

    get_config = function() {
      config <- super$get_config()
      config$method <- self$method
      config$multi_in <- self$multi_in
      config
    }
  )

  if (load_r6) {
    return(layer_aggregate_time_dist)
  } else {
    return(layer_aggregate_time_dist(method = method, multi_in = multi_in))
  }

}
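
# Usage sketch (kept as a comment; illustrative only): pooling a (batch, time, features)
# tensor over its time axis. The input shape and method choice below are example values,
# not requirements of the layer.
#
# library(keras)
# inputs <- layer_input(shape = c(10, 32))                            # (time steps, features)
# agg <- layer_aggregate_time_dist_wrapper(method = c("mean", "max"))
# outputs <- agg(inputs)                                              # mean and max pooling, concatenated
# model <- keras::keras_model(inputs, outputs)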

#' Layer for positional embedding
#'
#' Positional encoding layer with learned embedding.
#'
#' @inheritParams create_model_transformer
#' @param load_r6 Whether to load the R6 layer class.
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_pos_embedding_wrapper()
#' }
#' @returns A keras layer implementing positional embedding.
#' @export
layer_pos_embedding_wrapper <- function(maxlen = 100, vocabulary_size = 4, load_r6 = FALSE, embed_dim = 64) {

  layer_pos_embedding <- keras::new_layer_class(
    "layer_pos_embedding",

    initialize = function(maxlen = 100, vocabulary_size = 4, embed_dim = 64, ...) {
      super$initialize(...)
      if (embed_dim != 0) {
        self$token_emb <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(vocabulary_size),
                                                                output_dim = as.integer(embed_dim))
        self$position_embeddings <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(maxlen),
                                                                          output_dim = as.integer(embed_dim))
      } else {
        self$position_embeddings <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(maxlen),
                                                                          output_dim = as.integer(vocabulary_size))
      }
      self$embed_dim <- as.integer(embed_dim)
      self$maxlen <- as.integer(maxlen)
      self$vocabulary_size <- as.integer(vocabulary_size)
    },

    call = function(inputs) {
      positions <- tensorflow::tf$range(self$maxlen, dtype = "int32")
      embedded_positions <- self$position_embeddings(positions)
      if (self$embed_dim != 0) inputs <- self$token_emb(inputs)
      inputs + embedded_positions
    },

    get_config = function() {
      config <- super$get_config()
      config$maxlen <- self$maxlen
      config$vocabulary_size <- self$vocabulary_size
      config$embed_dim <- self$embed_dim
      config
    }
  )

  if (load_r6) {
    return(layer_pos_embedding)
  } else {
    return(layer_pos_embedding(maxlen = maxlen, vocabulary_size = vocabulary_size, embed_dim = embed_dim))
  }

}
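
# Usage sketch (kept as a comment; illustrative only): adding learned positional
# embeddings to an integer-encoded sequence. maxlen, vocabulary_size and embed_dim
# below are example values.
#
# library(keras)
# inputs <- layer_input(shape = c(100), dtype = "int32")
# emb <- layer_pos_embedding_wrapper(maxlen = 100, vocabulary_size = 4, embed_dim = 64)
# outputs <- emb(inputs)   # (batch, 100, 64): token embedding plus position embedding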

#' Layer for positional encoding
#'
#' Positional encoding layer with sine/cosine matrix of different frequencies.
#'
#' @inheritParams create_model_transformer
#' @param load_r6 Whether to load the R6 layer class.
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_pos_sinusoid_wrapper()
#' }
#' @returns A keras layer implementing positional encoding using sine/cosine waves.
#' @export
layer_pos_sinusoid_wrapper <- function(maxlen = 100, vocabulary_size = 4, n = 10000, load_r6 = FALSE, embed_dim = 64) {

  layer_pos_sinusoid <- keras::new_layer_class(
    "layer_pos_sinusoid",
    initialize = function(maxlen, vocabulary_size, n, embed_dim, ...) {
      super$initialize(...)
      self$maxlen <- as.integer(maxlen)
      self$vocabulary_size <- vocabulary_size
      self$n <- as.integer(n)
      self$pe_matrix <- positional_encoding(seq_len = maxlen,
                                            d_model = ifelse(embed_dim == 0,
                                                             as.integer(vocabulary_size),
                                                             as.integer(embed_dim)),
                                            n = n)

      if (embed_dim != 0) {
        self$token_emb <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(vocabulary_size),
                                                                output_dim = as.integer(embed_dim))
      }
      self$embed_dim <- as.integer(embed_dim)
    },

    call = function(inputs) {
      if (self$embed_dim != 0) {
        inputs <- self$token_emb(inputs)
      }
      inputs + self$pe_matrix
    },

    get_config = function() {
      config <- super$get_config()
      config$maxlen <- self$maxlen
      config$vocabulary_size <- self$vocabulary_size
      config$n <- self$n
      config$embed_dim <- self$embed_dim
      config$pe_matrix <- self$pe_matrix
      config
    }
  )

  if (load_r6) {
    return(layer_pos_sinusoid)
  } else {
    return(layer_pos_sinusoid(maxlen = maxlen, vocabulary_size = vocabulary_size, n = n,
                              embed_dim = embed_dim))
  }

}
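
# Usage sketch (kept as a comment; illustrative only): with embed_dim > 0 the layer first
# embeds the integer tokens and then adds the fixed sine/cosine matrix; with embed_dim = 0
# the matrix is added directly to an input of width vocabulary_size. Sizes are example values.
#
# library(keras)
# inputs <- layer_input(shape = c(100), dtype = "int32")
# pe <- layer_pos_sinusoid_wrapper(maxlen = 100, vocabulary_size = 4, n = 10000, embed_dim = 64)
# outputs <- pe(inputs)    # (batch, 100, 64)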

#' Transformer block
#'
#' Creates a transformer block, consisting of self-attention, dense layers, layer normalization, residual connections and dropout.
#'
#' @inheritParams create_model_transformer
#' @param dropout_rate Rate to randomly drop out connections.
#' @param load_r6 Whether to return the layer class.
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_transformer_block_wrapper()
#' }
#' @returns A keras layer implementing a transformer block.
#' @export
layer_transformer_block_wrapper <- function(num_heads = 2, head_size = 4, dropout_rate = 0, ff_dim = 64,
                                            vocabulary_size = 4, load_r6 = FALSE, embed_dim = 64) {

  layer_transformer_block <- keras::new_layer_class(
    "layer_transformer_block",
    initialize = function(num_heads = 2, head_size = 4, dropout_rate = 0, ff_dim = 64L, vocabulary_size = 4, embed_dim = 64, ...) {
      super$initialize(...)
      self$num_heads <- num_heads
      self$head_size <- head_size
      self$dropout_rate <- dropout_rate
      self$ff_dim <- ff_dim
      self$embed_dim <- as.integer(embed_dim)
      self$vocabulary_size <- vocabulary_size
      self$att <- tensorflow::tf$keras$layers$MultiHeadAttention(num_heads = as.integer(num_heads),
                                                                 key_dim = as.integer(head_size))

      self$ffn <- keras::keras_model_sequential() %>%
        keras::layer_dense(units = as.integer(ff_dim), activation = "relu") %>%
        keras::layer_dense(units = ifelse(embed_dim == 0, as.integer(vocabulary_size), as.integer(embed_dim)))

      self$layernorm1 <- keras::layer_layer_normalization(epsilon = 1e-6)
      self$layernorm2 <- keras::layer_layer_normalization(epsilon = 1e-6)
      self$dropout1 <- keras::layer_dropout(rate = dropout_rate)
      self$dropout2 <- keras::layer_dropout(rate = dropout_rate)
    },

    call = function(inputs) {
      attn_output <- self$att(inputs, inputs, inputs)
      attn_output <- self$dropout1(attn_output)
      out1 <- self$layernorm1(inputs + attn_output)
      ffn_output <- self$ffn(out1)
      ffn_output <- self$dropout2(ffn_output)
      seq_output <- self$layernorm2(out1 + ffn_output)
      return(seq_output)
    },

    get_config = function() {
      config <- super$get_config()
      config$num_heads <- self$num_heads
      config$head_size <- self$head_size
      config$dropout_rate <- self$dropout_rate
      config$ff_dim <- self$ff_dim
      config$vocabulary_size <- self$vocabulary_size
      config$embed_dim <- self$embed_dim
      config
    }
  )

  if (load_r6) {
    return(layer_transformer_block)
  } else {
    return(layer_transformer_block(num_heads = num_heads,
                                   head_size = head_size,
                                   dropout_rate = dropout_rate,
                                   vocabulary_size = vocabulary_size,
                                   embed_dim = embed_dim,
                                   ff_dim = ff_dim))
  }

}
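
# Usage sketch (kept as a comment; illustrative only): a small encoder combining the
# positional embedding, transformer block and aggregation layers defined in this file.
# All sizes and the two-class output head are example choices.
#
# library(keras)
# inputs <- layer_input(shape = c(100), dtype = "int32")
# x <- layer_pos_embedding_wrapper(maxlen = 100, vocabulary_size = 4, embed_dim = 64)(inputs)
# x <- layer_transformer_block_wrapper(num_heads = 2, head_size = 16, dropout_rate = 0.1,
#                                      ff_dim = 128, embed_dim = 64)(x)
# x <- layer_aggregate_time_dist_wrapper(method = "mean")(x)
# outputs <- keras::layer_dense(x, units = 2, activation = "softmax")
# model <- keras::keras_model(inputs, outputs)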

# Layer wrapper computing the cosine similarity of its inputs via the
# cosine_similarity() helper defined elsewhere in the package.
layer_cosine_sim_wrapper <- function(load_r6 = FALSE) {

  layer_cosine_sim <- keras::new_layer_class(
    "layer_cosine_sim",

    initialize = function(...) {
      super$initialize(...)
    },

    call = function(inputs) {
      cosine_similarity(vects = inputs)
    },

    get_config = function() {
      config <- super$get_config()
      config
    }
  )

  if (load_r6) {
    return(layer_cosine_sim)
  } else {
    return(layer_cosine_sim())
  }

}

# Layer wrapper computing the euclidean distance between its inputs via the
# euclidean_distance() helper defined elsewhere in the package.
layer_euc_dist_wrapper <- function(load_r6 = FALSE) {

  layer_euc_dist <- keras::new_layer_class(
    "layer_euc_dist",

    initialize = function(...) {
      super$initialize(...)
    },

    call = function(inputs) {
      euclidean_distance(vects = inputs)
    },

    get_config = function() {
      config <- super$get_config()
      config
    }
  )

  if (load_r6) {
    return(layer_euc_dist)
  } else {
    return(layer_euc_dist())
  }

}
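
# Usage sketch (kept as a comment; illustrative only): both wrappers look intended for
# the two arms of a siamese model, passing a pair of tensors through the
# cosine_similarity()/euclidean_distance() helpers. The list-of-two-inputs calling
# convention below is an assumption based on the helpers' `vects` argument.
#
# library(keras)
# in1 <- layer_input(shape = c(64))
# in2 <- layer_input(shape = c(64))
# sim  <- layer_cosine_sim_wrapper()(list(in1, in2))
# dist <- layer_euc_dist_wrapper()(list(in1, in2))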