Diff of /R/custom_layers.R [000000] .. [409433]

#' Aggregation layer
#'
#' Aggregate the output of time-distributed representations using the sum, max and/or mean function.
#'
#' @param load_r6 Whether to load the R6 layer class.
#' @param method Aggregation method(s); at least one of `"sum"`, `"max"` or `"mean"`. If several are given, the pooled outputs are concatenated.
#' @param multi_in Whether to aggregate for a model with multiple inputs (and shared weights).
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_aggregate_time_dist_wrapper()
#' }
#' @returns A keras layer applying the chosen pooling operation(s).
#' @export
layer_aggregate_time_dist_wrapper <- function(load_r6 = FALSE, method = "sum", multi_in = FALSE) {

  layer_aggregate_time_dist <- keras::new_layer_class(
    "layer_aggregate_time_dist",

    initialize = function(method, multi_in = FALSE, ...) {
      super$initialize(...)
      self$method <- method
      self$axis <- ifelse(multi_in, 0L, 1L)
      self$multi_in <- multi_in
    },

    call = function(inputs, mask = NULL) {
      out <- list()
      if ("sum" %in% self$method) {
        out <- c(out, tensorflow::tf$math$reduce_sum(inputs, axis = self$axis))
      }
      if ("mean" %in% self$method) {
        out <- c(out, tensorflow::tf$math$reduce_mean(inputs, axis = self$axis))
      }
      if ("max" %in% self$method) {
        out <- c(out, tensorflow::tf$math$reduce_max(inputs, axis = self$axis))
      }

      if (length(out) > 1) {
        out <- tensorflow::tf$concat(out, axis = -1L)
      } else {
        out <- out[[1]]
      }

      out
    },

    get_config = function() {
      config <- super$get_config()
      config$method <- self$method
      config$multi_in <- self$multi_in
      config
    }
  )

  if (load_r6) {
    return(layer_aggregate_time_dist)
  } else {
    return(layer_aggregate_time_dist(method = method, multi_in = multi_in))
  }

}
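
# A minimal usage sketch (not run; assumes a working keras/tensorflow
# installation): pooling a (batch, time, features) tensor with both sum and
# max concatenates the two reductions along the last axis.
#
#   agg <- layer_aggregate_time_dist_wrapper(method = c("sum", "max"))
#   x <- tensorflow::tf$ones(c(8L, 5L, 16L))
#   y <- agg(x)  # shape (8, 32): sum-pooled and max-pooled features side by side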

#' Layer for positional embedding
#'
#' Positional encoding layer with a learned position embedding.
#'
#' @inheritParams create_model_transformer
#' @param load_r6 Whether to load the R6 layer class.
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_pos_embedding_wrapper()
#' }
#' @returns A keras layer implementing positional embedding.
#' @export
layer_pos_embedding_wrapper <- function(maxlen = 100, vocabulary_size = 4, load_r6 = FALSE, embed_dim = 64) {

  layer_pos_embedding <- keras::new_layer_class(
    "layer_pos_embedding",

    initialize = function(maxlen = 100, vocabulary_size = 4, embed_dim = 64, ...) {
      super$initialize(...)
      if (embed_dim != 0) {
        self$token_emb <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(vocabulary_size),
                                                                output_dim = as.integer(embed_dim))
        self$position_embeddings <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(maxlen),
                                                                          output_dim = as.integer(embed_dim))
      } else {
        self$position_embeddings <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(maxlen),
                                                                          output_dim = as.integer(vocabulary_size))
      }
      self$embed_dim <- as.integer(embed_dim)
      self$maxlen <- as.integer(maxlen)
      self$vocabulary_size <- as.integer(vocabulary_size)
    },

    call = function(inputs) {
      positions <- tensorflow::tf$range(self$maxlen, dtype = "int32")
      embedded_positions <- self$position_embeddings(positions)
      if (self$embed_dim != 0) inputs <- self$token_emb(inputs)
      inputs + embedded_positions
    },

    get_config = function() {
      config <- super$get_config()
      config$maxlen <- self$maxlen
      config$vocabulary_size <- self$vocabulary_size
      config$embed_dim <- self$embed_dim
      config
    }
  )

  if (load_r6) {
    return(layer_pos_embedding)
  } else {
    return(layer_pos_embedding(maxlen = maxlen, vocabulary_size = vocabulary_size, embed_dim = embed_dim))
  }

}
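
# A minimal usage sketch (not run; parameter values are illustrative): token
# ids in [0, vocabulary_size) of length maxlen are embedded, and the learned
# position embedding for each position is added via broadcasting.
#
#   emb <- layer_pos_embedding_wrapper(maxlen = 10, vocabulary_size = 4, embed_dim = 8)
#   x <- tensorflow::tf$constant(matrix(0L, nrow = 2, ncol = 10))  # (batch, maxlen)
#   y <- emb(x)  # shape (2, 10, 8)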

#' Layer for positional encoding
#'
#' Positional encoding layer with a sine/cosine matrix of different frequencies.
#'
#' @inheritParams create_model_transformer
#' @param load_r6 Whether to load the R6 layer class.
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_pos_sinusoid_wrapper()
#' }
#' @returns A keras layer implementing positional encoding using sine/cosine waves.
#' @export
layer_pos_sinusoid_wrapper <- function(maxlen = 100, vocabulary_size = 4, n = 10000, load_r6 = FALSE, embed_dim = 64) {

  layer_pos_sinusoid <- keras::new_layer_class(
    "layer_pos_sinusoid",
    initialize = function(maxlen, vocabulary_size, n, embed_dim, ...) {
      super$initialize(...)
      self$maxlen <- as.integer(maxlen)
      self$vocabulary_size <- as.integer(vocabulary_size)
      self$n <- as.integer(n)
      self$pe_matrix <- positional_encoding(seq_len = maxlen,
                                            d_model = ifelse(embed_dim == 0,
                                                             as.integer(vocabulary_size),
                                                             as.integer(embed_dim)),
                                            n = n)

      if (embed_dim != 0) {
        self$token_emb <- tensorflow::tf$keras$layers$Embedding(input_dim = as.integer(vocabulary_size),
                                                                output_dim = as.integer(embed_dim))
      }
      self$embed_dim <- as.integer(embed_dim)

    },

    call = function(inputs) {
      if (self$embed_dim != 0) {
        inputs <- self$token_emb(inputs)
      }
      inputs + self$pe_matrix
    },

    get_config = function() {
      config <- super$get_config()
      config$maxlen <- self$maxlen
      config$vocabulary_size <- self$vocabulary_size
      config$n <- self$n
      config$embed_dim <- self$embed_dim
      config
    }
  )

  if (load_r6) {
    return(layer_pos_sinusoid)
  } else {
    return(layer_pos_sinusoid(maxlen = maxlen, vocabulary_size = vocabulary_size, n = n,
                              embed_dim = embed_dim))
  }

}
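
# A minimal usage sketch (not run): same call pattern as the learned embedding
# above, but the added positions come from a fixed sine/cosine matrix built by
# the package's positional_encoding() helper, so there are no extra trainable
# position weights.
#
#   enc <- layer_pos_sinusoid_wrapper(maxlen = 10, vocabulary_size = 4, embed_dim = 8)
#   x <- tensorflow::tf$constant(matrix(0L, nrow = 2, ncol = 10))
#   y <- enc(x)  # shape (2, 10, 8)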

#' Transformer block
#'
#' Create a transformer block, consisting of self-attention, dense layers, layer normalization, residual connections and dropout.
#'
#' @inheritParams create_model_transformer
#' @param dropout_rate Rate at which connections are randomly dropped.
#' @param load_r6 Whether to load the R6 layer class.
#' @examples
#'
#' \donttest{
#' library(keras)
#' l <- layer_transformer_block_wrapper()
#' }
#' @returns A keras layer implementing a transformer block.
#' @export
layer_transformer_block_wrapper <- function(num_heads = 2, head_size = 4, dropout_rate = 0, ff_dim = 64,
                                            vocabulary_size = 4, load_r6 = FALSE, embed_dim = 64) {

  layer_transformer_block <- keras::new_layer_class(
    "layer_transformer_block",
    initialize = function(num_heads = 2, head_size = 4, dropout_rate = 0, ff_dim = 64L, vocabulary_size = 4, embed_dim = 64, ...) {
      super$initialize(...)
      self$num_heads <- num_heads
      self$head_size <- head_size
      self$dropout_rate <- dropout_rate
      self$ff_dim <- ff_dim
      self$embed_dim <- as.integer(embed_dim)
      self$vocabulary_size <- vocabulary_size
      self$att <- tensorflow::tf$keras$layers$MultiHeadAttention(num_heads = as.integer(num_heads),
                                                                 key_dim = as.integer(head_size))

      self$ffn <- keras::keras_model_sequential() %>%
        keras::layer_dense(units = as.integer(ff_dim), activation = "relu") %>%
        keras::layer_dense(units = ifelse(embed_dim == 0, as.integer(vocabulary_size), as.integer(embed_dim)))

      self$layernorm1 <- keras::layer_layer_normalization(epsilon = 1e-6)
      self$layernorm2 <- keras::layer_layer_normalization(epsilon = 1e-6)
      self$dropout1 <- keras::layer_dropout(rate = dropout_rate)
      self$dropout2 <- keras::layer_dropout(rate = dropout_rate)
    },

    call = function(inputs) {
      attn_output <- self$att(inputs, inputs, inputs)
      attn_output <- self$dropout1(attn_output)
      out1 <- self$layernorm1(inputs + attn_output)
      ffn_output <- self$ffn(out1)
      ffn_output <- self$dropout2(ffn_output)
      seq_output <- self$layernorm2(out1 + ffn_output)
      return(seq_output)
    },

    get_config = function() {
      config <- super$get_config()
      config$num_heads <- self$num_heads
      config$head_size <- self$head_size
      config$dropout_rate <- self$dropout_rate
      config$ff_dim <- self$ff_dim
      config$vocabulary_size <- self$vocabulary_size
      config$embed_dim <- self$embed_dim
      config
    }
  )

  if (load_r6) {
    return(layer_transformer_block)
  } else {
    return(layer_transformer_block(num_heads = num_heads,
                                   head_size = head_size,
                                   dropout_rate = dropout_rate,
                                   vocabulary_size = vocabulary_size,
                                   embed_dim = embed_dim,
                                   ff_dim = ff_dim))
  }

}
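
# A minimal usage sketch (not run): a transformer block maps a
# (batch, time, embed_dim) tensor to a tensor of the same shape, so blocks can
# be stacked and typically follow one of the positional layers above.
#
#   block <- layer_transformer_block_wrapper(num_heads = 2, head_size = 4,
#                                            ff_dim = 32, embed_dim = 8)
#   x <- tensorflow::tf$ones(c(2L, 10L, 8L))
#   y <- block(x)  # shape (2, 10, 8)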

layer_cosine_sim_wrapper <- function(load_r6 = FALSE) {

  layer_cosine_sim <- keras::new_layer_class(
    "layer_cosine_sim",

    initialize = function(...) {
      super$initialize(...)
    },

    call = function(inputs) {
      cosine_similarity(vects = inputs)
    },

    get_config = function() {
      config <- super$get_config()
      config
    }
  )

  if (load_r6) {
    return(layer_cosine_sim)
  } else {
    return(layer_cosine_sim())
  }

}

layer_euc_dist_wrapper <- function(load_r6 = FALSE) {

  layer_euc_dist <- keras::new_layer_class(
    "layer_euc_dist",

    initialize = function(...) {
      super$initialize(...)
    },

    call = function(inputs) {
      euclidean_distance(vects = inputs)
    },

    get_config = function() {
      config <- super$get_config()
      config
    }
  )

  if (load_r6) {
    return(layer_euc_dist)
  } else {
    return(layer_euc_dist())
  }

}
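
# The two internal (unexported) wrappers above turn the package's
# cosine_similarity() and euclidean_distance() helpers (defined elsewhere in
# the package) into serializable keras layers, e.g. for comparing the two
# branches of a siamese model. A minimal sketch (not run; it assumes the
# helpers accept a list of two equal-shape tensors, and shapes are
# illustrative):
#
#   dist <- layer_euc_dist_wrapper()
#   a <- tensorflow::tf$ones(c(4L, 16L))
#   b <- tensorflow::tf$zeros(c(4L, 16L))
#   d <- dist(list(a, b))  # per-row distance between the two inputs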