R/create_model_genomenet.R
#' @title Create GenomeNet Model with Given Architecture Parameters
#'
#' @param maxlen (integer `numeric(1)`)\cr
#'   Input sequence length.
#' @param learning_rate (`numeric(1)`)\cr
#'   Used by the `keras` optimizer that is specified by `optimizer`.
#' @param number_of_cnn_layers (integer `numeric(1)`)\cr
#'   Target number of CNN-layers to use in total. If `number_of_cnn_layers` is
#'   greater than `conv_block_count`, then the effective number of CNN layers
#'   is set to the closest integer that is divisible by `conv_block_count`.
#' @param conv_block_count (integer `numeric(1)`)\cr
#'   Number of convolutional blocks, into which the CNN layers are divided.
#'   If this is greater than `number_of_cnn_layers`, then it is set to
#'   `number_of_cnn_layers` (the convolutional block size will then be 1).\cr
#'   Convolutional blocks are used when `model_type` is `"gap"` (the output of
#'   the last `conv_block_count * (1 - skip_block_fraction)` blocks is
#'   fed to global average pooling and then concatenated), and also when
#'   `residual_block` is `TRUE` (the number of filters is held constant within
#'   blocks). If neither of these is the case, `conv_block_count` has little
#'   effect besides the fact that `number_of_cnn_layers` is set to the closest
#'   integer divisible by `conv_block_count`.
#' @param kernel_size_0 (`numeric(1)`)\cr
#'   Target CNN kernel size of the first CNN-layer. Although CNN kernel size is
#'   always an integer, this value can be non-integer, potentially affecting
#'   the kernel-sizes of intermediate layers (which are geometrically
#'   interpolated between `kernel_size_0` and `kernel_size_end`).
#' @param kernel_size_end (`numeric(1)`)\cr
#'   Target CNN kernel size of the last CNN-layer; ignored if only one
#'   CNN-layer is used (i.e. if `number_of_cnn_layers` is 1). Although CNN
#'   kernel size is always an integer, this value can be non-integer,
#'   potentially affecting the kernel-sizes of intermediate layers (which are
#'   geometrically interpolated between `kernel_size_0` and `kernel_size_end`).
#' @param filters_0 (`numeric(1)`)\cr
#'   Target filter number of the first CNN-layer. Although CNN filter number is
#'   always an integer, this value can be non-integer, potentially affecting
#'   the filter-numbers of intermediate layers (which are geometrically
#'   interpolated between `filters_0` and `filters_end`).\cr
#'   Note that filters are constant within convolutional blocks when
#'   `residual_block` is `TRUE`.
#' @param filters_end (`numeric(1)`)\cr
#'   Target filter number of the last CNN-layer; ignored if only one CNN-layer
#'   is used (i.e. if `number_of_cnn_layers` is 1). Although CNN filter number
#'   is always an integer, this value can be non-integer, potentially affecting
#'   the filter-numbers of intermediate layers (which are geometrically
#'   interpolated between `filters_0` and `filters_end`).\cr
#'   Note that filters are constant within convolutional blocks when
#'   `residual_block` is `TRUE`.
#' @param dilation_end (`numeric(1)`)\cr
#'   Dilation of the last CNN-layer *within each block*. Dilation rates within
#'   each convolutional block grow exponentially, from 1 (no dilation) for the
#'   first CNN-layer of each block to this value for the last one. Set to 1
#'   (default) to disable dilation.
#' @param max_pool_end (`numeric(1)`)\cr
#'   Target total effective pooling of the CNN part of the network. "Effective
#'   pooling" here is the product of the pooling rates of all previous
#'   CNN-layers. A network with three CNN-layers, all of which are followed
#'   by pooling layers of size 2, therefore has effective pooling of 8, with
#'   the effective pooling at intermediate positions being 1 (beginning), 2,
#'   and 4. Effective pooling after each layer is set to the power of 2 that is,
#'   on a logarithmic scale, closest to
#'   `max_pool_end ^ (<CNN layer number> / <total number of CNN layers>)`.
#'   Therefore, even though the total effective pooling size of the whole
#'   CNN part of the network will always be a power of 2, different, possibly
#'   non-integer, values of `max_pool_end` can still lead to different
#'   networks.
#' @param dense_layer_num (integer `numeric(1)`)\cr
#'   Number of dense layers at the end of the network, not counting the output
#'   layer.
#' @param dense_layer_units (integer `numeric(1)`)\cr
#'   Number of units in each dense layer, except for the output layer.
#' @param dropout (`numeric(1)`)\cr
#'   Dropout rate of dense layers, except for the output layer.
#' @param batch_norm_momentum (`numeric(1)`)\cr
#'   `momentum`-parameter of `layer_batch_normalization` layers used in the
#'   convolutional part of the network.
#' @param leaky_relu_alpha (`numeric(1)`)\cr
#'   `alpha`-parameter of the `layer_activation_leaky_relu` activation layers
#'   used in the convolutional part of the network.
#' @param dense_activation (`character(1)`)\cr
#'   Which activation function to use for dense layers. Should be one of
#'   `"relu"`, `"sigmoid"`, or `"tanh"`.
#' @param skip_block_fraction (`numeric(1)`)\cr
#'   What fraction of the first convolutional blocks to skip.
#'   Only used when `model_type` is `"gap"`.
#' @param residual_block (`logical(1)`)\cr
#'   Whether to use residual layers in the convolutional part of the network.
#' @param reverse_encoding (`logical(1)`)\cr
#'   Whether the network should have a second input for reverse-complement
#'   sequences.
#' @param optimizer (`character(1)`)\cr
#'   Which optimizer to use. One of `"adam"`, `"adagrad"`, `"rmsprop"`, or `"sgd"`.
#' @param model_type (`character(1)`)\cr
#'   Whether to use the global average pooling (`"gap"`) or recurrent
#'   (`"recurrent"`) model type.
#' @param recurrent_type (`character(1)`)\cr
#'   Which recurrent network type to use. One of `"lstm"` or `"gru"`.
#'   Only used when `model_type` is `"recurrent"`.
#' @param recurrent_layers (integer `numeric(1)`)\cr
#'   Number of recurrent layers.
#'   Only used when `model_type` is `"recurrent"`.
#' @param recurrent_bidirectional (`logical(1)`)\cr
#'   Whether to use bidirectional recurrent layers.
#'   Only used when `model_type` is `"recurrent"`.
#' @param recurrent_units (integer `numeric(1)`)\cr
#'   Number of units in each recurrent layer.
#'   Only used when `model_type` is `"recurrent"`.
#' @param vocabulary_size (integer `numeric(1)`)\cr
#'   Vocabulary size of (one-hot encoded) input strings. This determines the
#'   input tensor shape, together with `maxlen`.
#' @param last_layer_activation Either `"sigmoid"` or `"softmax"`.
#' @param loss_fn Either `"categorical_crossentropy"` or `"binary_crossentropy"`.
#'   If `label_noise_matrix` is given, a custom `"noisy_loss"` will be used.
#' @param auc_metric Whether to add the AUC metric.
#' @param num_targets (integer `numeric(1)`)\cr
#'   Number of output units to create.
#' @inheritParams create_model_lstm_cnn
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model <- create_model_genomenet()
#' model
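#' # A recurrent variant with bidirectional GRU layers (illustrative parameter
#' # choice, not taken from the original documentation):
#' model_rec <- create_model_genomenet(model_type = "recurrent",
#'                                     recurrent_type = "gru",
#'                                     recurrent_bidirectional = TRUE)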
#'
#' @returns A keras model implementing the GenomeNet architecture.
#' @export
create_model_genomenet <- function(
    maxlen = 300,
    learning_rate = 0.001,
    number_of_cnn_layers = 1,
    conv_block_count = 1,
    kernel_size_0 = 16,
    kernel_size_end = 16,
    filters_0 = 256,
    filters_end = 512,
    dilation_end = 1,
    max_pool_end = 1,
    dense_layer_num = 1,
    dense_layer_units = 100,
    dropout_lstm = 0,
    dropout = 0,
    batch_norm_momentum = 0.8,
    leaky_relu_alpha = 0,
    dense_activation = "relu",
    skip_block_fraction = 0,
    residual_block = FALSE,
    reverse_encoding = FALSE,
    optimizer = "adam",
    model_type = "gap",
    recurrent_type = "lstm",
    recurrent_layers = 1,
    recurrent_bidirectional = FALSE,
    recurrent_units = 100,
    vocabulary_size = 4,
    last_layer_activation = "softmax",
    loss_fn = "categorical_crossentropy",
    auc_metric = FALSE,
    num_targets = 2,
    model_seed = NULL,
    bal_acc = FALSE,
    f1_metric = FALSE,
    mixed_precision = FALSE,
    mirrored_strategy = NULL) {

  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")

  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), {
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(create_model_genomenet, argg)
    })
    return(model)
  }

  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
  stopifnot(maxlen > 0 & maxlen %% 1 == 0)
  stopifnot(learning_rate > 0)
  stopifnot(number_of_cnn_layers >= 1 & number_of_cnn_layers %% 1 == 0)
  stopifnot(conv_block_count >= 1 & conv_block_count %% 1 == 0)
  stopifnot(kernel_size_0 > 0)
  stopifnot(kernel_size_end > 0)
  stopifnot(filters_0 > 0)
  stopifnot(filters_end > 0)
  stopifnot(dilation_end >= 1)
  stopifnot(max_pool_end >= 1)
  stopifnot(dense_layer_num >= 0 & dense_layer_num %% 1 == 0)
  stopifnot(dense_layer_units >= 0 & dense_layer_units %% 1 == 0)
  stopifnot(0 <= dropout_lstm & dropout_lstm <= 1)
  stopifnot(0 <= batch_norm_momentum & batch_norm_momentum <= 1)

  stopifnot(0 <= leaky_relu_alpha & leaky_relu_alpha <= 1)
  dense_activation <- match.arg(dense_activation, c("relu", "sigmoid", "tanh"))
  stopifnot(0 <= skip_block_fraction & skip_block_fraction <= 1)

  model_type <- match.arg(model_type, c("gap", "recurrent"))

  stopifnot(isTRUE(residual_block) || isFALSE(residual_block))
  stopifnot(isTRUE(reverse_encoding) || isFALSE(reverse_encoding))

  optimizer <- match.arg(optimizer, c("adam", "adagrad", "rmsprop", "sgd"))
  recurrent_type <- match.arg(recurrent_type, c("lstm", "gru"))
  stopifnot(recurrent_layers >= 1 & recurrent_layers %% 1 == 0)
  stopifnot(isTRUE(recurrent_bidirectional) || isFALSE(recurrent_bidirectional))
  stopifnot(recurrent_units >= 1 && recurrent_units %% 1 == 0)

  stopifnot(vocabulary_size >= 2 & vocabulary_size %% 1 == 0)
  stopifnot(num_targets >= 2 & num_targets %% 1 == 0)

  if (number_of_cnn_layers < conv_block_count) {
    conv_block_size <- 1
    conv_block_count <- number_of_cnn_layers
  } else {
    conv_block_size <- round(number_of_cnn_layers / conv_block_count)
    number_of_cnn_layers <- conv_block_size * conv_block_count
  }
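  # Worked example (illustrative): number_of_cnn_layers = 8 and conv_block_count = 3
  # give conv_block_size = round(8 / 3) = 3, so the effective number of CNN layers
  # becomes 3 * 3 = 9, the closest multiple of conv_block_count.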

  if (residual_block) {
    filters_exponent <- rep(seq(from = 0, to = 1, length.out = conv_block_count), each = conv_block_size)
  } else {
    filters_exponent <- seq(from = 0, to = 1, length.out = number_of_cnn_layers)
  }

  filters <- ceiling(filters_0 * (filters_end / filters_0) ^ filters_exponent)

  kernel_size <- ceiling(kernel_size_0 * (kernel_size_end / kernel_size_0) ^ seq(from = 0, to = 1, length.out = number_of_cnn_layers))
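  # Worked example (illustrative): with 3 CNN layers, filters_0 = 256 and
  # filters_end = 512 give filters 256, 363, 512; kernel_size_0 = 16 and
  # kernel_size_end = 8 give kernel sizes 16, 12, 8 (geometric interpolation,
  # rounded up to integers).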

  dilation_rates <- round(dilation_end ^ seq(0, 1, length.out = conv_block_size))
  dilation_rates <- rep(dilation_rates, conv_block_count)
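  # Worked example (illustrative): conv_block_size = 3 and dilation_end = 4 give
  # dilation rates 1, 2, 4 within each block, repeated for every block.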

  max_pool_divider <- round(log2(max_pool_end) * seq(0, 1, length.out = number_of_cnn_layers + 1))
  max_pool_array <- 2 ^ diff(max_pool_divider)
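  # Worked example (illustrative): 3 CNN layers and max_pool_end = 4 give
  # max_pool_divider = c(0, 1, 1, 2) and pooling sizes 2, 1, 2, i.e. a total
  # effective pooling of 4 distributed over the layers.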

  input_tensor <- keras::layer_input(shape = c(maxlen, vocabulary_size))
  output_tensor <- input_tensor

  output_collection <- list()

  for (i in seq_len(number_of_cnn_layers)) {
    layer <- keras::layer_conv_1d(kernel_size = kernel_size[i],
                                  padding = "same",
                                  activation = "linear",
                                  filters = filters[i],
                                  dilation_rate = dilation_rates[i])

    output_tensor <- output_tensor %>% layer
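
    # For the "gap" model type, collect a global-average-pooling summary of the
    # feature map at the end of each convolutional block; the collected outputs
    # are concatenated after the loop.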

    if (model_type == "gap" && i %% conv_block_size == 0) {
      output_collection[[length(output_collection) + 1]] <- keras::layer_global_average_pooling_1d(output_tensor)
    }

    if (max_pool_array[i] > 1) {
      layer <- keras::layer_max_pooling_1d(pool_size = max_pool_array[i], padding = "same")
      output_tensor <- output_tensor %>% layer
    }

    if (residual_block) {
      if (i > 1) {
        if (max_pool_array[i] > 1) {
          layer <- keras::layer_average_pooling_1d(pool_size = max_pool_array[i], padding = "same")
          residual_layer <- residual_layer %>% layer
        }
        if (filters[i - 1] != filters[i]) {
          layer <- keras::layer_conv_1d(kernel_size = 1,
                                        padding = "same",
                                        activation = "linear",
                                        filters = filters[i])
          residual_layer <- residual_layer %>% layer
        }

        output_tensor <- keras::layer_add(list(output_tensor, residual_layer))
      }

      residual_layer <- output_tensor
    }

    layer <- keras::layer_batch_normalization(momentum = batch_norm_momentum)
    output_tensor <- output_tensor %>% layer

    layer <- keras::layer_activation_leaky_relu(alpha = leaky_relu_alpha)
    output_tensor <- output_tensor %>% layer
  }

  if (model_type == "gap") {
    # skip 'skip_block_fraction' of outputs we collected --> use the last (1 - skip_block_fraction) part of them
    use_blocks <- ceiling((1 - skip_block_fraction) * length(output_collection))
    use_blocks <- max(use_blocks, 1)

    output_collection <- utils::tail(output_collection, use_blocks)

    # concatenate outputs from blocks (that we are using)
    if (length(output_collection) > 1) {
      output_tensor <- keras::layer_concatenate(output_collection)
    } else {
      output_tensor <- output_collection[[1]]
    }
  } else {
    # recurrent model

    recurrent_layer_constructor <- switch(recurrent_type,
                                          lstm = keras::layer_lstm,
                                          gru = keras::layer_gru
    )

    for (i in seq_len(recurrent_layers)) {
      if (recurrent_bidirectional) {
        layer <- keras::bidirectional(
          layer = recurrent_layer_constructor(units = recurrent_units, return_sequences = (i != recurrent_layers)))
      } else {
        layer <- recurrent_layer_constructor(units = recurrent_units, return_sequences = (i != recurrent_layers))
      }
      output_tensor <- output_tensor %>% layer
    }
  }

  for (i in seq_len(dense_layer_num)) {
    layer <- keras::layer_dropout(rate = dropout)
    output_tensor <- output_tensor %>% layer
    layer <- keras::layer_dense(units = dense_layer_units, activation = dense_activation)
    output_tensor <- output_tensor %>% layer
  }

  output_tensor <- output_tensor %>%
    keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")

  # define "model" as the mapping from input_tensor to output_tensor
  model <- keras::keras_model(inputs = input_tensor, outputs = output_tensor)

  if (reverse_encoding) {
    input_tensor_reversed <- keras::layer_input(shape = c(maxlen, vocabulary_size))

    # define "output_tensor_reversed" as what comes out of input_tensor_reversed when model() is applied to it
    output_tensor_reversed <- model(input_tensor_reversed)

    # define a new model: apply the model from above to both inputs and average the two outputs
    model <- keras::keras_model(
      inputs = c(input_tensor, input_tensor_reversed),
      outputs = keras::layer_average(c(output_tensor, output_tensor_reversed))
    )
  }

  # assign optimization method
  keras_optimizer <- set_optimizer(optimizer, learning_rate)

  # add metrics
  if (loss_fn == "binary_crossentropy") {
    model_metrics <- c(tensorflow::tf$keras$metrics$BinaryAccuracy(name = "acc"))
  } else {
    model_metrics <- c("acc")
  }

  cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
  while (dir.exists(cm_dir)) {
    cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
  }
  dir.create(cm_dir)
  model$cm_dir <- cm_dir

  if (loss_fn == "categorical_crossentropy") {
    if (bal_acc) {
      macro_average_cb <- balanced_acc_wrapper(num_targets, cm_dir)
      model_metrics <- c(macro_average_cb, "acc")
    }

    if (f1_metric) {
      f1 <- f1_wrapper(num_targets)
      model_metrics <- c(model_metrics, f1)
    }
  }

  if (auc_metric) {
    auc <- auc_wrapper(model_output_size = dense_layer_units[length(dense_layer_units)],
                       loss = loss_fn)
    model_metrics <- c(model_metrics, auc)
  }

  model %>% keras::compile(loss = loss_fn, optimizer = keras_optimizer, metrics = model_metrics)

  argg <- c(as.list(environment()))
  model <- add_hparam_list(model, argg)

  model
}
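
# Usage sketch (illustrative only; kept as a comment so it is not executed when
# the package is loaded): with `reverse_encoding = TRUE` the returned model has
# two inputs, so prediction takes a list of forward and reverse-complement
# one-hot arrays of shape (batch, maxlen, vocabulary_size). `x_fwd` and `x_rev`
# below are hypothetical arrays.
#
#   model <- create_model_genomenet(maxlen = 300, reverse_encoding = TRUE)
#   preds <- predict(model, list(x_fwd, x_rev))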


# #' Load pretrained Genomenet model
# #'
# #' Classification model with labels "bacteria", "virus-no-phage", "virus-phage".
# #' TODO: add link to paper
# #'
# #' @inheritParams create_model_lstm_cnn
# #' @param maxlen Model input size. Either 150 or 10000.
# #' @param learning_rate Learning rate for the optimizer. If `compile` is TRUE and
# #' `learning_rate` is NULL, the learning rate from previous training is used.
# #' @export
# load_model_self_genomenet <- function(maxlen, compile = FALSE, optimizer = "adam",
#                                       learning_rate = NULL) {
#
#   stopifnot(any(maxlen == c(150, 10000)))
#
#   if (maxlen == 150) {
#     load(model_self_genomenet_maxlen_150)
#     model <- keras::unserialize_model(model_self_genomenet_maxlen_150, compile = FALSE)
#   }
#
#   if (maxlen == 10000) {
#     load("data/self_genomenet_model_maxlen_10k.rda")
#     #data(model_self_genomenet_maxlen_10k)
#     model <- keras::unserialize_model(model_self_genomenet_maxlen_10k, compile = FALSE)
#   }
#
#   if (is.null(learning_rate)) {
#     if (maxlen == 150) learning_rate <- 0.00039517784549691
#     if (maxlen == 10000) learning_rate <- 8.77530464905713e-05
#   }
#
#   if (compile) {
#     keras_optimizer <- set_optimizer(optimizer, learning_rate)
#     model %>% keras::compile(loss = "categorical_crossentropy", optimizer = keras_optimizer, metrics = "acc")
#   }
#
#   return(model)
# }