get_layer_names <- function(model) {

  n <- length(model$layers)
  layer_names <- vector("character", n)
  for (i in seq_len(n)) {
    layer_names[i] <- model$layers[[i]]$name
  }
  layer_names
}

#' Compile model
#' 
#' @inheritParams create_model_lstm_cnn
#' @param model A keras model.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' 
#' model <- create_model_lstm_cnn(layer_lstm = 8, compile = FALSE)
#' model <- compile_model(model = model,
#'                        solver = 'adam',
#'                        learning_rate = 0.01,
#'                        loss_fn = 'categorical_crossentropy')
#' 
#' @returns A compiled keras model.
#' @export
compile_model <- function(model, solver, learning_rate, loss_fn, label_smoothing = 0,
                          num_output_layers = 1, label_noise_matrix = NULL,
                          bal_acc = FALSE, f1_metric = FALSE, auc_metric = FALSE,
                          layer_dense = NULL) {

  optimizer <- set_optimizer(solver, learning_rate)
  if (num_output_layers == 1) num_targets <- model$output_shape[[2]]
  # keep the loss name as a string; loss_fn may be replaced by a loss object below
  loss_fn_name <- loss_fn

  # add metrics
  if (loss_fn_name == "binary_crossentropy") {
    model_metrics <- c(tensorflow::tf$keras$metrics$BinaryAccuracy(name = "acc"))
  } else if (loss_fn_name == "sparse_categorical_crossentropy") {
    model_metrics <- tensorflow::tf$keras$metrics$SparseCategoricalAccuracy(name = "acc")
  } else {
    model_metrics <- c("acc")
  }

  cm_dir <- NULL
  if (num_output_layers == 1) {
    cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
    while (dir.exists(cm_dir)) {
      cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
    }
    dir.create(cm_dir)
    model$cm_dir <- cm_dir

    if (loss_fn_name == "sparse_categorical_crossentropy") {

      if (length(model$outputs) == 1 && length(model$output_shape) == 3) {
        loss_fn <- tensorflow::tf$keras$losses$SparseCategoricalCrossentropy(
          reduction = tensorflow::tf$keras$losses$Reduction$NONE
        )
      }
    }

    if (loss_fn_name == "categorical_crossentropy") {

      if (length(model$outputs) == 1 && length(model$output_shape) == 3) {
        loss_fn <- tensorflow::tf$keras$losses$CategoricalCrossentropy(
          label_smoothing = label_smoothing,
          reduction = tensorflow::tf$keras$losses$Reduction$NONE,
          name = 'categorical_crossentropy'
        )
      }
    }

    if (loss_fn_name %in% c("categorical_crossentropy", "sparse_categorical_crossentropy")) {

      if (bal_acc) {
        macro_average_cb <- balanced_acc_wrapper(num_targets, cm_dir)
        model_metrics <- c(macro_average_cb, "acc")
      }

      if (f1_metric) {
        f1 <- f1_wrapper(num_targets)
        model_metrics <- c(model_metrics, f1)
      }
    }

    if (auc_metric) {
      auc <- auc_wrapper(model_output_size = layer_dense[length(layer_dense)],
                         loss = loss_fn)
      model_metrics <- c(model_metrics, auc)
    }

  }

  if (label_smoothing > 0 && !is.null(label_noise_matrix)) {
    stop("Cannot use label smoothing and label noise at the same time. Either set label_smoothing = 0 or label_noise_matrix = NULL")
  }

  if (label_smoothing > 0) {
    if (loss_fn_name == "categorical_crossentropy") {
      smooth_loss <- tensorflow::tf$losses$CategoricalCrossentropy(label_smoothing = label_smoothing, name = "smooth_loss")
    }
    if (loss_fn_name == "binary_crossentropy") {
      smooth_loss <- tensorflow::tf$losses$BinaryCrossentropy(label_smoothing = label_smoothing, name = "smooth_loss")
    }
    model %>% keras::compile(loss = smooth_loss,
                             optimizer = optimizer, metrics = model_metrics)
  } else if (!is.null(label_noise_matrix)) {
    row_sums <- rowSums(label_noise_matrix)
    if (!all(row_sums == 1)) {
      warning("Rows of label_noise_matrix don't sum to 1")
    }
    noisy_loss <- noisy_loss_wrapper(solve(label_noise_matrix))
    model %>% keras::compile(loss = noisy_loss,
                             optimizer = optimizer, metrics = model_metrics)
  } else {
    model %>% keras::compile(loss = loss_fn,
                             optimizer = optimizer, metrics = model_metrics)
  }

  model

}
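
# Usage sketch with a label-noise correction matrix (hypothetical 2-class noise
# estimate; noisy_loss_wrapper() inverts the matrix, so it must be invertible
# and each row should sum to 1):
# noise <- matrix(c(0.9, 0.1,
#                   0.2, 0.8), nrow = 2, byrow = TRUE)
# model <- compile_model(model = model, solver = 'adam', learning_rate = 0.01,
#                        loss_fn = 'categorical_crossentropy',
#                        label_noise_matrix = noise)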


#' Load checkpoint
#' 
#' Load a checkpoint from a directory. Chooses the best checkpoint based on a condition:
#' best validation accuracy, best validation loss, last epoch number or a specified epoch number.
#' 
#' @inheritParams create_model_lstm_cnn
#' @param cp_path A directory containing checkpoints or a single checkpoint file.
#' If a directory, the checkpoint is chosen based on `cp_filter` or `ep_index`.
#' @param cp_filter Condition for choosing a checkpoint if `cp_path` is a directory.
#' Either "acc" for best validation accuracy, "loss" for best validation loss or "last_ep" for last epoch.
#' @param ep_index Load checkpoint from a specific epoch number. If not `NULL`, has priority over `cp_filter`.
#' @param compile Whether to load the compiled model.
#' @param re_compile Whether to compile the model with parameters from `learning_rate`,
#' `solver` and `loss`.
#' @param add_custom_object Named list of custom objects.
#' @param verbose Whether to print the chosen checkpoint path.
#' @param loss Loss function. Only used if the model gets compiled.
#' @param margin Margin for contrastive loss, see \link{loss_cl}.
#' @examples
#' \donttest{
#' library(keras)
#' model <- create_model_lstm_cnn(layer_lstm = 8)
#' checkpoint_folder <- tempfile()
#' dir.create(checkpoint_folder)
#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.007-val_loss11.07-val_acc0.6.hdf5'))
#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.019-val_loss8.74-val_acc0.7.hdf5'))
#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.025-val_loss0.03-val_acc0.8.hdf5'))
#' model <- load_cp(cp_path = checkpoint_folder, cp_filter = "last_ep")
#' }
#' @returns A keras model loaded from a checkpoint.
#' @export
load_cp <- function(cp_path, cp_filter = "last_ep", ep_index = NULL, compile = FALSE,
                    learning_rate = 0.01, solver = "adam", re_compile = FALSE,
                    loss = "categorical_crossentropy",
                    add_custom_object = NULL, margin = 1,
                    verbose = TRUE, mirrored_strategy = NULL) {

  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), {
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(load_cp, argg)
    })
    return(model)
  }

  # custom objects needed to load transformer architectures
  custom_objects <- list(
    "layer_pos_embedding" = layer_pos_embedding_wrapper(),
    "layer_pos_sinusoid" = layer_pos_sinusoid_wrapper(),
    "layer_transformer_block" = layer_transformer_block_wrapper(),
    "layer_euc_dist" = layer_euc_dist_wrapper(),
    "layer_cosine_sim" = layer_cosine_sim_wrapper(),
    "layer_aggregate_time_dist" = layer_aggregate_time_dist_wrapper(),
    "loss_cl_margin___margin_" = loss_cl(margin = margin)
  )

  if (!is.null(add_custom_object)) {
    for (i in seq_along(add_custom_object)) {
      custom_objects[[names(add_custom_object)[i]]] <- add_custom_object[[i]]
    }
  }

  cp <- get_cp(cp_path = cp_path, cp_filter = cp_filter,
               ep_index = ep_index, verbose = verbose)

  model <- keras::load_model_hdf5(cp, compile = compile, custom_objects = custom_objects)

  if (re_compile) {
    optimizer <- set_optimizer(solver, learning_rate)
    model %>% keras::compile(loss = loss,
                             optimizer = optimizer,
                             metrics = model$metrics)
  }

  return(model)

}
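
# Usage sketch for add_custom_object (hypothetical names; assumes a model was
# saved with a custom layer registered under 'layer_my_attention'):
# model <- load_cp(
#   cp_path = "checkpoints/",   # directory with Ep.XXX-val_loss...-val_acc... files
#   cp_filter = "loss",         # pick the checkpoint with the best validation loss
#   add_custom_object = list(layer_my_attention = layer_my_attention_wrapper())
# )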

get_cp <- function(cp_path, cp_filter = "last_ep", ep_index = NULL, verbose = TRUE) {

  if (!is.null(cp_filter)) {
    stopifnot(cp_filter %in% c("acc", "loss", "last_ep"))
    if (!is.null(ep_index)) {
      cp_filter <- NULL
    }
  }

  is_directory <- dir.exists(cp_path)
  if (is_directory) {
    cps <- list.files(cp_path, full.names = TRUE)
    files_basename <- basename(cps)
    stopifnot(xor(is.null(cp_filter), is.null(ep_index)))
  } else {
    stopifnot(file.exists(cp_path))
    cp <- cp_path
  }

  if (is_directory && !is.null(cp_filter)) {

    if (cp_filter == "acc") {
      if (!all(stringr::str_detect(files_basename, "acc"))) {
        stop("No accuracy information in checkpoint names ('acc' string), use another metric.")
      }
      acc_scores <- files_basename %>% stringr::str_extract("acc\\d++\\.\\d++") %>%
        stringr::str_remove("acc") %>% as.numeric()
      # use later epoch for ties
      index <- which.max(rank(acc_scores, ties.method = "last"))
    }

    if (cp_filter == "loss") {
      if (!all(stringr::str_detect(files_basename, "loss"))) {
        stop("No loss information in checkpoint names ('loss' string), use another metric.")
      }
      loss_scores <- files_basename %>% stringr::str_extract("loss\\d++\\.\\d++") %>%
        stringr::str_remove("loss") %>% as.numeric()
      index <- which.min(rank(loss_scores, ties.method = "last"))
    }

    if (cp_filter == "last_ep") {
      ep_scores <- files_basename %>% stringr::str_extract("Ep\\.\\d++") %>%
        stringr::str_remove("Ep\\.") %>% as.numeric()
      index <- which.max(ep_scores)
    }

  }

  if (is_directory && !is.null(ep_index)) {
    ep_scores <- files_basename %>% stringr::str_extract("Ep\\.\\d++") %>%
      stringr::str_remove("Ep\\.") %>% as.numeric()
    index <- which(ep_scores == ep_index)
  }

  if (is_directory) {
    cp <- cps[index]
  }

  if (verbose) {
    cat("Using checkpoint", cp, "\n")
    cat("(Last modified:", as.character(file.info(cp)$mtime), ")\n")
  }

  return(cp)

}
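
# Illustration of the filename parsing above (assuming the
# 'Ep.XXX-val_lossY-val_accZ' naming scheme used in the load_cp example):
# fname <- "Ep.025-val_loss0.03-val_acc0.8.hdf5"
# stringr::str_extract(fname, "acc\\d++\\.\\d++")   # "acc0.8"
# stringr::str_extract(fname, "loss\\d++\\.\\d++")  # "loss0.03"
# stringr::str_extract(fname, "Ep\\.\\d++")         # "Ep.025"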


# temporary fix for metric bugs: evaluate once on dummy data so that
# model$metrics gets populated; optionally re-compile without duplicated
# or loss metrics
manage_metrics <- function(model, compile = FALSE) {

  dummy_gen <- generator_dummy(model, batch_size = 1)
  z <- dummy_gen()
  x <- z[[1]]
  y <- z[[2]]

  if (length(model$metrics) == 0) {
    suppressMessages(
      eval <- model$evaluate(x, y, verbose = 0L)
    )
  }

  if (compile) {
    metric_names <- vector("character", length(model$metrics))
    for (i in seq_along(model$metrics)) {
      metric_names[i] <- model$metrics[[i]]$name
    }

    duplicated_index <- duplicated(metric_names)
    loss_index <- stringr::str_detect(metric_names, "loss")
    index <- duplicated_index | loss_index

    # remove duplicated metrics
    model <- model %>% keras::compile(loss = model$loss,
                                      optimizer = model$optimizer,
                                      metrics = model$metrics[!index])
    suppressMessages(
      eval <- model$evaluate(x, y, verbose = 0L)
    )
  }

  return(model)

}
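
# Usage sketch (internal helper): refresh the metric objects of a compiled
# model and drop duplicated entries, e.g. after re-compiling a loaded model:
# model <- manage_metrics(model, compile = TRUE)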


#' Get activation functions of output layers
#' 
#' Get activation functions of output layers.
#' 
#' @param model A keras model.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model <- create_model_lstm_cnn(
#'   maxlen = 50,
#'   layer_lstm = 8,
#'   layer_dense = c(64, 2),
#'   verbose = FALSE)
#' get_output_activations(model)
#' 
#' @returns Character vector with names of activation functions of model outputs.
#' @export
get_output_activations <- function(model) {

  out_names <- model$output_names
  act_vec <- vector("character", length(out_names))
  count <- 1

  for (layer_name in out_names) {
    act_name <- model$get_layer(layer_name)$get_config()$activation
    if (is.null(act_name)) act_name <- "linear"
    act_vec[count] <- act_name
    count <- count + 1
  }
  return(act_vec)
}


set_optimizer <- function(solver = "adam", learning_rate = 0.01) {

  stopifnot(solver %in% c("adam", "adagrad", "rmsprop", "sgd"))

  # older versions of the keras R package name the learning rate argument 'lr'
  named_lr <- "lr" %in% names(formals(keras::optimizer_adam))
  if (named_lr) {
    arg_list <- list(lr = learning_rate)
  } else {
    arg_list <- list(learning_rate = learning_rate)
  }

  keras_optimizer <- switch(solver,
    adam = do.call(keras::optimizer_adam, arg_list),
    adagrad = do.call(keras::optimizer_adagrad, arg_list),
    rmsprop = do.call(keras::optimizer_rmsprop, arg_list),
    sgd = do.call(keras::optimizer_sgd, arg_list))

  return(keras_optimizer)

}
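
# Usage sketch: the same call works across keras R package versions, since the
# learning-rate argument name ('lr' vs. 'learning_rate') is resolved via
# formals() above.
# opt <- set_optimizer(solver = "rmsprop", learning_rate = 0.001)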


#' Get solver and learning rate from model.
#'
#' @returns Keras optimizer.
#' @noRd
get_optimizer <- function(model) {
  solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])
  learning_rate <- keras::k_eval(model$optimizer$lr)
  if (solver == "adam") {
    optimizer <- keras::optimizer_adam(learning_rate)
  }
  if (solver == "adagrad") {
    optimizer <- keras::optimizer_adagrad(learning_rate)
  }
  if (solver == "rmsprop") {
    optimizer <- keras::optimizer_rmsprop(learning_rate)
  }
  if (solver == "sgd") {
    optimizer <- keras::optimizer_sgd(learning_rate)
  }
  return(optimizer)
}

#' Replace input layer
#'
#' Replace the first layer of a model with a new input layer of different shape. Only works for sequential models that
#' use CNN and LSTM layers.
#'
#' @param model A keras model.
#' @param input_shape The new input shape vector (without batch size).
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model_1 <- create_model_lstm_cnn(
#'   maxlen = 50,
#'   kernel_size = c(10, 10),
#'   filters = c(64, 128),
#'   pool_size = c(2, 2),
#'   layer_lstm = c(32),
#'   verbose = FALSE,
#'   layer_dense = c(64, 2))
#' model <- reshape_input(model_1, input_shape = c(120, 4))
#' model
#' 
#' @returns A keras model with the layers of the input model and a new input shape.
#' @export
reshape_input <- function(model, input_shape) {

  in_layer <- keras::layer_input(shape = input_shape)
  # skip the old input layer and re-apply the remaining layers
  for (i in 2:length(model$layers)) {
    layer_name <- model$layers[[i]]$name
    if (i == 2) {
      out_layer <- in_layer %>% model$get_layer(layer_name)()
    } else {
      out_layer <- out_layer %>% model$get_layer(layer_name)()
    }
  }
  new_model <- tensorflow::tf$keras$Model(in_layer, out_layer)
  return(new_model)
}


#' Check if layer is in model
#' 
#' @returns Nothing; throws an error if the model has no layer of the given name.
#' @noRd
check_layer_name <- function(model, layer_name) {
  num_layers <- length(model$layers)
  layer_names <- vector("character", num_layers)
  for (i in seq_len(num_layers)) {
    layer_names[i] <- model$layers[[i]]$name
  }
  if (!(layer_name %in% layer_names)) {
    stop(paste0("Model has no layer named '", layer_name, "'"))
  }
}


#' Remove layers from model and add dense layers
#' 
#' Function takes a model as input and removes all layers after a certain layer, specified in the \code{layer_name} argument.
#' Optionally adds dense layers on top of the pruned model. The model can have multiple output layers with separate loss/activation functions.
#' You can freeze all the weights of the pruned model by setting \code{freeze_base_model = TRUE}.
#'
#' @inheritParams create_model_lstm_cnn
#' @param layer_name Name of the last layer to keep from the old model.
#' @param model A keras model. 
#' @param dense_layers List of vectors specifying the number of units for each dense layer. If this is a list of length > 1, the model
#' gets multiple output layers.
#' @param shared_dense_layers Vector with the number of units for each shared dense layer. These layers are connected on top of the layer given in 
#' the `layer_name` argument, before the model branches into multiple output layers. Don't use if the model has just one output layer 
#' (use only `dense_layers`).   
#' @param last_activation List of activations for the last entry of each \code{dense_layers} list entry. Either `"softmax"`, `"sigmoid"` or `"linear"`.
#' @param output_names List of names for each output layer.
#' @param losses List of loss functions, one for each output.
#' @param verbose Whether to print summaries of the old and new model and the trainability of each layer.
#' @param dropout List of vectors with dropout rates for each new dense layer.
#' @param dropout_shared Vector of dropout rates for the dense layers from `shared_dense_layers`.
#' @param freeze_base_model Whether to freeze all weights before the new dense layers.
#' @param compile Boolean, whether to compile the new model.
#' @param flatten Whether to add a flatten layer before the new dense layers.
#' @param learning_rate Learning rate if `compile = TRUE`; if `NULL`, uses the learning rate of the old model.
#' @param global_pooling "max_ch_first" for global max pooling with channels first
#' ([keras docs](https://keras.io/api/layers/pooling_layers/global_average_pooling1d/)),
#' "max_ch_last" for global max pooling with channels last, "average_ch_first" for global average pooling with channels first, 
#' "average_ch_last" for global average pooling with channels last or `NULL` for no global pooling. 
#' "both_ch_first" or "both_ch_last" combine average and max pooling; "all" applies all 4 options at once.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model_1 <- create_model_lstm_cnn(layer_lstm = c(64, 64),
#'                                  maxlen = 50,
#'                                  layer_dense = c(32, 4), 
#'                                  verbose = FALSE)
#' # get name of second to last layer 
#' num_layers <- length(model_1$get_config()$layers)
#' layer_name <- model_1$get_config()$layers[[num_layers-1]]$name
#' # add dense layers with multiple outputs and separate loss/activation functions
#' model_2 <- remove_add_layers(model = model_1,
#'                              layer_name = layer_name,
#'                              dense_layers = list(c(32, 16, 1), c(8, 1), c(12, 5)),
#'                              losses = list("binary_crossentropy", "mae",
#'                                            "categorical_crossentropy"),
#'                              last_activation = list("sigmoid", "linear", "softmax"),
#'                              freeze_base_model = TRUE,
#'                              output_names = list("out_1_binary_classification", 
#'                                                  "out_2_regression", 
#'                                                  "out_3_classification")
#' ) 
#' 
#' @returns A keras model with layers added to and/or removed from the base model. 
#' @export
remove_add_layers <- function(model = NULL,
                              layer_name = NULL,
                              dense_layers = NULL,
                              shared_dense_layers = NULL,
                              last_activation = list("softmax"),
                              output_names = NULL,
                              losses = NULL,
                              verbose = TRUE,
                              dropout = NULL,
                              dropout_shared = NULL,
                              freeze_base_model = FALSE,
                              compile = FALSE,
                              learning_rate = 0.001,
                              solver = "adam",
                              flatten = FALSE,
                              global_pooling = NULL,
                              model_seed = NULL,
                              mixed_precision = FALSE,
                              mirrored_strategy = NULL) {

  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")

  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), {
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(remove_add_layers, argg)
    })
    return(model)
  }

  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
  if (!is.null(layer_name)) check_layer_name(model, layer_name)
  if (!is.null(shared_dense_layers) && is.null(dense_layers)) {
    stop("You need to specify output layers in the dense_layers argument")
  }
  if (!is.null(shared_dense_layers) && length(dense_layers) == 1) {
    stop("If your model has just one output layer, use only the dense_layers argument (and set shared_dense_layers = NULL).")
  }
  if (!is.null(global_pooling)) {
    stopifnot(global_pooling %in% c("max_ch_first", "max_ch_last", "average_ch_first", "average_ch_last", "both_ch_first", "both_ch_last", "all"))
  }

  if (!is.list(dense_layers)) {
    dense_layers <- list(dense_layers)
  }

  if (!is.null(dropout) && !is.list(dropout)) {
    dropout <- list(dropout)
  }

  if (is.null(losses)) {
    losses <- list()
    for (i in seq_along(last_activation)) {
      if (last_activation[[i]] == "softmax") loss <- "categorical_crossentropy"
      if (last_activation[[i]] == "sigmoid") loss <- "binary_crossentropy"
      if (last_activation[[i]] == "linear") loss <- "mse"
      losses[[i]] <- loss
    }
  }

  if (is.null(output_names)) {
    output_names <- vector("list", length = length(dense_layers))
  }

  if (length(dense_layers) != length(last_activation)) {
    stop("Length of dense_layers and last_activation must be the same")
  }

  if (length(dense_layers) != length(output_names)) {
    stop("Length of dense_layers and output_names must be the same")
  }

  if (!is.null(dropout)) {
    for (i in seq_along(dense_layers)) {
      stopifnot(length(dropout[[i]]) == length(dense_layers[[i]]))
    }
  }

  if (verbose) {
    print("Original model: ")
    print(model$summary())
  }

  is_sequential <- any(stringr::str_detect(class(model), "sequential"))

  if (!is.null(layer_name)) {
    model_new <- tensorflow::tf$keras$Model(model$input, model$get_layer(layer_name)$output)

    if (freeze_base_model) {
      keras::freeze_weights(model_new)
    }

    if (!is.null(global_pooling)) {
      if (global_pooling == "max_ch_first") {
        out <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_first")
      } else if (global_pooling == "max_ch_last") {
        out <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_last")
      } else if (global_pooling == "average_ch_first") {
        out <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_first")
      } else if (global_pooling == "average_ch_last") {
        out <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_last")
      } else if (global_pooling == "both_ch_last") {
        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_last")
        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_last")
        out <- keras::layer_concatenate(list(out1, out2))
      } else if (global_pooling == "both_ch_first") {
        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_first")
        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_first")
        out <- keras::layer_concatenate(list(out1, out2))
      } else {
        # "all": combine average and max pooling for both channel orders
        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_first")
        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_first")
        out3 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_last")
        out4 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_last")
        out <- keras::layer_concatenate(list(out1, out2, out3, out4))
      }
      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
    }

    if (flatten) {
      out <- model_new$output %>% keras::layer_flatten()
      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
    }

    if (!is.null(shared_dense_layers)) {
      out <- model_new$output
      for (i in seq_along(shared_dense_layers)) {
        if (!is.null(dropout_shared)) {
          out <- out %>% keras::layer_dropout(dropout_shared[i])
        }
        out <- out %>% keras::layer_dense(shared_dense_layers[i], activation = "relu")
      }
      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
    }

    output_list <- list()
    name_dense_index <- 1

    if (!is.null(dense_layers[[1]])) {
      for (output_num in seq_along(dense_layers)) {
        for (i in seq_along(dense_layers[[output_num]])) {
          # last layer of each output branch gets its own activation, name and
          # float32 dtype (for numerically stable outputs under mixed precision)
          if (i == length(dense_layers[[output_num]])) {
            activation <- last_activation[[output_num]]
            dtype <- "float32"
            layer_name <- output_names[[output_num]]
          } else {
            activation <- "relu"
            dtype <- NULL
            layer_name <- paste0("dense_new_", name_dense_index)
            name_dense_index <- name_dense_index + 1
          }

          prev_layer <- if (i == 1) model_new$output else output_list[[output_num]]
          if (!is.null(dropout)) {
            prev_layer <- prev_layer %>% keras::layer_dropout(rate = dropout[[output_num]][i])
          }
          output_list[[output_num]] <- prev_layer %>%
            keras::layer_dense(units = dense_layers[[output_num]][i], activation = activation,
                               name = layer_name, dtype = dtype)
        }
      }
      model_new <- tensorflow::tf$keras$Model(model_new$input, output_list)
    }
  }

  if (verbose) {
    print("New model: ")
    print(model_new$summary())
    for (i in seq_along(model_new$layers)) {
      cat(model_new$layers[[i]]$name, "trainable:", model_new$layers[[i]]$trainable, "\n")
    }
  }

  if (compile) {
    if (is.null(learning_rate)) {
      learning_rate <- keras::k_eval(model$optimizer$lr)
    }

    if (is.null(solver)) {
      solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])
    }

    optimizer <- set_optimizer(solver, learning_rate)

    metric_list <- list()
    for (i in seq_along(losses)) {
      metric_list[[i]] <- ifelse(losses[[i]] == "binary_crossentropy", "binary_accuracy", "acc")
    }

    model_new %>% keras::compile(loss = losses,
                                 optimizer = optimizer,
                                 metrics = metric_list)
  }

  model_new
}
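
# Sketch of a multi-output setup with shared dense layers (assuming model_1 and
# layer_name as in the roxygen example above): two dense layers of 64 and 32
# units are shared by both output branches before the model splits.
# model_3 <- remove_add_layers(model = model_1,
#                              layer_name = layer_name,
#                              shared_dense_layers = c(64, 32),
#                              dense_layers = list(c(16, 1), c(8, 2)),
#                              last_activation = list("sigmoid", "softmax"),
#                              losses = list("binary_crossentropy",
#                                            "categorical_crossentropy"))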


#' Merge two models
#' 
#' Combine two models at certain layers and add dense layer(s) afterwards.
#'
#' @param models List of two models.
#' @param layer_names Vector of length 2 with names of the layers to merge.
#' @param freeze_base_model Boolean vector of length 2. Whether to freeze the weights of the individual models.
#' @inheritParams create_model_lstm_cnn
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model_1 <- create_model_lstm_cnn(layer_lstm = c(64, 64), maxlen = 50, layer_dense = c(32, 4),
#'                                  verbose = FALSE)
#' model_2 <- create_model_lstm_cnn(layer_lstm = c(32), maxlen = 40, 
#'                                  layer_dense = c(8, 2), verbose = FALSE)
#' # get names of second to last layers
#' num_layers_1 <- length(model_1$get_config()$layers)
#' layer_name_1 <- model_1$get_config()$layers[[num_layers_1 - 1]]$name
#' num_layers_2 <- length(model_2$get_config()$layers)
#' layer_name_2 <- model_2$get_config()$layers[[num_layers_2 - 1]]$name
#' # merge models
#' model <- merge_models(models = list(model_1, model_2),
#'                       layer_names = c(layer_name_1, layer_name_2),
#'                       layer_dense = c(6, 2), 
#'                       freeze_base_model = c(FALSE, FALSE)) 
#' 
#' @returns A keras model merging the two input models.
#' @export
merge_models <- function(models, layer_names, layer_dense, solver = "adam",
                         learning_rate = 0.0001,
                         freeze_base_model = c(FALSE, FALSE),
                         model_seed = NULL) {

  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)

  model_1 <- remove_add_layers(model = models[[1]],
                               layer_name = layer_names[1],
                               dense_layers = NULL,
                               verbose = FALSE,
                               dropout = NULL,
                               freeze_base_model = freeze_base_model[1],
                               compile = FALSE,
                               learning_rate = NULL)

  model_2 <- remove_add_layers(model = models[[2]],
                               layer_name = layer_names[2],
                               dense_layers = NULL,
                               verbose = FALSE,
                               dropout = NULL,
                               freeze_base_model = freeze_base_model[2],
                               compile = FALSE,
                               learning_rate = NULL)

  # choose optimization method
  optimizer <- set_optimizer(solver, learning_rate)

  output_tensor <- keras::layer_concatenate(list(model_1$output, model_2$output))
  num_targets <- layer_dense[length(layer_dense)]

  if (length(layer_dense) > 1) {
    for (i in 1:(length(layer_dense) - 1)) {
      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
    }
  }

  output_tensor <- output_tensor %>%
    keras::layer_dense(units = num_targets, activation = "softmax")

  model <- keras::keras_model(inputs = list(model_1$input, model_2$input), outputs = output_tensor)
  model %>% keras::compile(loss = "categorical_crossentropy",
                           optimizer = optimizer, metrics = c("acc"))
  model
}


#' Extract hyperparameters from model
#'
#' @param model A keras model.
#' @returns List of hyperparameters.
#' @noRd
get_hyper_param <- function(model) {
  layers.lstm <- 0
  use.cudnn <- FALSE
  bidirectional <- FALSE
  use.codon.cnn <- FALSE
  learning_rate <- keras::k_eval(model$optimizer$lr)
  solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])

  layerList <- keras::get_config(model)[["layers"]]
  for (i in seq_along(layerList)) {
    layer_class_name <- layerList[[i]]$class_name

    if (layer_class_name == "Conv1D") {
      use.codon.cnn <- TRUE
    }

    if (layer_class_name == "CuDNNLSTM") {
      layers.lstm <- layers.lstm + 1
      use.cudnn <- TRUE
      lstm_layer_size <- layerList[[i]]$config$units
      recurrent_dropout_lstm <- 0
      dropout_lstm <- 0
    }

    if (layer_class_name == "LSTM") {
      layers.lstm <- layers.lstm + 1
      lstm_layer_size <- layerList[[i]]$config$units
      recurrent_dropout_lstm <- layerList[[i]]$config$recurrent_dropout
      dropout_lstm <- layerList[[i]]$config$dropout
    }
    # TODO: wrong output since bidirectional is a layer wrapper (?)
    if (layer_class_name == "Bidirectional") {
      bidirectional <- TRUE
      use.cudnn <- FALSE
      layers.lstm <- layers.lstm + 1
      lstm_layer_size <- layerList[[i]]$config$layer$config$units
      if (layerList[[i]]$config$layer$class_name == "LSTM") {
        recurrent_dropout_lstm <- layerList[[i]]$config$layer$config$recurrent_dropout
        dropout_lstm <- layerList[[i]]$config$layer$config$dropout
      }
    }
  }

  list(dropout = dropout_lstm,
       recurrent_dropout = recurrent_dropout_lstm,
       lstm_layer_size = lstm_layer_size,
       solver = solver,
       use.cudnn = use.cudnn,
       layers.lstm = layers.lstm,
       learning_rate = learning_rate,
       use.codon.cnn = use.codon.cnn,
       bidirectional = bidirectional
  )
}