--- a
+++ b/R/create_model_utils.R
@@ -0,0 +1,864 @@
+get_layer_names <- function(model) {
+
+  n <- length(model$layers)
+  layer_names <- vector("character", n)
+  for (i in 1:n) {
+    layer_names[i] <- model$layers[[i]]$name
+  }
+  layer_names
+}
+
+#' Compile model
+#'
+#' @inheritParams create_model_lstm_cnn
+#' @param model A keras model.
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#'
+#' model <- create_model_lstm_cnn(layer_lstm = 8, compile = FALSE)
+#' model <- compile_model(model = model,
+#'                        solver = 'adam',
+#'                        learning_rate = 0.01,
+#'                        loss_fn = 'categorical_crossentropy')
+#'
+#' @returns A compiled keras model.
+#' @export
+compile_model <- function(model, solver, learning_rate, loss_fn, label_smoothing = 0,
+                          num_output_layers = 1, label_noise_matrix = NULL,
+                          bal_acc = FALSE, f1_metric = FALSE, auc_metric = FALSE,
+                          layer_dense = NULL) {
+
+  optimizer <- set_optimizer(solver, learning_rate)
+  # keep the loss name as a string; loss_fn may be replaced by a loss object below
+  loss_name <- loss_fn
+  if (num_output_layers == 1) num_targets <- model$output_shape[[2]]
+
+  # add metrics
+  if (loss_name == "binary_crossentropy") {
+    model_metrics <- c(tensorflow::tf$keras$metrics$BinaryAccuracy(name = "acc"))
+  } else if (loss_name == "sparse_categorical_crossentropy") {
+    model_metrics <- tensorflow::tf$keras$metrics$SparseCategoricalAccuracy(name = "acc")
+  } else {
+    model_metrics <- c("acc")
+  }
+
+  cm_dir <- NULL
+  if (num_output_layers == 1) {
+    cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
+    while (dir.exists(cm_dir)) {
+      cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
+    }
+    dir.create(cm_dir)
+    model$cm_dir <- cm_dir
+
+    # for a single 3D output, use an unreduced loss object
+    if (loss_name == "sparse_categorical_crossentropy" &&
+        length(model$outputs) == 1 && length(model$output_shape) == 3) {
+      loss_fn <- tensorflow::tf$keras$losses$SparseCategoricalCrossentropy(
+        reduction = tensorflow::tf$keras$losses$Reduction$NONE
+      )
+    }
+
+    if (loss_name == "categorical_crossentropy" &&
+        length(model$outputs) == 1 && length(model$output_shape) == 3) {
+      loss_fn <- tensorflow::tf$keras$losses$CategoricalCrossentropy(
+        label_smoothing = label_smoothing,
+        reduction = tensorflow::tf$keras$losses$Reduction$NONE,
+        name = 'categorical_crossentropy'
+      )
+    }
+
+    if (loss_name %in% c("categorical_crossentropy", "sparse_categorical_crossentropy")) {
+
+      if (bal_acc) {
+        macro_average_cb <- balanced_acc_wrapper(num_targets, cm_dir)
+        model_metrics <- c(macro_average_cb, "acc")
+      }
+
+      if (f1_metric) {
+        f1 <- f1_wrapper(num_targets)
+        model_metrics <- c(model_metrics, f1)
+      }
+    }
+
+    if (auc_metric) {
+      auc <- auc_wrapper(model_output_size = layer_dense[length(layer_dense)],
+                         loss = loss_fn)
+      model_metrics <- c(model_metrics, auc)
+    }
+
+  }
+
+  if (label_smoothing > 0 && !is.null(label_noise_matrix)) {
+    stop("Cannot use label smoothing and label noise at the same time. ",
+         "Either set label_smoothing = 0 or label_noise_matrix = NULL.")
+  }
+
+  if (label_smoothing > 0) {
+    if (loss_name == "categorical_crossentropy") {
+      smooth_loss <- tensorflow::tf$losses$CategoricalCrossentropy(label_smoothing = label_smoothing, name = "smooth_loss")
+    }
+    if (loss_name == "binary_crossentropy") {
+      smooth_loss <- tensorflow::tf$losses$BinaryCrossentropy(label_smoothing = label_smoothing, name = "smooth_loss")
+    }
+    model %>% keras::compile(loss = smooth_loss,
+                             optimizer = optimizer, metrics = model_metrics)
+  } else if (!is.null(label_noise_matrix)) {
+    row_sums <- rowSums(label_noise_matrix)
+    if (!all(row_sums == 1)) {
+      warning("Rows of label_noise_matrix don't sum to 1")
+    }
+    noisy_loss <- noisy_loss_wrapper(solve(label_noise_matrix))
+    model %>% keras::compile(loss = noisy_loss,
+                             optimizer = optimizer, metrics = model_metrics)
+  } else {
+    model %>% keras::compile(loss = loss_fn,
+                             optimizer = optimizer, metrics = model_metrics)
+  }
+
+  model
+
+}
+
+
+#' Load checkpoint
+#'
+#' Load a checkpoint from a directory or file. If `cp_path` is a directory, the
+#' checkpoint is chosen by a condition: best validation accuracy, best validation
+#' loss, last epoch or a specific epoch number.
+#'
+#' @inheritParams create_model_lstm_cnn
+#' @param cp_path A directory containing checkpoints or a single checkpoint file.
+#' If a directory, the checkpoint is chosen based on `cp_filter` or `ep_index`.
+#' @param cp_filter Condition to choose checkpoint if `cp_path` is a directory.
+#' Either "acc" for best validation accuracy, "loss" for best validation loss or
+#' "last_ep" for last epoch.
+#' @param ep_index Load checkpoint from specific epoch number. If not `NULL`, has
+#' priority over `cp_filter`.
+#' @param compile Whether to load the compiled model.
+#' @param re_compile Whether to compile the model with parameters from
+#' `learning_rate`, `solver` and `loss`.
+#' @param add_custom_object Named list of custom objects.
+#' @param verbose Whether to print the chosen checkpoint path.
+#' @param loss Loss function. Only used if model gets compiled.
+#' @param margin Margin for contrastive loss, see \link{loss_cl}.
+#' @examples
+#' \donttest{
+#' library(keras)
+#' model <- create_model_lstm_cnn(layer_lstm = 8)
+#' checkpoint_folder <- tempfile()
+#' dir.create(checkpoint_folder)
+#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.007-val_loss11.07-val_acc0.6.hdf5'))
+#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.019-val_loss8.74-val_acc0.7.hdf5'))
+#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.025-val_loss0.03-val_acc0.8.hdf5'))
+#' model <- load_cp(cp_path = checkpoint_folder, cp_filter = "last_ep")
+#' }
+#' @returns A keras model loaded from a checkpoint.
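+#' @param mirrored_strategy Whether to load the model inside a
+#' `tf$distribute$MirroredStrategy` scope. If `NULL`, a mirrored strategy is
+#' used automatically when more than one GPU is available.
+#' @details
+#' Checkpoint file names must contain the epoch, validation loss and validation
+#' accuracy in the format shown in the example below
+#' (`Ep.007-val_loss11.07-val_acc0.6.hdf5`), since these values are parsed from
+#' the file name.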
+#' @export
+load_cp <- function(cp_path, cp_filter = "last_ep", ep_index = NULL, compile = FALSE,
+                    learning_rate = 0.01, solver = "adam", re_compile = FALSE,
+                    loss = "categorical_crossentropy",
+                    add_custom_object = NULL, margin = 1,
+                    verbose = TRUE, mirrored_strategy = NULL) {
+
+  # NULL: use mirrored strategy if more than one GPU is available
+  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
+  if (mirrored_strategy) {
+    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
+    with(mirrored_strategy$scope(), {
+      argg <- as.list(environment())
+      argg$mirrored_strategy <- FALSE
+      model <- do.call(load_cp, argg)
+    })
+    return(model)
+  }
+
+  # custom objects to load transformer architecture
+  custom_objects <- list(
+    "layer_pos_embedding" = layer_pos_embedding_wrapper(),
+    "layer_pos_sinusoid" = layer_pos_sinusoid_wrapper(),
+    "layer_transformer_block" = layer_transformer_block_wrapper(),
+    "layer_euc_dist" = layer_euc_dist_wrapper(),
+    "layer_cosine_sim" = layer_cosine_sim_wrapper(),
+    "layer_aggregate_time_dist" = layer_aggregate_time_dist_wrapper(),
+    "loss_cl_margin___margin_" = loss_cl(margin = margin)
+  )
+
+  if (!is.null(add_custom_object)) {
+    for (i in seq_along(add_custom_object)) {
+      custom_objects[[names(add_custom_object)[i]]] <- add_custom_object[[i]]
+    }
+  }
+
+  cp <- get_cp(cp_path = cp_path, cp_filter = cp_filter,
+               ep_index = ep_index, verbose = verbose)
+
+  model <- keras::load_model_hdf5(cp, compile = compile, custom_objects = custom_objects)
+
+  if (re_compile) {
+    optimizer <- set_optimizer(solver, learning_rate)
+    model %>% keras::compile(loss = loss,
+                             optimizer = optimizer,
+                             metrics = model$metrics)
+  }
+
+  return(model)
+
+}
+
+get_cp <- function(cp_path, cp_filter = "last_ep", ep_index = NULL, verbose = TRUE) {
+
+  if (!is.null(cp_filter)) {
+    stopifnot(cp_filter %in% c("acc", "loss", "last_ep"))
+    # ep_index has priority over cp_filter
+    if (!is.null(ep_index)) {
+      cp_filter <- NULL
+    }
+  }
+
+  is_directory <- dir.exists(cp_path)
+  if (is_directory) {
+    cps <- list.files(cp_path, full.names = TRUE)
+    files_basename <- basename(cps)
+    stopifnot(xor(is.null(cp_filter), is.null(ep_index)))
+  } else {
+    stopifnot(file.exists(cp_path))
+    cp <- cp_path
+  }
+
+  if (is_directory && !is.null(cp_filter)) {
+
+    if (cp_filter == "acc") {
+      if (!all(stringr::str_detect(files_basename, "acc"))) {
+        stop("No accuracy information in checkpoint names ('acc' string), use other metric.")
+      }
+      acc_scores <- files_basename %>% stringr::str_extract("acc\\d++\\.\\d++") %>%
+        stringr::str_remove("acc") %>% as.numeric()
+      # on ties, prefer the later epoch (files are sorted by name)
+      index <- which.max(rank(acc_scores, ties.method = "first"))
+    }
+
+    if (cp_filter == "loss") {
+      if (!all(stringr::str_detect(files_basename, "loss"))) {
+        stop("No loss information in checkpoint names ('loss' string), use other metric.")
+      }
+      loss_scores <- files_basename %>% stringr::str_extract("loss\\d++\\.\\d++") %>%
+        stringr::str_remove("loss") %>% as.numeric()
+      # on ties, prefer the later epoch
+      index <- which.min(rank(loss_scores, ties.method = "last"))
+    }
+
+    if (cp_filter == "last_ep") {
+      ep_scores <- files_basename %>% stringr::str_extract("Ep\\.\\d++") %>%
+        stringr::str_remove("Ep\\.") %>% as.numeric()
+      index <- which.max(ep_scores)
+    }
+
+  }
+
+  if (is_directory && !is.null(ep_index)) {
+    ep_scores <- files_basename %>% stringr::str_extract("Ep\\.\\d++") %>%
+      stringr::str_remove("Ep\\.") %>% as.numeric()
+    index <- which(ep_scores == ep_index)
+    if (length(index) == 0) {
+      stop("No checkpoint found for epoch ", ep_index)
+    }
+  }
+
+  if (is_directory) {
+    cp <- cps[index]
+  }
+
+  if (verbose) {
+    cat("Using checkpoint", cp, "\n")
+    cat("(Date created:", as.character(file.info(cp)$mtime), ")\n")
+  }
+
+  return(cp)
+
+}
+
+
+# temporary fix for metric bugs
+manage_metrics <- function(model, compile = FALSE) {
+
+  dummy_gen <- generator_dummy(model, batch_size = 1)
+  z <- dummy_gen()
+  x <- z[[1]]
+  y <- z[[2]]
+
+  # evaluate once on dummy data so keras builds the metric objects
+  if (length(model$metrics) == 0) {
+    suppressMessages(
+      eval <- model$evaluate(x, y, verbose = 0L)
+    )
+  }
+
+  if (compile) {
+    metric_names <- vector("character", length(model$metrics))
+    for (i in seq_along(model$metrics)) {
+      metric_names[i] <- model$metrics[[i]]$name
+    }
+
+    duplicated_index <- duplicated(metric_names)
+    loss_index <- stringr::str_detect(metric_names, "loss")
+    index <- duplicated_index | loss_index
+
+    # remove duplicated metrics
+    model <- model %>% keras::compile(loss = model$loss,
+                                      optimizer = model$optimizer,
+                                      metrics = model$metrics[!index])
+    suppressMessages(
+      eval <- model$evaluate(x, y, verbose = 0L)
+    )
+  }
+
+  return(model)
+
+}
+
+
+#' Get activation functions of output layers
+#'
+#' Get activation functions of output layers.
+#'
+#' @param model A keras model.
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' model <- create_model_lstm_cnn(
+#'   maxlen = 50,
+#'   layer_lstm = 8,
+#'   layer_dense = c(64, 2),
+#'   verbose = FALSE)
+#' get_output_activations(model)
+#'
+#' @returns Character vector with names of activation functions of model outputs.
+#' @export
+get_output_activations <- function(model) {
+
+  out_names <- model$output_names
+  act_vec <- vector("character", length(out_names))
+  count <- 1
+
+  for (layer_name in out_names) {
+    act_name <- model$get_layer(layer_name)$get_config()$activation
+    if (is.null(act_name)) act_name <- "linear"
+    act_vec[count] <- act_name
+    count <- count + 1
+  }
+  return(act_vec)
+}
+
+
+set_optimizer <- function(solver = "adam", learning_rate = 0.01) {
+
+  stopifnot(solver %in% c("adam", "adagrad", "rmsprop", "sgd"))
+
+  # older keras versions name the learning rate argument 'lr'
+  named_lr <- "lr" %in% names(formals(keras::optimizer_adam))
+  if (named_lr) {
+    arg_list <- list(lr = learning_rate)
+  } else {
+    arg_list <- list(learning_rate = learning_rate)
+  }
+
+  if (solver == "adam")
+    keras_optimizer <- do.call(keras::optimizer_adam, arg_list)
+  if (solver == "adagrad")
+    keras_optimizer <- do.call(keras::optimizer_adagrad, arg_list)
+  if (solver == "rmsprop")
+    keras_optimizer <- do.call(keras::optimizer_rmsprop, arg_list)
+  if (solver == "sgd")
+    keras_optimizer <- do.call(keras::optimizer_sgd, arg_list)
+
+  return(keras_optimizer)
+
+}
+
+
+#' Get solver and learning_rate from model.
+#'
+#' @returns Keras optimizer.
+#' @noRd
+get_optimizer <- function(model) {
+  solver <- stringr::str_to_lower(model$optimizer$get_config()["name"])
+  learning_rate <- keras::k_eval(model$optimizer$lr)
+  if (solver == "adam") {
+    optimizer <- keras::optimizer_adam(learning_rate)
+  }
+  if (solver == "adagrad") {
+    optimizer <- keras::optimizer_adagrad(learning_rate)
+  }
+  if (solver == "rmsprop") {
+    optimizer <- keras::optimizer_rmsprop(learning_rate)
+  }
+  if (solver == "sgd") {
+    optimizer <- keras::optimizer_sgd(learning_rate)
+  }
+  return(optimizer)
+}
+
+#' Replace input layer
+#'
+#' Replace the first layer of a model with a new input layer of different shape.
+#' Only works for sequential models that use CNN and LSTM layers.
+#'
+#' @param model A keras model.
+#' @param input_shape The new input shape vector (without batch size).
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' model_1 <- create_model_lstm_cnn(
+#'   maxlen = 50,
+#'   kernel_size = c(10, 10),
+#'   filters = c(64, 128),
+#'   pool_size = c(2, 2),
+#'   layer_lstm = c(32),
+#'   verbose = FALSE,
+#'   layer_dense = c(64, 2))
+#' model <- reshape_input(model_1, input_shape = c(120, 4))
+#' model
+#'
+#' @returns A keras model with changed input shape of input model.
+#' @export
+reshape_input <- function(model, input_shape) {
+
+  in_layer <- keras::layer_input(shape = input_shape)
+  # reapply all layers after the input layer to the new input
+  for (i in 2:length(model$layers)) {
+    layer_name <- model$layers[[i]]$name
+    if (i == 2) {
+      out_layer <- in_layer %>% model$get_layer(layer_name)()
+    } else {
+      out_layer <- out_layer %>% model$get_layer(layer_name)()
+    }
+  }
+  new_model <- tensorflow::tf$keras$Model(in_layer, out_layer)
+  return(new_model)
+}
+
+
+#' Check if layer is in model
+#'
+#' @returns Error message if model does not contain layer of certain name.
+#' @noRd
+check_layer_name <- function(model, layer_name) {
+  num_layers <- length(model$layers)
+  layer_names <- vector("character", num_layers)
+  for (i in 1:num_layers) {
+    layer_names[i] <- model$layers[[i]]$name
+  }
+  if (!(layer_name %in% layer_names)) {
+    stop(paste0("Model has no layer named '", layer_name, "'"))
+  }
+}
+
+
+#' Remove layers from model and add dense layers
+#'
+#' Take a model as input and remove all layers after a certain layer, specified in the \code{layer_name} argument.
+#' Optionally adds dense layers on top of the pruned model. The model can have multiple output layers with separate loss/activation functions.
+#' You can freeze all weights of the pruned model by setting \code{freeze_base_model = TRUE}.
+#'
+#' @inheritParams create_model_lstm_cnn
+#' @param layer_name Name of last layer to use from old model.
+#' @param model A keras model.
+#' @param dense_layers List of vectors specifying the number of units for each dense layer. If this is a list of length > 1, the model
+#' has multiple output layers.
+#' @param shared_dense_layers Vector with number of units for dense layers. These layers are connected on top of the layer given in
+#' the `layer_name` argument and can be used as shared dense layers before the model splits into multiple output layers. Don't use
+#' if the model has just one output layer (use only `dense_layers`).
+#' @param last_activation List of activation functions, one for the last layer of each entry in \code{dense_layers}. Either `"softmax"`, `"sigmoid"` or `"linear"`.
+#' @param output_names List of names for each output layer.
+#' @param losses List of loss functions, one for each output.
+#' @param verbose Boolean.
+#' @param dropout List of vectors with dropout rates for each new dense layer.
+#' @param dropout_shared Vector of dropout rates for the dense layers from `shared_dense_layers`.
+#' @param freeze_base_model Whether to freeze all weights before the new dense layers.
+#' @param compile Boolean, whether to compile the new model.
+#' @param flatten Whether to add a flatten layer before the new dense layers.
+#' @param learning_rate Learning rate if `compile = TRUE`; if `NULL`, the learning rate of the old model is used.
+#' @param global_pooling "max_ch_first" for global max pooling with channel first
+#' ([keras docs](https://keras.io/api/layers/pooling_layers/global_average_pooling1d/)),
+#' "max_ch_last" for global max pooling with channel last, "average_ch_first" for global average pooling with channel first,
+#' "average_ch_last" for global average pooling with channel last or `NULL` for no global pooling.
+#' "both_ch_first" or "both_ch_last" to combine average and max pooling. "all" for all 4 options at once.
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' model_1 <- create_model_lstm_cnn(layer_lstm = c(64, 64),
+#'                                  maxlen = 50,
+#'                                  layer_dense = c(32, 4),
+#'                                  verbose = FALSE)
+#' # get name of second to last layer
+#' num_layers <- length(model_1$get_config()$layers)
+#' layer_name <- model_1$get_config()$layers[[num_layers - 1]]$name
+#' # add dense layers with multiple outputs and separate loss/activation functions
+#' model_2 <- remove_add_layers(model = model_1,
+#'                              layer_name = layer_name,
+#'                              dense_layers = list(c(32, 16, 1), c(8, 1), c(12, 5)),
+#'                              losses = list("binary_crossentropy", "mae",
+#'                                            "categorical_crossentropy"),
+#'                              last_activation = list("sigmoid", "linear", "softmax"),
+#'                              freeze_base_model = TRUE,
+#'                              output_names = list("out_1_binary_classification",
+#'                                                  "out_2_regression",
+#'                                                  "out_3_classification")
+#' )
+#'
+#' @returns A keras model; added and/or removed layers from some base model.
+#' @export
+remove_add_layers <- function(model = NULL,
+                              layer_name = NULL,
+                              dense_layers = NULL,
+                              shared_dense_layers = NULL,
+                              last_activation = list("softmax"),
+                              output_names = NULL,
+                              losses = NULL,
+                              verbose = TRUE,
+                              dropout = NULL,
+                              dropout_shared = NULL,
+                              freeze_base_model = FALSE,
+                              compile = FALSE,
+                              learning_rate = 0.001,
+                              solver = "adam",
+                              flatten = FALSE,
+                              global_pooling = NULL,
+                              model_seed = NULL,
+                              mixed_precision = FALSE,
+                              mirrored_strategy = NULL) {
+
+  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
+
+  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
+  if (mirrored_strategy) {
+    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
+    with(mirrored_strategy$scope(), {
+      argg <- as.list(environment())
+      argg$mirrored_strategy <- FALSE
+      model <- do.call(remove_add_layers, argg)
+    })
+    return(model)
+  }
+
+  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
+  if (!is.null(layer_name)) check_layer_name(model, layer_name)
+  if (!is.null(shared_dense_layers) && is.null(dense_layers)) {
+    stop("You need to specify output layers in the dense_layers argument")
+  }
+  if (!is.null(shared_dense_layers) && length(dense_layers) == 1) {
+    stop("If your model has just one output layer, use only the dense_layers argument (and set shared_dense_layers = NULL).")
+  }
+  if (!is.null(global_pooling)) {
+    stopifnot(global_pooling %in% c("max_ch_first", "max_ch_last", "average_ch_first",
+                                    "average_ch_last", "both_ch_first", "both_ch_last", "all"))
+  }
+
+  if (!is.list(dense_layers)) {
+    dense_layers <- list(dense_layers)
+  }
+
+  if (!is.null(dropout) && !is.list(dropout)) {
+    dropout <- list(dropout)
+  }
+
+  # derive default losses from the last activation functions
+  if (is.null(losses)) {
+    losses <- list()
+    for (i in seq_along(last_activation)) {
+      if (last_activation[[i]] == "softmax") loss <- "categorical_crossentropy"
+      if (last_activation[[i]] == "sigmoid") loss <- "binary_crossentropy"
+      if (last_activation[[i]] == "linear") loss <- "mse"
+      losses[[i]] <- loss
+    }
+  }
+
+  if (is.null(output_names)) {
+    output_names <- vector("list", length = length(dense_layers))
+  }
+
+  if (length(dense_layers) != length(last_activation)) {
+    stop("Length of dense_layers and last_activation must be the same")
+  }
+
+  if (length(dense_layers) != length(output_names)) {
+    stop("Length of dense_layers and output_names must be the same")
+  }
+
+  if (!is.null(dropout)) {
+    for (i in seq_along(dense_layers)) {
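+      # each output's dropout vector must contain one rate per dense layer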
+      stopifnot(length(dropout[[i]]) == length(dense_layers[[i]]))
+    }
+  }
+
+  if (verbose) {
+    cat("Original model:\n")
+    model$summary()
+  }
+
+  if (!is.null(layer_name)) {
+    model_new <- tensorflow::tf$keras$Model(model$input, model$get_layer(layer_name)$output)
+
+    if (freeze_base_model) {
+      keras::freeze_weights(model_new)
+    }
+
+    if (!is.null(global_pooling)) {
+      if (global_pooling == "max_ch_first") {
+        out <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_first")
+      } else if (global_pooling == "max_ch_last") {
+        out <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_last")
+      } else if (global_pooling == "average_ch_first") {
+        out <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_first")
+      } else if (global_pooling == "average_ch_last") {
+        out <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_last")
+      } else if (global_pooling == "both_ch_last") {
+        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_last")
+        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_last")
+        out <- keras::layer_concatenate(list(out1, out2))
+      } else if (global_pooling == "both_ch_first") {
+        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_first")
+        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_first")
+        out <- keras::layer_concatenate(list(out1, out2))
+      } else {
+        # "all": concatenate all four pooling variants
+        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_first")
+        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_first")
+        out3 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format = "channels_last")
+        out4 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format = "channels_last")
+        out <- keras::layer_concatenate(list(out1, out2, out3, out4))
+      }
+      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
+    }
+
+    if (flatten) {
+      out <- model_new$output %>% keras::layer_flatten()
+      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
+    }
+
+    if (!is.null(shared_dense_layers)) {
+      out <- model_new$output
+      for (i in seq_along(shared_dense_layers)) {
+        if (!is.null(dropout_shared)) {
+          out <- out %>% keras::layer_dropout(dropout_shared[i])
+        }
+        out <- out %>% keras::layer_dense(shared_dense_layers[i], activation = "relu")
+      }
+      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
+    }
+
+    output_list <- list()
+    name_dense_index <- 1
+
+    if (!is.null(dense_layers[[1]])) {
+      for (output_num in seq_along(dense_layers)) {
+        for (i in seq_along(dense_layers[[output_num]])) {
+          is_last_layer <- i == length(dense_layers[[output_num]])
+          activation <- if (is_last_layer) last_activation[[output_num]] else "relu"
+          # keep output layers in float32 (relevant for mixed precision)
+          dtype <- if (is_last_layer) "float32" else NULL
+          if (is_last_layer) {
+            new_layer_name <- output_names[[output_num]]
+          } else {
+            new_layer_name <- paste0("dense_new_", name_dense_index)
+            name_dense_index <- name_dense_index + 1
+          }
+
+          out <- if (i == 1) model_new$output else output_list[[output_num]]
+          if (!is.null(dropout)) {
+            out <- out %>% keras::layer_dropout(rate = dropout[[output_num]][i])
+          }
+          output_list[[output_num]] <- out %>%
+            keras::layer_dense(units = dense_layers[[output_num]][i], activation = activation,
+                               name = new_layer_name, dtype = dtype)
+        }
+      }
+      model_new <- tensorflow::tf$keras$Model(model_new$input, output_list)
+    }
+  }
+
+  if (verbose) {
+    cat("New model:\n")
+    model_new$summary()
+    for (i in seq_along(model_new$layers)) {
+      cat(model_new$layers[[i]]$name, "trainable:", model_new$layers[[i]]$trainable, "\n")
+    }
+  }
+
+  if (compile) {
+    if (is.null(learning_rate)) {
+      learning_rate <- keras::k_eval(model$optimizer$lr)
+    }
+
+    if (is.null(solver)) {
+      solver <- stringr::str_to_lower(model$optimizer$get_config()["name"])
+    }
+
+    optimizer <- set_optimizer(solver, learning_rate)
+
+    metric_list <- list()
+    for (i in seq_along(losses)) {
+      metric_list[[i]] <- ifelse(losses[[i]] == "binary_crossentropy", "binary_accuracy", "acc")
+    }
+
+    model_new %>% keras::compile(loss = losses,
+                                 optimizer = optimizer,
+                                 metrics = metric_list)
+  }
+
+  model_new
+}
+
+
+#' Merge two models
+#'
+#' Combine two models at certain layers and add dense layer(s) afterwards.
+#'
+#' @param models List of two models.
+#' @param layer_names Vector of length 2 with names of layers to merge.
+#' @param freeze_base_model Boolean vector of length 2. Whether to freeze weights of the individual models.
+#' @inheritParams create_model_lstm_cnn
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' model_1 <- create_model_lstm_cnn(layer_lstm = c(64, 64), maxlen = 50, layer_dense = c(32, 4),
+#'                                  verbose = FALSE)
+#' model_2 <- create_model_lstm_cnn(layer_lstm = c(32), maxlen = 40,
+#'                                  layer_dense = c(8, 2), verbose = FALSE)
+#' # get names of second to last layers
+#' num_layers_1 <- length(model_1$get_config()$layers)
+#' layer_name_1 <- model_1$get_config()$layers[[num_layers_1 - 1]]$name
+#' num_layers_2 <- length(model_2$get_config()$layers)
+#' layer_name_2 <- model_2$get_config()$layers[[num_layers_2 - 1]]$name
+#' # merge models
+#' model <- merge_models(models = list(model_1, model_2),
+#'                       layer_names = c(layer_name_1, layer_name_2),
+#'                       layer_dense = c(6, 2),
+#'                       freeze_base_model = c(FALSE, FALSE))
+#'
+#' @returns A keras model merging two input models.
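+#' @details
+#' The models are pruned at the given layers, their outputs are concatenated,
+#' the dense layers from `layer_dense` are added on top and the merged model is
+#' compiled with a softmax output and categorical crossentropy loss.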
+#' @export
+merge_models <- function(models, layer_names, layer_dense, solver = "adam",
+                         learning_rate = 0.0001,
+                         freeze_base_model = c(FALSE, FALSE),
+                         model_seed = NULL) {
+
+  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
+
+  model_1 <- remove_add_layers(model = models[[1]],
+                               layer_name = layer_names[1],
+                               dense_layers = NULL,
+                               verbose = FALSE,
+                               dropout = NULL,
+                               freeze_base_model = freeze_base_model[1],
+                               compile = FALSE,
+                               learning_rate = NULL)
+
+  model_2 <- remove_add_layers(model = models[[2]],
+                               layer_name = layer_names[2],
+                               dense_layers = NULL,
+                               verbose = FALSE,
+                               dropout = NULL,
+                               freeze_base_model = freeze_base_model[2],
+                               compile = FALSE,
+                               learning_rate = NULL)
+
+  # choose optimization method
+  optimizer <- set_optimizer(solver, learning_rate)
+
+  output_tensor <- keras::layer_concatenate(c(model_1$output, model_2$output))
+  num_targets <- layer_dense[length(layer_dense)]
+
+  if (length(layer_dense) > 1) {
+    for (i in 1:(length(layer_dense) - 1)) {
+      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
+    }
+  }
+
+  output_tensor <- output_tensor %>%
+    keras::layer_dense(units = num_targets, activation = "softmax")
+
+  model <- keras::keras_model(inputs = list(model_1$input, model_2$input), outputs = output_tensor)
+  model %>% keras::compile(loss = "categorical_crossentropy",
+                           optimizer = optimizer, metrics = c("acc"))
+  model
+}
+
+
+#' Extract hyperparameters from model
+#'
+#' @param model A keras model.
+#' @returns List of hyperparameters.
+#' @noRd
+get_hyper_param <- function(model) {
+  layers.lstm <- 0
+  use.cudnn <- FALSE
+  bidirectional <- FALSE
+  use.codon.cnn <- FALSE
+  learning_rate <- keras::k_eval(model$optimizer$lr)
+  solver <- stringr::str_to_lower(model$optimizer$get_config()["name"])
+
+  layerList <- keras::get_config(model)["layers"]
+  for (i in seq_along(layerList)) {
+    layer_class_name <- layerList[[i]]$class_name
+
+    if (layer_class_name == "Conv1D") {
+      use.codon.cnn <- TRUE
+    }
+
+    if (layer_class_name == "CuDNNLSTM") {
+      layers.lstm <- layers.lstm + 1
+      use.cudnn <- TRUE
+      lstm_layer_size <- layerList[[i]]$config$units
+      recurrent_dropout_lstm <- 0
+      dropout_lstm <- 0
+    }
+
+    if (layer_class_name == "LSTM") {
+      layers.lstm <- layers.lstm + 1
+      lstm_layer_size <- layerList[[i]]$config$units
+      recurrent_dropout_lstm <- layerList[[i]]$config$recurrent_dropout
+      dropout_lstm <- layerList[[i]]$config$dropout
+    }
+
+    # TODO: wrong output since bidirectional is layer wrapper (?)
+    if (layer_class_name == "Bidirectional") {
+      bidirectional <- TRUE
+      use.cudnn <- FALSE
+      layers.lstm <- layers.lstm + 1
+      lstm_layer_size <- layerList[[i]]$config$layer$config$units
+      if (layerList[[i]]$config$layer$class_name == "LSTM") {
+        recurrent_dropout_lstm <- layerList[[i]]$config$layer$config$recurrent_dropout
+        dropout_lstm <- layerList[[i]]$config$layer$config$dropout
+      }
+    }
+  }
+
+  list(dropout = dropout_lstm,
+       recurrent_dropout = recurrent_dropout_lstm,
+       lstm_layer_size = lstm_layer_size,
+       solver = solver,
+       use.cudnn = use.cudnn,
+       layers.lstm = layers.lstm,
+       learning_rate = learning_rate,
+       use.codon.cnn = use.codon.cnn,
+       bidirectional = bidirectional
+  )
+}
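+
+
+# Usage sketch (commented out so nothing runs on package load): how the helpers
+# above combine for transfer learning. Paths and layer choices are illustrative.
+#
+# model <- load_cp(cp_path = "checkpoints/", cp_filter = "acc", compile = FALSE)
+# base_layer <- get_layer_names(model)[length(get_layer_names(model)) - 1]
+# model <- remove_add_layers(model, layer_name = base_layer,
+#                            dense_layers = list(c(16, 2)),
+#                            last_activation = list("softmax"),
+#                            freeze_base_model = TRUE,
+#                            compile = TRUE)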