--- /dev/null
+++ b/R/create_model_lstm_cnn.R
@@ -0,0 +1,824 @@
+#' @title Create LSTM/CNN network
+#'
+#' @description Creates a network consisting of an arbitrary number of CNN, LSTM and dense layers.
+#' The last layer is a dense layer.
+#'
+#' @param maxlen Length of the predictor sequence.
+#' @param dropout_lstm Fraction of the units to drop for inputs.
+#' @param recurrent_dropout_lstm Fraction of the units to drop for the recurrent state.
+#' @param layer_lstm Number of cells per network layer. Can be a scalar or a vector.
+#' @param layer_dense Vector specifying the number of neurons per dense layer after the last LSTM or CNN layer (if no LSTM is used).
+#' @param dropout_dense Dropout rates between dense layers. No dropout if `NULL`.
+#' @param solver Optimization method; options are `"adam"`, `"adagrad"`, `"rmsprop"` or `"sgd"`.
+#' @param learning_rate Learning rate for the optimizer.
+#' @param bidirectional Whether to use a bidirectional wrapper for the LSTM layers.
+#' @param vocabulary_size Number of unique characters in the vocabulary.
+#' @param stateful Boolean. Whether to use stateful LSTM layers.
+#' @param batch_size Number of samples used for one network update. Only used if \code{stateful = TRUE}.
+#' @param compile Whether to compile the model.
+#' @param kernel_size Size of the 1D convolutional layers. For multiple layers, assign a vector (e.g., `rep(3, 2)` for two layers with kernel size 3).
+#' @param filters Number of filters. For multiple layers, assign a vector.
+#' @param strides Stride values. For multiple layers, assign a vector.
+#' @param pool_size Integer, size of the max pooling windows. For multiple layers, assign a vector.
+#' @param padding Padding of the CNN layers: `"same"`, `"valid"` or `"causal"`.
+#' @param dilation_rate Integer, the dilation rate to use for dilated convolution.
+#' @param gap Whether to apply global average pooling after the last CNN layer.
+#' @param use_bias Boolean. Whether the CNN layers use a bias term.
+#' @param residual_block Boolean. If `TRUE`, residual connections are used in the CNN part; they are not applied to the first convolutional layer.
+#' @param residual_block_length Integer. Determines how many convolutional layers (or triplets when `size_reduction_1Dconv` is `TRUE`) sit
+#' between the legs of a residual connection. For example, if `kernel_size`/`filters` have length 7 and `residual_block_length` is 2, the model
+#' contains 1 + (7-1)*2 convolutional layers when `size_reduction_1Dconv` is `FALSE` and 1 + (7-1)*2*3 convolutional layers when it is `TRUE`.
+#' @param size_reduction_1Dconv Boolean. When `TRUE`, each convolution becomes a bottleneck triplet: a convolution with kernel size 1 first reduces
+#' the number of filters to 1/4 of the original value, the convolution with the original kernel size is then applied to the reduced number of
+#' filters, and a final convolution with kernel size 1 increases the number of filters back to the original value.
+#' @param label_input Integer or `NULL`. If not `NULL`, adds an additional input layer of \code{label_input} size.
+#' @param zero_mask Boolean, whether to apply zero masking before the LSTM layer. Only used if the model does not use any CNN layers.
+#' @param label_smoothing Float in \[0, 1\]. If 0, no smoothing is applied. If > 0, the loss is computed between the predicted
+#' labels and a smoothed version of the true labels, where the smoothing squeezes the labels towards 0.5.
+#' The closer the argument is to 1, the more the labels get smoothed.
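+#' For example, with `loss_fn = "categorical_crossentropy"` and two classes, `label_smoothing = 0.1`
+#' turns a one-hot label `c(1, 0)` into `c(0.95, 0.05)`, i.e. `y * (1 - 0.1) + 0.1 / 2`.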
+#' @param label_noise_matrix Matrix of label noise. Each row stands for one (true) class and each column for the fraction of labels
+#' assigned to that class. If the first class contains 5 percent wrong labels and the second class contains no noise, then
+#'
+#' \code{label_noise_matrix <- matrix(c(0.95, 0.05, 0, 1), nrow = 2, byrow = TRUE)}
+#' @param last_layer_activation Activation function of the output layer(s), for example `"sigmoid"` or `"softmax"`.
+#' @param loss_fn Either `"categorical_crossentropy"` or `"binary_crossentropy"`. If `label_noise_matrix` is given, a custom `"noisy_loss"` is used.
+#' @param num_output_layers Number of output layers.
+#' @param auc_metric Whether to add an AUC metric.
+#' @param f1_metric Whether to add an F1 metric.
+#' @param bal_acc Whether to add balanced accuracy.
+#' @param verbose Boolean.
+#' @param batch_norm_momentum Momentum for the moving mean and the moving variance.
+#' @param model_seed Set seed for model parameters in TensorFlow if not `NULL`.
+#' @param mixed_precision Whether to use mixed precision (https://www.tensorflow.org/guide/mixed_precision).
+#' @param mirrored_strategy Whether to use a distributed mirrored strategy. If `NULL`, a mirrored strategy is used only if more than one GPU is available.
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' create_model_lstm_cnn(
+#'   maxlen = 500,
+#'   vocabulary_size = 4,
+#'   kernel_size = c(8, 8, 8),
+#'   filters = c(16, 32, 64),
+#'   pool_size = c(3, 3, 3),
+#'   layer_lstm = c(32, 64),
+#'   layer_dense = c(128, 4),
+#'   learning_rate = 0.001)
+#'
+#' @returns A keras model; stacks CNN, LSTM and dense layers.
+#' @export
+create_model_lstm_cnn <- function(
+    maxlen = 50,
+    dropout_lstm = 0,
+    recurrent_dropout_lstm = 0,
+    layer_lstm = NULL,
+    layer_dense = c(4),
+    dropout_dense = NULL,
+    kernel_size = NULL,
+    filters = NULL,
+    strides = NULL,
+    pool_size = NULL,
+    solver = "adam",
+    learning_rate = 0.001,
+    vocabulary_size = 4,
+    bidirectional = FALSE,
+    stateful = FALSE,
+    batch_size = NULL,
+    compile = TRUE,
+    padding = "same",
+    dilation_rate = NULL,
+    gap = FALSE,
+    use_bias = TRUE,
+    residual_block = FALSE,
+    residual_block_length = 1,
+    size_reduction_1Dconv = FALSE,
+    label_input = NULL,
+    zero_mask = FALSE,
+    label_smoothing = 0,
+    label_noise_matrix = NULL,
+    last_layer_activation = "softmax",
+    loss_fn = "categorical_crossentropy",
+    num_output_layers = 1,
+    auc_metric = FALSE,
+    f1_metric = FALSE,
+    bal_acc = FALSE,
+    verbose = TRUE,
+    batch_norm_momentum = 0.99,
+    model_seed = NULL,
+    mixed_precision = FALSE,
+    mirrored_strategy = NULL) {
+
+  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
+
+  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
+  if (mirrored_strategy) {
+    # Rebuild the model inside the strategy scope by calling this function again
+    # with mirrored_strategy disabled.
+    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
+    with(mirrored_strategy$scope(), {
+      argg <- as.list(environment())
+      argg$mirrored_strategy <- FALSE
+      model <- do.call(create_model_lstm_cnn, argg)
+    })
+    return(model)
+  }
+
+  layer_dense <- as.integer(layer_dense)
+  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
+  num_targets <- layer_dense[length(layer_dense)]
+  layers.lstm <- length(layer_lstm)
+  use.cnn <- !is.null(kernel_size)
+
+  if (!is.null(layer_lstm)) {
+    stopifnot(length(layer_lstm) == 1 | (length(layer_lstm) == layers.lstm))
+  }
+
+  if (layers.lstm == 0 & !use.cnn) {
+    stop("Model does not use LSTM or CNN layers.")
+  }
+
+  if (is.null(strides)) strides <- rep(1L, length(filters))
+  if (is.null(dilation_rate) & use.cnn) dilation_rate <- rep(1L, length(filters))
+
+  if (use.cnn) {
+    same_length <- (length(kernel_size) == length(filters)) &
+      (length(filters) == length(strides)) &
+      (length(strides) == length(dilation_rate))
+    if (!same_length) {
+      stop("kernel_size, filters, dilation_rate and strides must have the same length")
+    }
+    if (residual_block & (padding != "same")) {
+      stop("Padding option must be 'same' when residual blocks are used.")
+    }
+  }
+
+  stopifnot(maxlen > 0)
+  stopifnot(dropout_lstm <= 1 & dropout_lstm >= 0)
+  stopifnot(recurrent_dropout_lstm <= 1 & recurrent_dropout_lstm >= 0)
+
+  if (length(layer_lstm) == 1) {
+    layer_lstm <- rep(layer_lstm, layers.lstm)
+  }
+
+  if (stateful) {
+    input_tensor <- keras::layer_input(batch_shape = c(batch_size, maxlen, vocabulary_size))
+  } else {
+    input_tensor <- keras::layer_input(shape = c(maxlen, vocabulary_size))
+  }
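+
+  # Shape bookkeeping (illustrative, assuming the default pooling strides): with the
+  # documented example (maxlen = 500, pool_size = c(3, 3, 3)), every max pooling
+  # step divides the sequence length by its pool size, so the length shrinks
+  # 500 -> 166 -> 55 -> 18 before the LSTM/dense part of the network.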
+  if (use.cnn) {
+    for (i in 1:length(filters)) {
+      if (i == 1) {
+        output_tensor <- input_tensor %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            dilation_rate = dilation_rate[i],
+            input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+            use_bias = use_bias
+          )
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor <- output_tensor %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      } else {
+        if (residual_block) {
+          # Shortcut branch of the residual connection: downsample (and widen via a
+          # 1x1 convolution if the filter count changes) so it matches the main branch.
+          # Guard against pool_size = NULL, which previously crashed this branch.
+          pool_i <- if (is.null(pool_size)) 1L else pool_size[i]
+          if ((strides[i] > 1) | (pool_i > 1)) {
+            residual_layer <- output_tensor %>% keras::layer_average_pooling_1d(pool_size = strides[i] * pool_i)
+          } else {
+            residual_layer <- output_tensor
+          }
+          if (filters[i - 1] != filters[i]) {
+            residual_layer <- residual_layer %>%
+              keras::layer_conv_1d(
+                kernel_size = 1,
+                padding = padding,
+                activation = "relu",
+                filters = filters[i],
+                strides = 1,
+                dilation_rate = dilation_rate[i],
+                input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                use_bias = use_bias
+              )
+            residual_layer <- residual_layer %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+          }
+          if (residual_block_length > 1) {
+            for (j in 1:(residual_block_length - 1)) {
+              if (size_reduction_1Dconv) {
+                output_tensor <- output_tensor %>%
+                  keras::layer_conv_1d(
+                    kernel_size = 1,
+                    padding = padding,
+                    activation = "relu",
+                    filters = filters[i] / 4,
+                    strides = 1,
+                    dilation_rate = dilation_rate[i],
+                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                    use_bias = use_bias
+                  )
+                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+
+                output_tensor <- output_tensor %>%
+                  keras::layer_conv_1d(
+                    kernel_size = kernel_size[i],
+                    padding = padding,
+                    activation = "relu",
+                    filters = filters[i] / 4,
+                    strides = 1,
+                    dilation_rate = dilation_rate[i],
+                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                    use_bias = use_bias
+                  )
+                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+
+                output_tensor <- output_tensor %>%
+                  keras::layer_conv_1d(
+                    kernel_size = 1,
+                    padding = padding,
+                    activation = "relu",
+                    filters = filters[i],
+                    strides = 1,
+                    dilation_rate = dilation_rate[i],
+                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                    use_bias = use_bias
+                  )
+                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+              } else {
+                output_tensor <- output_tensor %>%
+                  keras::layer_conv_1d(
+                    kernel_size = kernel_size[i],
+                    padding = padding,
+                    activation = "relu",
+                    filters = filters[i],
+                    strides = 1,
+                    dilation_rate = dilation_rate[i],
+                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                    use_bias = use_bias
+                  )
+                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+              }
+            }
+          }
+        }
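+        # Main branch: the strided convolution below performs the actual downsampling;
+        # if residual_block is set, the shortcut branch is added back in afterwards.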
+        if (size_reduction_1Dconv) {
+          output_tensor <- output_tensor %>%
+            keras::layer_conv_1d(
+              kernel_size = 1,
+              padding = padding,
+              activation = "relu",
+              filters = filters[i] / 4,
+              strides = 1,
+              dilation_rate = dilation_rate[i],
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              use_bias = use_bias
+            )
+          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+
+          output_tensor <- output_tensor %>%
+            keras::layer_conv_1d(
+              kernel_size = kernel_size[i],
+              padding = padding,
+              activation = "relu",
+              filters = filters[i] / 4,
+              strides = strides[i],
+              dilation_rate = dilation_rate[i],
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              use_bias = use_bias
+            )
+          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+
+          output_tensor <- output_tensor %>%
+            keras::layer_conv_1d(
+              kernel_size = 1,
+              padding = padding,
+              activation = "relu",
+              filters = filters[i],
+              strides = 1,
+              dilation_rate = dilation_rate[i],
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              use_bias = use_bias
+            )
+          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+        } else {
+          output_tensor <- output_tensor %>%
+            keras::layer_conv_1d(
+              kernel_size = kernel_size[i],
+              padding = padding,
+              activation = "relu",
+              filters = filters[i],
+              strides = strides[i],
+              dilation_rate = dilation_rate[i],
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              use_bias = use_bias
+            )
+          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+        }
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor <- output_tensor %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        if (residual_block) {
+          output_tensor <- keras::layer_add(list(output_tensor, residual_layer))
+        }
+      }
+    }
+  } else {
+    if (zero_mask) {
+      output_tensor <- input_tensor %>% keras::layer_masking()
+    } else {
+      output_tensor <- input_tensor
+    }
+  }
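+
+  # All LSTM layers except the last return the full sequence (return_sequences = TRUE)
+  # so that the next LSTM layer sees every timestep; the last one returns only its
+  # final hidden state.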
+  # lstm layers
+  if (layers.lstm > 0) {
+    if (layers.lstm > 1) {
+      if (bidirectional) {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor <- output_tensor %>%
+            keras::bidirectional(
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              keras::layer_lstm(
+                units = layer_lstm[i],
+                return_sequences = TRUE,
+                dropout = dropout_lstm,
+                recurrent_dropout = recurrent_dropout_lstm,
+                stateful = stateful,
+                recurrent_activation = "sigmoid"
+              )
+            )
+        }
+      } else {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor <- output_tensor %>%
+            keras::layer_lstm(
+              layer_lstm[i],
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              return_sequences = TRUE,
+              dropout = dropout_lstm,
+              recurrent_dropout = recurrent_dropout_lstm,
+              stateful = stateful,
+              recurrent_activation = "sigmoid"
+            )
+        }
+      }
+    }
+    # last LSTM layer
+    if (bidirectional) {
+      output_tensor <- output_tensor %>%
+        keras::bidirectional(
+          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
+                            stateful = stateful, recurrent_activation = "sigmoid")
+        )
+    } else {
+      output_tensor <- output_tensor %>%
+        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
+                          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
+                          recurrent_activation = "sigmoid")
+    }
+  }
+
+  if (gap) {
+    if (layers.lstm != 0) {
+      stop("Global average pooling is not compatible with LSTM layers")
+    }
+    output_tensor <- output_tensor %>% keras::layer_global_average_pooling_1d()
+  } else {
+    if (layers.lstm == 0) {
+      output_tensor <- output_tensor %>% keras::layer_flatten()
+    }
+  }
+
+  if (!is.null(label_input)) {
+    # One extra input layer per entry of label_input, collected in a named list
+    # instead of creating variables via eval(parse()).
+    input_label_list <- list()
+    for (i in seq_along(label_input)) {
+      if (!stateful) {
+        input_label_list[[paste0("label_input_layer_", i)]] <- keras::layer_input(shape = c(label_input[i]))
+      } else {
+        input_label_list[[paste0("label_input_layer_", i)]] <- keras::layer_input(batch_shape = c(batch_size, label_input[i]))
+      }
+    }
+    output_tensor <- keras::layer_concatenate(c(unname(input_label_list), output_tensor))
+  }
+
+  if (length(layer_dense) > 1) {
+    for (i in 1:(length(layer_dense) - 1)) {
+      if (!is.null(dropout_dense)) output_tensor <- output_tensor %>% keras::layer_dropout(dropout_dense[i])
+      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
+    }
+  }
+
+  if (num_output_layers == 1) {
+    if (!is.null(dropout_dense)) output_tensor <- output_tensor %>% keras::layer_dropout(dropout_dense[length(dropout_dense)])
+    output_tensor <- output_tensor %>%
+      keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")
+  } else {
+    output_list <- list()
+    for (i in 1:num_output_layers) {
+      layer_name <- paste0("output_", i, "_", num_output_layers)
+      if (!is.null(dropout_dense)) {
+        output_list[[i]] <- output_tensor %>% keras::layer_dropout(dropout_dense[length(dropout_dense)])
+        output_list[[i]] <- output_list[[i]] %>%
+          keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
+      } else {
+        output_list[[i]] <- output_tensor %>%
+          keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
+      }
+    }
+  }
+
+  if (!is.null(label_input)) {
+    if (num_output_layers == 1) {
+      model <- keras::keras_model(inputs = list(input_label_list, input_tensor), outputs = output_tensor)
+    } else {
+      model <- keras::keras_model(inputs = list(input_label_list, input_tensor), outputs = output_list)
+    }
+  } else {
+    if (num_output_layers == 1) {
+      model <- keras::keras_model(inputs = input_tensor, outputs = output_tensor)
+    } else {
+      model <- keras::keras_model(inputs = input_tensor, outputs = output_list)
+    }
+  }
+
+  if (compile) {
+    model <- compile_model(model = model, label_smoothing = label_smoothing, layer_dense = layer_dense,
+                           solver = solver, learning_rate = learning_rate, loss_fn = loss_fn,
+                           num_output_layers = num_output_layers, label_noise_matrix = label_noise_matrix,
+                           bal_acc = bal_acc, f1_metric = f1_metric, auc_metric = auc_metric)
+  }
+
+  argg <- c(as.list(environment()))
+  model <- add_hparam_list(model, argg)
+
+  if (verbose) model$summary()
+  return(model)
+}
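+
+# The custom "noisy_loss" referenced above is created inside compile_model() and is not
+# defined in this file. As a rough, illustrative sketch only (an assumption, not the
+# package's actual implementation), a noise-aware categorical crossentropy could fold
+# the label noise matrix into the predictions before computing the loss:
+#
+# noisy_loss_sketch <- function(noise_matrix) {
+#   noise_tf <- tensorflow::tf$constant(noise_matrix, dtype = "float32")
+#   function(y_true, y_pred) {
+#     # expected distribution of the *observed* (noisy) labels under the noise model
+#     y_pred_noisy <- tensorflow::tf$matmul(y_pred, noise_tf)
+#     keras::loss_categorical_crossentropy(y_true, y_pred_noisy)
+#   }
+# }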
+
+
+#' @title Create LSTM/CNN network to predict middle part of a sequence
+#'
+#' @description
+#' Creates a network consisting of an arbitrary number of CNN, LSTM and dense layers.
+#' The function creates two subnetworks, each consisting of (optional) CNN layers followed by an arbitrary number of LSTM layers. The outputs of
+#' the last LSTM layers are concatenated and followed by one or more dense layers. The last layer is a dense layer.
+#' The network tries to predict the target in the middle of a sequence. If the input is AACCTAAGG, the input tensors should correspond to
+#' x1 = AACC, x2 = GGAA (the second half read backwards) and y = T.
+#'
+#' @inheritParams create_model_lstm_cnn
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' create_model_lstm_cnn_target_middle(
+#'   maxlen = 500,
+#'   vocabulary_size = 4,
+#'   kernel_size = c(8, 8, 8),
+#'   filters = c(16, 32, 64),
+#'   pool_size = c(3, 3, 3),
+#'   layer_lstm = c(32, 64),
+#'   layer_dense = c(128, 4),
+#'   learning_rate = 0.001)
+#'
+#' @returns A keras model with two input layers. Consists of LSTM, CNN and dense layers.
+#' @export
+create_model_lstm_cnn_target_middle <- function(
+    maxlen = 50,
+    dropout_lstm = 0,
+    recurrent_dropout_lstm = 0,
+    layer_lstm = 128,
+    solver = "adam",
+    learning_rate = 0.001,
+    vocabulary_size = 4,
+    bidirectional = FALSE,
+    stateful = FALSE,
+    batch_size = NULL,
+    padding = "same",
+    compile = TRUE,
+    layer_dense = NULL,
+    kernel_size = NULL,
+    filters = NULL,
+    pool_size = NULL,
+    strides = NULL,
+    label_input = NULL,
+    zero_mask = FALSE,
+    label_smoothing = 0,
+    label_noise_matrix = NULL,
+    last_layer_activation = "softmax",
+    loss_fn = "categorical_crossentropy",
+    num_output_layers = 1,
+    f1_metric = FALSE,
+    auc_metric = FALSE,
+    bal_acc = FALSE,
+    verbose = TRUE,
+    batch_norm_momentum = 0.99,
+    model_seed = NULL,
+    mixed_precision = FALSE,
+    mirrored_strategy = NULL) {
+
+  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
+
+  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
+  if (mirrored_strategy) {
+    # Rebuild the model inside the strategy scope by calling this function again
+    # with mirrored_strategy disabled.
+    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
+    with(mirrored_strategy$scope(), {
+      argg <- as.list(environment())
+      argg$mirrored_strategy <- FALSE
+      model <- do.call(create_model_lstm_cnn_target_middle, argg)
+    })
+    return(model)
+  }
+
+  layer_dense <- as.integer(layer_dense)
+  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
+  use.cnn <- !is.null(kernel_size)
+  num_targets <- layer_dense[length(layer_dense)]
+  layers.lstm <- length(layer_lstm)
+
+  stopifnot(maxlen > 0)
+  stopifnot(dropout_lstm <= 1 & dropout_lstm >= 0)
+  stopifnot(recurrent_dropout_lstm <= 1 & recurrent_dropout_lstm >= 0)
+
+  if (!is.null(layer_lstm)) {
+    stopifnot(length(layer_lstm) == 1 | (length(layer_lstm) == layers.lstm))
+  }
+
+  if (is.null(strides)) {
+    strides <- rep(1L, length(filters))
+  }
+
+  if (use.cnn) {
+    same_length <- (length(kernel_size) == length(filters)) & (length(filters) == length(strides))
+    if (!same_length) {
+      stop("kernel_size, filters and strides must have the same length")
+    }
+  }
+
+  # length of the two split sequences
+  maxlen_1 <- ceiling(maxlen/2)
+  maxlen_2 <- floor(maxlen/2)
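+  # Worked example (assuming a data generator that follows the scheme from the
+  # description): for AACCTAAGG the target is the middle base T, so maxlen = 8,
+  # maxlen_1 = 4 (x1 = AACC, read towards the target) and maxlen_2 = 4
+  # (x2 = GGAA, i.e. the second half AAGG read backwards towards the target).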
+  if (stateful) {
+    input_tensor_1 <- keras::layer_input(batch_shape = c(batch_size, maxlen_1, vocabulary_size))
+  } else {
+    input_tensor_1 <- keras::layer_input(shape = c(maxlen_1, vocabulary_size))
+  }
+
+  if (use.cnn) {
+    for (i in 1:length(filters)) {
+      if (i == 1) {
+        output_tensor_1 <- input_tensor_1 %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            input_shape = switch(stateful + 1, c(maxlen_1, vocabulary_size), NULL)
+          )
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor_1 <- output_tensor_1 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        output_tensor_1 <- output_tensor_1 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      } else {
+        output_tensor_1 <- output_tensor_1 %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            strides = strides[i],
+            filters = filters[i]
+          )
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor_1 <- output_tensor_1 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        output_tensor_1 <- output_tensor_1 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      }
+    }
+  } else {
+    if (zero_mask) {
+      output_tensor_1 <- input_tensor_1 %>% keras::layer_masking()
+    } else {
+      output_tensor_1 <- input_tensor_1
+    }
+  }
+
+  # lstm layers
+  if (!is.null(layers.lstm) && layers.lstm > 0) {
+    if (layers.lstm > 1) {
+      if (bidirectional) {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor_1 <- output_tensor_1 %>%
+            keras::bidirectional(
+              input_shape = switch(stateful + 1, c(maxlen_1, vocabulary_size), NULL),
+              keras::layer_lstm(
+                units = layer_lstm[i],
+                return_sequences = TRUE,
+                dropout = dropout_lstm,
+                recurrent_dropout = recurrent_dropout_lstm,
+                stateful = stateful,
+                recurrent_activation = "sigmoid"
+              )
+            )
+        }
+      } else {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor_1 <- output_tensor_1 %>%
+            keras::layer_lstm(
+              units = layer_lstm[i],
+              input_shape = switch(stateful + 1, c(maxlen_1, vocabulary_size), NULL),
+              return_sequences = TRUE,
+              dropout = dropout_lstm,
+              recurrent_dropout = recurrent_dropout_lstm,
+              stateful = stateful,
+              recurrent_activation = "sigmoid"
+            )
+        }
+      }
+    }
+
+    # last LSTM layer
+    if (bidirectional) {
+      output_tensor_1 <- output_tensor_1 %>%
+        keras::bidirectional(
+          input_shape = switch(stateful + 1, c(maxlen_1, vocabulary_size), NULL),
+          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
+                            stateful = stateful, recurrent_activation = "sigmoid")
+        )
+    } else {
+      output_tensor_1 <- output_tensor_1 %>%
+        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
+                          input_shape = switch(stateful + 1, c(maxlen_1, vocabulary_size), NULL),
+                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
+                          recurrent_activation = "sigmoid")
+    }
+  }
+
+  if (stateful) {
+    input_tensor_2 <- keras::layer_input(batch_shape = c(batch_size, maxlen_2, vocabulary_size))
+  } else {
+    input_tensor_2 <- keras::layer_input(shape = c(maxlen_2, vocabulary_size))
+  }
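+
+  # The second tower mirrors the first one and processes the second half of the
+  # sequence (fed in reversed, so both towers read towards the middle target).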
+  if (use.cnn) {
+    for (i in 1:length(filters)) {
+      if (i == 1) {
+        output_tensor_2 <- input_tensor_2 %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            input_shape = switch(stateful + 1, c(maxlen_2, vocabulary_size), NULL)
+          )
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor_2 <- output_tensor_2 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        output_tensor_2 <- output_tensor_2 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      } else {
+        output_tensor_2 <- output_tensor_2 %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            strides = strides[i],
+            filters = filters[i]
+          )
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor_2 <- output_tensor_2 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        output_tensor_2 <- output_tensor_2 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      }
+    }
+  } else {
+    if (zero_mask) {
+      output_tensor_2 <- input_tensor_2 %>% keras::layer_masking()
+    } else {
+      output_tensor_2 <- input_tensor_2
+    }
+  }
+
+  # lstm layers
+  if (!is.null(layers.lstm) && layers.lstm > 0) {
+    if (layers.lstm > 1) {
+      if (bidirectional) {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor_2 <- output_tensor_2 %>%
+            keras::bidirectional(
+              input_shape = switch(stateful + 1, c(maxlen_2, vocabulary_size), NULL),
+              keras::layer_lstm(
+                units = layer_lstm[i],
+                return_sequences = TRUE,
+                dropout = dropout_lstm,
+                recurrent_dropout = recurrent_dropout_lstm,
+                stateful = stateful,
+                recurrent_activation = "sigmoid"
+              )
+            )
+        }
+      } else {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor_2 <- output_tensor_2 %>%
+            keras::layer_lstm(
+              units = layer_lstm[i],
+              input_shape = switch(stateful + 1, c(maxlen_2, vocabulary_size), NULL),
+              return_sequences = TRUE,
+              dropout = dropout_lstm,
+              recurrent_dropout = recurrent_dropout_lstm,
+              stateful = stateful,
+              recurrent_activation = "sigmoid"
+            )
+        }
+      }
+    }
+
+    # last LSTM layer
+    if (bidirectional) {
+      output_tensor_2 <- output_tensor_2 %>%
+        keras::bidirectional(
+          input_shape = switch(stateful + 1, c(maxlen_2, vocabulary_size), NULL),
+          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
+                            stateful = stateful, recurrent_activation = "sigmoid")
+        )
+    } else {
+      output_tensor_2 <- output_tensor_2 %>%
+        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
+                          input_shape = switch(stateful + 1, c(maxlen_2, vocabulary_size), NULL),
+                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
+                          recurrent_activation = "sigmoid")
+    }
+  }
+
+  output_tensor <- keras::layer_concatenate(list(output_tensor_1, output_tensor_2))
+
+  if (layers.lstm == 0) {
+    output_tensor <- output_tensor %>% keras::layer_flatten()
+  }
+
+  if (!is.null(label_input)) {
+    # One extra input layer per entry of label_input, collected in a named list
+    # instead of creating variables via eval(parse()).
+    input_label_list <- list()
+    for (i in seq_along(label_input)) {
+      if (!stateful) {
+        input_label_list[[paste0("label_input_layer_", i)]] <- keras::layer_input(shape = c(label_input[i]))
+      } else {
+        input_label_list[[paste0("label_input_layer_", i)]] <- keras::layer_input(batch_shape = c(batch_size, label_input[i]))
+      }
+    }
+    output_tensor <- keras::layer_concatenate(c(unname(input_label_list), output_tensor))
+  }
+
+  if (length(layer_dense) > 1) {
+    for (i in 1:(length(layer_dense) - 1)) {
+      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
+    }
+  }
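+
+  # Output layers stay in float32 even under mixed precision, as recommended in the
+  # TensorFlow mixed precision guide for numerically stable softmax/sigmoid outputs.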
+  if (num_output_layers == 1) {
+    output_tensor <- output_tensor %>%
+      keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")
+  } else {
+    output_list <- list()
+    for (i in 1:num_output_layers) {
+      layer_name <- paste0("output_", i, "_", num_output_layers)
+      output_list[[i]] <- output_tensor %>%
+        keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
+    }
+  }
+
+  # use the single tensor or the list of output layers, depending on num_output_layers
+  outputs <- if (num_output_layers == 1) output_tensor else output_list
+
+  if (!is.null(label_input)) {
+    model <- keras::keras_model(inputs = c(input_label_list, input_tensor_1, input_tensor_2), outputs = outputs)
+  } else {
+    model <- keras::keras_model(inputs = list(input_tensor_1, input_tensor_2), outputs = outputs)
+  }
+
+  if (compile) {
+    model <- compile_model(model = model, label_smoothing = label_smoothing, layer_dense = layer_dense,
+                           solver = solver, learning_rate = learning_rate, loss_fn = loss_fn,
+                           num_output_layers = num_output_layers, label_noise_matrix = label_noise_matrix,
+                           bal_acc = bal_acc, f1_metric = f1_metric, auc_metric = auc_metric)
+  }
+
+  argg <- c(as.list(environment()))
+  model <- add_hparam_list(model, argg)
+  reticulate::py_set_attr(x = model, name = "hparam", value = model$hparam)
+
+  if (verbose) model$summary()
+  return(model)
+}
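+
+# Quick smoke test (illustrative only; kept as a comment so nothing runs on load).
+# With maxlen = 500 the two inputs have lengths 250 and 250:
+# model <- create_model_lstm_cnn_target_middle(maxlen = 500, vocabulary_size = 4,
+#                                              kernel_size = c(8, 8, 8), filters = c(16, 32, 64),
+#                                              pool_size = c(3, 3, 3), layer_lstm = c(32, 64),
+#                                              layer_dense = c(128, 4))
+# x1 <- array(0, dim = c(8, 250, 4)); x2 <- array(0, dim = c(8, 250, 4))
+# predict(model, list(x1, x2))  # an 8 x 4 matrix of class probabilities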