--- a
+++ b/R/create_model_set_learning.R
@@ -0,0 +1,610 @@
+#' @title Create LSTM/CNN network for combining multiple sequences 
+#' 
+#' @description Creates a network consisting of an arbitrary number of CNN, LSTM and dense layers.
+#' Input is a 4D tensor, where the axes correspond to:
+#' \enumerate{
+#'    \item batch size
+#'    \item number of samples in one batch
+#'    \item length of one sample
+#'    \item size of vocabulary
+#' }
+#' After the LSTM/CNN part, all representations get aggregated, for example by summation.
+#' Can be used to make a single prediction for a combination of multiple input sequences. The architecture
+#' is equivalent to \link{create_model_lstm_cnn_multi_input}, but instead of multiple input layers with 3D inputs,
+#' the input here is one 4D tensor.
+#'     
+#' @inheritParams create_model_lstm_cnn
+#' @param samples_per_target Number of samples to combine for one target.
+#' @param aggregation_method At least one of the options `"sum", "mean", "max"`.
+#' @param gap_time_dist Pooling or flattening method applied after the last time-distributed layer. Same options as for the `flatten_method` argument
+#' in \link{create_model_transformer}.
+#' @param lstm_time_dist Vector containing the number of units per LSTM cell; applied after the time-distributed part.
+#' @param transformer_args List of arguments for transformer blocks; see \link{layer_transformer_block_wrapper}.
+#' Additionally, the list can contain a `pool_flatten` entry to apply global pooling or flattening after the last transformer block (same options
+#' as the `flatten_method` argument in \link{create_model_transformer}).
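+#' For illustration, a list such as `list(num_heads = c(2, 2), head_size = c(16, 16), dropout = c(0.1, 0.1),
+#' ff_dim = c(32, 32), pool_flatten = "flatten")` would add two transformer blocks (the values are only examples).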
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' 
+#' # Example needs keras attached to run
+#' maxlen <- 50
+#' \donttest{
+#' library(keras)
+#' create_model_lstm_cnn_time_dist(
+#'   maxlen = maxlen,
+#'   vocabulary_size = 4,
+#'   samples_per_target = 7,
+#'   kernel_size = c(10, 10),
+#'   filters = c(64, 128),
+#'   pool_size = c(2, 2),
+#'   layer_lstm = c(32),
+#'   aggregation_method = c("max"),
+#'   layer_dense = c(64, 2),
+#'   learning_rate = 0.001)
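+#' # The model above expects a 4D input array of shape
+#' # (batch_size, samples_per_target, maxlen, vocabulary_size),
+#' # e.g. dim = c(8, 7, 50, 4) here (batch size of 8 chosen for illustration).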
+#'  }
+#'  
+#' @returns A keras model with a time-distributed wrapper applied to the LSTM and CNN layers.
+#' @export
+create_model_lstm_cnn_time_dist <- function(
+    maxlen = 50,
+    dropout_lstm = 0,
+    recurrent_dropout_lstm = 0,
+    layer_lstm = NULL,
+    layer_dense = c(4),
+    solver = "adam",
+    learning_rate = 0.001,
+    vocabulary_size = 4,
+    bidirectional = FALSE,
+    stateful = FALSE,
+    batch_size = NULL,
+    compile = TRUE,
+    kernel_size = NULL,
+    filters = NULL,
+    strides = NULL,
+    pool_size = NULL,
+    padding = "same",
+    dilation_rate = NULL,
+    gap_time_dist = NULL,
+    use_bias = TRUE,
+    zero_mask = FALSE,
+    label_smoothing = 0,
+    label_noise_matrix = NULL,
+    last_layer_activation = "softmax",
+    loss_fn = "categorical_crossentropy",
+    auc_metric = FALSE,
+    f1_metric = FALSE,
+    samples_per_target,
+    batch_norm_momentum = 0.99,
+    verbose = TRUE,
+    model_seed = NULL,
+    aggregation_method = NULL, 
+    transformer_args = NULL,
+    lstm_time_dist = NULL,
+    mixed_precision = FALSE,
+    bal_acc = FALSE,
+    mirrored_strategy = NULL) {
+  
+  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
+  
+  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
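+  # with more than one GPU available, build the model inside a MirroredStrategy
+  # scope by calling this function again with mirrored_strategy = FALSE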
+  if (mirrored_strategy) {
+    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
+    with(mirrored_strategy$scope(), { 
+      argg <- as.list(environment())
+      argg$mirrored_strategy <- FALSE
+      model <- do.call(create_model_lstm_cnn_time_dist, argg)
+    })
+    return(model)
+  }
+  
+  #stopifnot(aggregation_method %in% c("sum", "lstm", "lstm_sum"))
+  
+  layer_dense <- as.integer(layer_dense)
+  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
+  num_output_layers <- 1
+  num_input_layers <- 1
+  
+  num_targets <- layer_dense[length(layer_dense)]
+  layers.lstm <- length(layer_lstm)
+  use.cnn <- !is.null(kernel_size)
+  
+  if (!is.null(layer_lstm)) {
+    stopifnot(length(layer_lstm) == 1 | (length(layer_lstm) ==  layers.lstm))
+  }
+  
+  if (layers.lstm == 0 & !use.cnn) {
+    stop("Model does not use LSTM or CNN layers.")
+  }
+  
+  if (is.null(strides)) strides <- rep(1L, length(filters))
+  if (is.null(dilation_rate) & use.cnn) dilation_rate <- rep(1L, length(filters))
+  
+  if (use.cnn) {
+    same_length <- (length(kernel_size) == length(filters)) &
+      (length(filters) == length(strides)) &
+      (length(strides) == length(dilation_rate))
+    if (!same_length) {
+      stop("kernel_size, filters, dilation_rate and strides must have the same length")
+    }
+  }
+  
+  stopifnot(maxlen > 0)
+  stopifnot(dropout_lstm <= 1 & dropout_lstm >= 0)
+  stopifnot(recurrent_dropout_lstm <= 1 & recurrent_dropout_lstm >= 0)
+  
+  if (length(layer_lstm) == 1) {
+    layer_lstm <- rep(layer_lstm, layers.lstm)
+  }
+  
+  if (stateful) {
+    input_tensor <- keras::layer_input(batch_shape = c(batch_size, maxlen, vocabulary_size))
+  } else {
+    input_tensor <- keras::layer_input(shape = c(samples_per_target, maxlen, vocabulary_size))
+  }
+  
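+  # the time_distributed() wrappers below apply the same CNN/LSTM layers to each
+  # of the samples_per_target subsequences, i.e. weights are shared across samples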
+  if (use.cnn) {
+    for (i in 1:length(filters)) {
+      if (i == 1) {
+        output_tensor <- input_tensor %>%
+          keras::time_distributed(keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            dilation_rate = dilation_rate[i],
+            input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+            use_bias = use_bias
+          ))
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor <- output_tensor %>% keras::time_distributed(keras::layer_max_pooling_1d(pool_size = pool_size[i]))
+        }
+        output_tensor <- output_tensor %>% keras::time_distributed(keras::layer_batch_normalization(momentum = batch_norm_momentum))
+      } else {
+        
+        output_tensor <- output_tensor %>%
+          keras::time_distributed(keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            dilation_rate = dilation_rate[i],
+            input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+            use_bias = use_bias
+          ))
+        output_tensor <- output_tensor %>% keras::time_distributed(keras::layer_batch_normalization(momentum = batch_norm_momentum))
+        
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor <- output_tensor %>% keras::time_distributed(keras::layer_max_pooling_1d(pool_size = pool_size[i]))
+        }
+        #output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      }
+    }
+  } else {
+    if (zero_mask) {
+      output_tensor <- input_tensor %>% keras::time_distributed(keras::layer_masking())
+    } else {
+      output_tensor <- input_tensor
+    }
+  }
+  # lstm layers
+  if (layers.lstm > 0) {
+    if (layers.lstm > 1) {
+      if (bidirectional) {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor <- output_tensor %>%
+            keras::time_distributed(keras::bidirectional(
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              keras::layer_lstm(
+                units = layer_lstm[i],
+                return_sequences = TRUE,
+                dropout = dropout_lstm,
+                recurrent_dropout = recurrent_dropout_lstm,
+                stateful = stateful,
+                recurrent_activation = "sigmoid"
+              )
+            ))
+        }
+      } else {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor <- output_tensor %>%
+            keras::time_distributed(keras::layer_lstm(
+              units = layer_lstm[i],
+              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+              return_sequences = TRUE,
+              dropout = dropout_lstm,
+              recurrent_dropout = recurrent_dropout_lstm,
+              stateful = stateful,
+              recurrent_activation = "sigmoid"
+            ))
+        }
+      }
+    }
+    # last LSTM layer
+    if (bidirectional) {
+      output_tensor <- output_tensor %>%
+        keras::time_distributed(keras::bidirectional(
+          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
+                            stateful = stateful, recurrent_activation = "sigmoid")
+        ))
+    } else {
+      output_tensor <- output_tensor %>%
+        keras::time_distributed(keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
+                                                  input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
+                                                  dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
+                                                  recurrent_activation = "sigmoid"))
+    }
+  }
+  
+  if (!is.null(gap_time_dist)) {
+    if (layers.lstm != 0) {
+      stop("Global average pooling not compatible with using LSTM layer")
+    }
+    output_tensor <- pooling_flatten_time_dist(gap_time_dist, output_tensor)
+  } else {
+    if (layers.lstm == 0) {
+      output_tensor <- output_tensor %>% keras::time_distributed(keras::layer_flatten())
+    }
+  }
+  
+  num_aggr_layers <- 0
+  aggr_layer_list <- list()
+  
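+  # output so far is 3D: (batch_size, samples_per_target, features); each option
+  # below (transformer_args, aggregation_method, lstm_time_dist) aggregates over
+  # the samples_per_target axis and adds one branch; if more than one option is
+  # used, the branches get concatenated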
+  if (!is.null(transformer_args)) {
+    num_aggr_layers <- num_aggr_layers + 1
+    for (i in seq_along(transformer_args$num_heads)) {
+      attn_block <- layer_transformer_block_wrapper(
+        num_heads = as.integer(transformer_args$num_heads[i]),
+        head_size = as.integer(transformer_args$head_size[i]),
+        dropout_rate = transformer_args$dropout[i],
+        ff_dim = as.integer(transformer_args$ff_dim[i]),
+        embed_dim = as.integer(output_tensor$shape[[3]]),
+        vocabulary_size = as.integer(output_tensor$shape[[2]]),
+        load_r6 = FALSE)
+      if (i == 1) {
+        output_tensor_attn <- output_tensor %>% attn_block
+      } else {
+        output_tensor_attn <- output_tensor_attn %>% attn_block
+      }
+    }
+    output_tensor_attn <- pooling_flatten(transformer_args$pool_flatten, output_tensor_attn)
+    aggr_layer_list[[num_aggr_layers]] <- output_tensor_attn
+  }
+  
+  if (!is.null(aggregation_method)) {
+    num_aggr_layers <- num_aggr_layers + 1
+    layer_aggregate_td <- layer_aggregate_time_dist_wrapper(method = aggregation_method)
+    output_tensor_aggregation_sum <- output_tensor %>% layer_aggregate_td
+    aggr_layer_list[[num_aggr_layers]] <- output_tensor_aggregation_sum
+  }
+  
+  if (!is.null(lstm_time_dist)) {
+    num_aggr_layers <- num_aggr_layers + 1
+    return_sequences <- TRUE
+    for (i in 1:length(lstm_time_dist)) {
+      if (i == length(lstm_time_dist)) {
+        return_sequences <- FALSE 
+      } 
+      if (i == 1) {
+        output_tensor_lstm <- output_tensor %>% keras::layer_lstm(units=lstm_time_dist[i], return_sequences = return_sequences)
+      } else {
+        output_tensor_lstm <- output_tensor_lstm %>% keras::layer_lstm(units=lstm_time_dist[i], return_sequences = return_sequences)
+      }
+    }
+    aggr_layer_list[[num_aggr_layers]] <- output_tensor_lstm
+  }
+  
+  if (num_aggr_layers == 0) {
+    stop("You need to choose an aggregation method, either with aggregation_method, transformer_args or lstm_time_dist.")
+  }
+  
+  if (num_aggr_layers == 1) {
+    output_tensor <- aggr_layer_list[[1]]
+  }
+  
+  if (num_aggr_layers > 1) {
+    output_tensor <- keras::layer_concatenate(aggr_layer_list) 
+  }
+  
+  if (length(layer_dense) > 1) {
+    for (i in 1:(length(layer_dense) - 1)) {
+      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
+    }
+  }
+  
+  if (num_output_layers == 1) {
+    output_tensor <- output_tensor %>%
+      keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")
+  } else {
+    output_list <- list()
+    for (i in 1:num_output_layers) {
+      layer_name <- paste0("output_", i, "_", num_output_layers)
+      output_list[[i]] <- output_tensor %>%
+        keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
+    }
+  }
+  
+  if (num_output_layers == 1) {
+    model <- keras::keras_model(inputs = input_tensor, outputs = output_tensor)
+  } else {
+    model <- keras::keras_model(inputs = input_tensor, outputs = output_list)
+  }
+  
+  if (compile) {
+    model <- compile_model(model = model, label_smoothing = label_smoothing, layer_dense = layer_dense,
+                           solver = solver, learning_rate = learning_rate, loss_fn = loss_fn, 
+                           num_output_layers = num_output_layers, label_noise_matrix = label_noise_matrix,
+                           bal_acc = bal_acc, f1_metric = f1_metric, auc_metric = auc_metric)
+  }
+  
+  argg <- as.list(environment())
+  model <- add_hparam_list(model, argg)
+  
+  if (verbose) model$summary()
+  return(model)
+}
+
+#' @title Create LSTM/CNN network that can process multiple samples for one target
+#'
+#' @description Creates a network consisting of an arbitrary number of CNN, LSTM and dense layers with multiple
+#' input layers. After the LSTM/CNN part, all representations get aggregated, by default by summation.
+#' Can be used to make a single prediction for a combination of multiple input sequences.
+#' Implements the approach described [here](https://arxiv.org/abs/1703.06114).
+#'
+#' @inheritParams create_model_lstm_cnn
+#' @inheritParams create_model_lstm_cnn_time_dist
+#' @param samples_per_target Number of samples to combine for one target.
+#' @param dropout_dense Vector of dropout rates between dense layers. No dropout if `NULL`.
+#' @param gap_inputs Global pooling method to apply. Same options as for the `flatten_method` argument
+#' in \link{create_model_transformer}.
+#' @examplesIf reticulate::py_module_available("tensorflow")
+#' 
+#' # Example needs keras attached to run
+#' maxlen <- 50
+#' \donttest{
+#' library(keras)
+#' create_model_lstm_cnn_multi_input(
+#'   maxlen = maxlen,
+#'   vocabulary_size = 4,
+#'   samples_per_target = 7,
+#'   kernel_size = c(10, 10),
+#'   filters = c(64, 128),
+#'   pool_size = c(2, 2),
+#'   layer_lstm = c(32),
+#'   layer_dense = c(64, 2),
+#'   aggregation_method = c("max"),
+#'   learning_rate = 0.001)
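+#' # The model above has samples_per_target (here 7) separate input layers;
+#' # it expects a list of 7 arrays, each of shape (batch_size, maxlen, vocabulary_size).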
+#'  }  
+#'  
+#' @returns A keras model with multiple input layers. Inputs go through shared LSTM/CNN layers.
+#' @export
+create_model_lstm_cnn_multi_input <- function(
+    maxlen = 50,
+    dropout_lstm = 0,
+    recurrent_dropout_lstm = 0,
+    layer_lstm = NULL,
+    layer_dense = c(4),
+    dropout_dense = NULL,
+    solver = "adam",
+    learning_rate = 0.001,
+    vocabulary_size = 4,
+    bidirectional = FALSE,
+    batch_size = NULL,
+    compile = TRUE,
+    kernel_size = NULL,
+    filters = NULL,
+    strides = NULL,
+    pool_size = NULL,
+    padding = "same",
+    dilation_rate = NULL,
+    gap_inputs = NULL,
+    use_bias = TRUE,
+    zero_mask = FALSE,
+    label_smoothing = 0,
+    label_noise_matrix = NULL,
+    last_layer_activation = "softmax",
+    loss_fn = "categorical_crossentropy",
+    auc_metric = FALSE,
+    f1_metric = FALSE,
+    bal_acc = FALSE,
+    samples_per_target,
+    batch_norm_momentum = 0.99,
+    aggregation_method = c('sum'), 
+    verbose = TRUE,
+    model_seed = NULL,
+    mixed_precision = FALSE,
+    mirrored_strategy = NULL) {
+  
+  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
+  
+  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
+  if (mirrored_strategy) {
+    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
+    with(mirrored_strategy$scope(), { 
+      argg <- as.list(environment())
+      argg$mirrored_strategy <- FALSE
+      model <- do.call(create_model_lstm_cnn_multi_input, argg)
+    })
+    return(model)
+  }
+  
+  layer_dense <- as.integer(layer_dense)
+  if (!is.null(dropout_dense)) stopifnot(length(dropout_dense) == length(layer_dense))
+  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
+  num_targets <- layer_dense[length(layer_dense)]
+  layers.lstm <- length(layer_lstm)
+  use.cnn <- !is.null(kernel_size)
+  num_output_layers <- 1
+  
+  if (!is.null(layer_lstm)) {
+    stopifnot(length(layer_lstm) == 1 | (length(layer_lstm) ==  layers.lstm))
+  }
+  
+  if (layers.lstm == 0 & !use.cnn) {
+    stop("Model does not use LSTM or CNN layers.")
+  }
+  
+  if (is.null(strides)) strides <- rep(1L, length(filters))
+  if (is.null(dilation_rate) & use.cnn) dilation_rate <- rep(1L, length(filters))
+  
+  if (use.cnn) {
+    same_length <- (length(kernel_size) == length(filters)) &
+      (length(filters) == length(strides)) &
+      (length(strides) == length(dilation_rate))
+    if (!same_length) {
+      stop("kernel_size, filters, dilation_rate and strides must have the same length")
+    }
+  }
+  
+  stopifnot(maxlen > 0)
+  stopifnot(dropout_lstm <= 1 & dropout_lstm >= 0)
+  stopifnot(recurrent_dropout_lstm <= 1 & recurrent_dropout_lstm >= 0)
+  
+  if (length(layer_lstm) == 1) {
+    layer_lstm <- rep(layer_lstm, layers.lstm)
+  }
+  
+  input_tensor <- keras::layer_input(shape = c(maxlen, vocabulary_size))
+  
+  if (use.cnn) {
+    for (i in 1:length(filters)) {
+      if (i == 1) {
+        output_tensor <- input_tensor %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            dilation_rate = dilation_rate[i],
+            input_shape = c(maxlen, vocabulary_size),
+            use_bias = use_bias
+          )
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor <- output_tensor %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+      } else {
+        
+        output_tensor <- output_tensor %>%
+          keras::layer_conv_1d(
+            kernel_size = kernel_size[i],
+            padding = padding,
+            activation = "relu",
+            filters = filters[i],
+            strides = strides[i],
+            dilation_rate = dilation_rate[i],
+            input_shape = c(maxlen, vocabulary_size),
+            use_bias = use_bias
+          )
+        output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
+        
+        if (!is.null(pool_size) && pool_size[i] > 1) {
+          output_tensor <- output_tensor %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
+        }
+        
+      }
+    }
+  } else {
+    if (zero_mask) {
+      output_tensor <- input_tensor %>% keras::layer_masking()
+    } else {
+      output_tensor <- input_tensor
+    }
+  }
+  # lstm layers
+  if (layers.lstm > 0) {
+    if (layers.lstm > 1) {
+      if (bidirectional) {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor <- output_tensor %>%
+            keras::bidirectional(
+              input_shape = c(maxlen, vocabulary_size),
+              keras::layer_lstm(
+                units = layer_lstm[i],
+                return_sequences = TRUE,
+                dropout = dropout_lstm,
+                recurrent_dropout = recurrent_dropout_lstm,
+                recurrent_activation = "sigmoid"
+              )
+            )
+        }
+      } else {
+        for (i in 1:(layers.lstm - 1)) {
+          output_tensor <- output_tensor %>%
+            keras::layer_lstm(
+              units = layer_lstm[i],
+              input_shape = c(maxlen, vocabulary_size),
+              return_sequences = TRUE,
+              dropout = dropout_lstm,
+              recurrent_dropout = recurrent_dropout_lstm,
+              recurrent_activation = "sigmoid"
+            )
+        }
+      }
+    }
+    # last LSTM layer
+    if (bidirectional) {
+      output_tensor <- output_tensor %>%
+        keras::bidirectional(
+          input_shape = c(maxlen, vocabulary_size),
+          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
+                            recurrent_activation = "sigmoid")
+        )
+    } else {
+      output_tensor <- output_tensor %>%
+        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
+                          input_shape = c(maxlen, vocabulary_size),
+                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
+                          recurrent_activation = "sigmoid")
+    }
+  }
+  
+  if (!is.null(gap_inputs)) {
+    if (layers.lstm != 0) {
+      stop("Global average pooling not compatible with using LSTM layer")
+    }
+    output_tensor <- output_tensor %>% pooling_flatten(global_pooling = gap_inputs)
+  } else {
+    if (layers.lstm == 0) {
+      output_tensor <- output_tensor %>% keras::layer_flatten()
+    }
+  }
+  
+  feature_ext_model <- keras::keras_model(inputs = input_tensor, outputs = output_tensor)
+  
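+  # the shared feature extractor is applied to each of the samples_per_target
+  # inputs, so all inputs use the same CNN/LSTM weights; the resulting
+  # representations are then aggregated into a single vector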
+  input_list <- list()
+  representation_list <- list()
+  for (i in 1:samples_per_target) {
+    input_list[[i]] <- keras::layer_input(shape = c(maxlen, vocabulary_size), name = paste0("input_", i))
+    representation_list[[i]] <- feature_ext_model(input_list[[i]])
+  }
+  
+  if (!is.null(aggregation_method)) {
+    layer_aggregate_td <- layer_aggregate_time_dist_wrapper(method = aggregation_method, multi_in = TRUE)
+    y <- representation_list %>% layer_aggregate_td
+  }
+  
+  if (length(layer_dense) > 1) {
+    for (i in 1:(length(layer_dense) - 1)) {
+      if (!is.null(dropout_dense)) y <- y %>% keras::layer_dropout(dropout_dense[i])
+      y <- y %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
+    }
+  }
+  
+  y <- y %>% keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")
+  model <- keras::keras_model(inputs = input_list, outputs = y)
+  
+  if (compile) {
+    model <- compile_model(model = model, label_smoothing = label_smoothing, layer_dense = layer_dense,
+                           solver = solver, learning_rate = learning_rate, loss_fn = loss_fn, 
+                           num_output_layers = num_output_layers, label_noise_matrix = label_noise_matrix,
+                           bal_acc = bal_acc, f1_metric = f1_metric, auc_metric = auc_metric)
+  }
+  
+  argg <- c(as.list(environment()))
+  model <- add_hparam_list(model, argg)
+  
+  if (verbose) model$summary()
+  return(model)
+}