--- a
+++ b/man/create_model_genomenet.Rd
@@ -0,0 +1,214 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_model_genomenet.R
+\name{create_model_genomenet}
+\alias{create_model_genomenet}
+\title{Create GenomeNet Model with Given Architecture Parameters}
+\usage{
+create_model_genomenet(
+  maxlen = 300,
+  learning_rate = 0.001,
+  number_of_cnn_layers = 1,
+  conv_block_count = 1,
+  kernel_size_0 = 16,
+  kernel_size_end = 16,
+  filters_0 = 256,
+  filters_end = 512,
+  dilation_end = 1,
+  max_pool_end = 1,
+  dense_layer_num = 1,
+  dense_layer_units = 100,
+  dropout_lstm = 0,
+  dropout = 0,
+  batch_norm_momentum = 0.8,
+  leaky_relu_alpha = 0,
+  dense_activation = "relu",
+  skip_block_fraction = 0,
+  residual_block = FALSE,
+  reverse_encoding = FALSE,
+  optimizer = "adam",
+  model_type = "gap",
+  recurrent_type = "lstm",
+  recurrent_layers = 1,
+  recurrent_bidirectional = FALSE,
+  recurrent_units = 100,
+  vocabulary_size = 4,
+  last_layer_activation = "softmax",
+  loss_fn = "categorical_crossentropy",
+  auc_metric = FALSE,
+  num_targets = 2,
+  model_seed = NULL,
+  bal_acc = FALSE,
+  f1_metric = FALSE,
+  mixed_precision = FALSE,
+  mirrored_strategy = NULL
+)
+}
+\arguments{
+\item{maxlen}{(integer \code{numeric(1)})\cr
+Input sequence length.}
+
+\item{learning_rate}{(\code{numeric(1)})\cr
+Used by the \code{keras} optimizer that is specified by \code{optimizer}.}
+
+\item{number_of_cnn_layers}{(integer \code{numeric(1)})\cr
+Target number of CNN-layers to use in total. If \code{number_of_cnn_layers} is
+greater than \code{conv_block_count}, then the effective number of CNN layers
+is set to the closest integer that is divisible by \code{conv_block_count}
+(see \sQuote{Details} for a worked example).}
+
+\item{conv_block_count}{(integer \code{numeric(1)})\cr
+Number of convolutional blocks, into which the CNN layers are divided.
+If this is greater than \code{number_of_cnn_layers}, then it is set to
+\code{number_of_cnn_layers} (the convolutional block size will then be 1).\cr
+Convolutional blocks are used when \code{model_type} is \code{"gap"} (the
+outputs of the last \code{conv_block_count * (1 - skip_block_fraction)}
+blocks are fed to global average pooling and then concatenated), and also when
+\code{residual_block} is \code{TRUE} (the number of filters is held constant within
+blocks). If neither of these is the case, \code{conv_block_count} has little
+effect besides the fact that \code{number_of_cnn_layers} is set to the closest
+integer divisible by \code{conv_block_count}.}
+
+\item{kernel_size_0}{(\code{numeric(1)})\cr
+Target CNN kernel size of the first CNN-layer. Although CNN kernel size is
+always an integer, this value can be non-integer, potentially affecting
+the kernel-sizes of intermediate layers (which are geometrically
+interpolated between \code{kernel_size_0} and \code{kernel_size_end}; see
+\sQuote{Details} for a worked example).}
+
+\item{kernel_size_end}{(\code{numeric(1)})\cr
+Target CNN kernel size of the last CNN-layer; ignored if only one
+CNN-layer is used (i.e. if \code{number_of_cnn_layers} is 1). Although CNN
+kernel size is always an integer, this value can be non-integer,
+potentially affecting the kernel-sizes of intermediate layers (which are
+geometrically interpolated between \code{kernel_size_0} and \code{kernel_size_end}).}
+
+\item{filters_0}{(\code{numeric(1)})\cr
+Target filter number of the first CNN-layer. Although CNN filter number is
+always an integer, this value can be non-integer, potentially affecting
+the filter-numbers of intermediate layers (which are geometrically
+interpolated between \code{filters_0} and \code{filters_end}).\cr
+Note that filters are constant within convolutional blocks when
+\code{residual_block} is \code{TRUE}.}
+
+\item{filters_end}{(\code{numeric(1)})\cr
+Target filter number of the last CNN-layer; ignored if only one CNN-layer
+is used (i.e. if \code{number_of_cnn_layers} is 1). Although CNN filter number
+is always an integer, this value can be non-integer, potentially affecting
+the filter-numbers of intermediate layers (which are geometrically
+interpolated between \code{filters_0} and \code{filters_end}).\cr
+Note that filters are constant within convolutional blocks when
+\code{residual_block} is \code{TRUE}.}
+
+\item{dilation_end}{(\code{numeric(1)})\cr
+Dilation of the last CNN-layer \emph{within each block}. Dilation rates
+within each convolutional block grow exponentially, from 1 (no dilation)
+for the first CNN-layer of each block up to this value for the last
+CNN-layer. Set to 1 (default) to disable dilation.}
+
+\item{max_pool_end}{(\code{numeric(1)})\cr
+Target total effective pooling of the CNN part of the network. "Effective
+pooling" here is the product of the pooling rates of all previous
+CNN-layers. A network with three CNN-layers, all of which are followed
+by pooling layers of size 2, therefore has an effective pooling of 8, with
+the effective pooling at intermediate positions being 1 (beginning), 2,
+and 4. Effective pooling after each layer is set to the power of 2 that is,
+on a logarithmic scale, closest to
+\verb{max_pool_end ^ (<CNN layer number> / <total number of CNN layers>)}.
+Therefore, even though the total effective pooling size of the whole
+CNN part of the network will always be a power of 2, different (possibly
+non-integer) values of \code{max_pool_end} will still lead to different
+networks (see \sQuote{Details} for a worked example).}
+
+\item{dense_layer_num}{(integer \code{numeric(1)})\cr
+Number of dense layers at the end of the network, not counting the output
+layer.}
+
+\item{dense_layer_units}{(integer \code{numeric(1)})\cr
+Number of units in each dense layer, except for the output layer.}
+
+\item{dropout_lstm}{(\code{numeric(1)})\cr
+Fraction of the units to drop for inputs of the recurrent layers.}
+
+\item{dropout}{(\code{numeric(1)})\cr
+Dropout rate of dense layers, except for the output layer.}
+
+\item{batch_norm_momentum}{(\code{numeric(1)})\cr
+\code{momentum}-parameter of \code{layer_batch_normalization} layers used in the
+convolutional part of the network.}
+
+\item{leaky_relu_alpha}{(\code{numeric(1)})\cr
+\code{alpha}-parameter of the \code{layer_activation_leaky_relu} activation layers
+used in the convolutional part of the network.}
+
+\item{dense_activation}{(\code{character(1)})\cr
+Which activation function to use for dense layers. Should be one of
+\code{"relu"}, \code{"sigmoid"}, or \code{"tanh"}.}
+
+\item{skip_block_fraction}{(\code{numeric(1)})\cr
+What fraction of the first convolutional blocks to skip.
+Only used when \code{model_type} is \code{"gap"}.}
+
+\item{residual_block}{(\code{logical(1)})\cr
+Whether to use residual layers in the convolutional part of the network.}
+
+\item{reverse_encoding}{(\code{logical(1)})\cr
+Whether the network should have a second input for reverse-complement
+sequences.}
+
+\item{optimizer}{(\code{character(1)})\cr
+Which optimizer to use. One of \code{"adam"}, \code{"adagrad"}, \code{"rmsprop"}, or \code{"sgd"}.}
+
+\item{model_type}{(\code{character(1)})\cr
+Whether to use the global average pooling (\code{"gap"}) or recurrent
+(\code{"recurrent"}) model type.}
+
+\item{recurrent_type}{(\code{character(1)})\cr
+Which recurrent network type to use. One of \code{"lstm"} or \code{"gru"}.
+Only used when \code{model_type} is \code{"recurrent"}.}
+
+\item{recurrent_layers}{(integer \code{numeric(1)})\cr
+Number of recurrent layers.
+Only used when \code{model_type} is \code{"recurrent"}.}
+
+\item{recurrent_bidirectional}{(\code{logical(1)})\cr
+Whether to use bidirectional recurrent layers.
+Only used when \code{model_type} is \code{"recurrent"}.}
+
+\item{recurrent_units}{(integer \code{numeric(1)})\cr
+Number of units in each recurrent layer.
+Only used when \code{model_type} is \code{"recurrent"}.}
+
+\item{vocabulary_size}{(integer \code{numeric(1)})\cr
+Vocabulary size of (one-hot encoded) input strings. This determines the
+input tensor shape, together with \code{maxlen}.}
+
+\item{last_layer_activation}{(\code{character(1)})\cr
+Activation function of the output layer. Either \code{"sigmoid"} or
+\code{"softmax"}.}
+
+\item{loss_fn}{(\code{character(1)})\cr
+Either \code{"categorical_crossentropy"} or \code{"binary_crossentropy"}. If
+\code{label_noise_matrix} is given, a custom \code{"noisy_loss"} will be used.}
+
+\item{auc_metric}{(\code{logical(1)})\cr
+Whether to add the AUC metric.}
+
+\item{num_targets}{(integer \code{numeric(1)})\cr
+Number of output units to create.}
+
+\item{model_seed}{(integer \code{numeric(1)} or \code{NULL})\cr
+Seed for the model parameters in TensorFlow; no seed is set if \code{NULL}.}
+
+\item{bal_acc}{(\code{logical(1)})\cr
+Whether to add the balanced accuracy metric.}
+
+\item{f1_metric}{(\code{logical(1)})\cr
+Whether to add the F1 metric.}
+
+\item{mixed_precision}{(\code{logical(1)})\cr
+Whether to use mixed precision
+(\url{https://www.tensorflow.org/guide/mixed_precision}).}
+
+\item{mirrored_strategy}{(\code{logical(1)} or \code{NULL})\cr
+Whether to use the distributed mirrored strategy. If \code{NULL}, the
+mirrored strategy is used only if more than one GPU is available.}
+}
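+\details{
+The following worked examples illustrate how the architecture parameters
+described above interact; the concrete numbers are illustrations derived
+from those descriptions.
+
+Layer-count rounding: with \code{number_of_cnn_layers = 7} and
+\code{conv_block_count = 3}, the closest integer divisible by 3 is 6, so
+the network gets 6 CNN-layers, arranged in 3 blocks of 2 layers each.
+
+Kernel-size interpolation: with \code{kernel_size_0 = 16},
+\code{kernel_size_end = 4} and 3 CNN-layers, geometric interpolation
+gives kernel sizes 16, 8 and 4.
+
+Effective pooling: with \code{max_pool_end = 8} and 3 CNN-layers, the
+target effective pooling after layers 1, 2 and 3 is
+\verb{8^(1/3) = 2}, \verb{8^(2/3) = 4} and \verb{8^(3/3) = 8},
+each rounded to the nearest power of 2 on a logarithmic scale.
+}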
+\value{
+A keras model implementing the GenomeNet architecture.
+}
+\description{
+Create GenomeNet Model with Given Architecture Parameters
+}
+\examples{
+\dontshow{if (reticulate::py_module_available("tensorflow")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+model <- create_model_genomenet()
+model
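+
+# A recurrent variant; the argument values here are purely illustrative
+model_rnn <- create_model_genomenet(
+  model_type = "recurrent",
+  recurrent_type = "gru",
+  recurrent_layers = 2,
+  recurrent_units = 64
+)
+model_rnn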
+\dontshow{\}) # examplesIf}
+}