deepG / Git / [1c0e03] /R/evaluation.R

Models:
MarcoTheBlack/
deepG
Downloads: 2
[1c0e03]: / R / evaluation.R
History
Download this file
785 lines (696 with data), 29.8 kB

#' Evaluates a trained model on fasta, fastq or rds files
#'
#' Returns evaluation metric like confusion matrix, loss, AUC, AUPRC, MAE, MSE (depending on output layer).
#'
#' @inheritParams generator_fasta_lm
#' @inheritParams generator_fasta_label_folder
#' @inheritParams generator_fasta_label_header_csv
#' @param path_input Input directory where fasta, fastq or rds files are located.
#' @param model A keras model.
#' @param batch_size Number of samples per batch.
#' @param step How often to take a sample.
#' @param vocabulary Vector of allowed characters. Character outside vocabulary get encoded as specified in ambiguous_nuc.
#' @param vocabulary_label List of labels for targets of each output layer.
#' @param number_batches How many batches to evaluate.
#' @param format File format, `"fasta"`, `"fastq"` or `"rds"`.
#' @param mode Either `"lm"` for language model or `"label_header"`, `"label_csv"` or `"label_folder"` for label classification.
#' @param verbose Boolean.
#' @param target_middle Whether model is language model with separate input layers. 
#' @param evaluate_all_files Boolean, if `TRUE` will iterate over all files in \code{path_input} once. \code{number_batches} will be overwritten.
#' @param auc Whether to include AUC metric. If output layer activation is `"softmax"`, only possible for 2 targets. Computes the average if output layer has sigmoid
#' activation and multiple targets.
#' @param auprc Whether to include AUPRC metric. If output layer activation is `"softmax"`, only possible for 2 targets. Computes the average if output layer has sigmoid
#' activation and multiple targets.
#' @param path_pred_list Path to store list of predictions (output of output layers) and corresponding true labels as rds file. 
#' @param exact_num_samples Exact number of samples to evaluate. If you want to evaluate a number of samples not divisible by batch_size. Useful if you want
#' to evaluate a data set exactly ones and know the number of samples already. Should be a vector if `mode = "label_folder"` (with same length as `vocabulary_label`)
#' and else an integer.
#' @param activations List containing output formats for output layers (`softmax, sigmoid` or `linear`). If `NULL`, will be estimated from model.   
#' @param include_seq Whether to store input. Only applies if `path_pred_list` is not `NULL`.
#' @param ... Further generator options. See \code{\link{get_generator}}.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' # create dummy data
#' path_input <- tempfile()
#' dir.create(path_input)
#' create_dummy_data(file_path = path_input,
#'                   num_files = 3,
#'                   seq_length = 11, 
#'                   num_seq = 5,
#'                   vocabulary = c("a", "c", "g", "t"))
#' # create model
#' model <- create_model_lstm_cnn(layer_lstm = 8, layer_dense = 4, maxlen = 10, verbose = FALSE)
#' # evaluate
#' evaluate_model(path_input = path_input,
#'   model = model,
#'   step = 11,
#'   vocabulary = c("a", "c", "g", "t"),
#'   vocabulary_label = list(c("a", "c", "g", "t")),
#'   mode = "lm",
#'   output_format = "target_right",
#'   evaluate_all_files = TRUE,
#'   verbose = FALSE)
#'   
#' @returns A list of evaluation results. Each list element corresponds to an output layer of the model.   
#' @export
evaluate_model <- function(path_input,
                           model = NULL,
                           batch_size = 100,
                           step = 1,
                           padding = FALSE,
                           vocabulary = c("a", "c", "g", "t"),
                           vocabulary_label = list(c("a", "c", "g", "t")),
                           number_batches = 10,
                           format = "fasta",
                           target_middle = FALSE,
                           mode = "lm",
                           output_format = "target_right",
                           ambiguous_nuc = "zero",
                           evaluate_all_files = FALSE,
                           verbose = TRUE,
                           max_iter = 20000,
                           target_from_csv = NULL,
                           max_samples = NULL,
                           proportion_per_seq = NULL,
                           concat_seq = NULL,
                           seed = 1234,
                           auc = FALSE,
                           auprc = FALSE,
                           path_pred_list = NULL,
                           exact_num_samples = NULL,
                           activations = NULL,
                           shuffle_file_order = FALSE,
                           include_seq = FALSE,
                           ...) {
  
  set.seed(seed)
  path_model <- NULL
  stopifnot(mode %in% c("lm", "label_header", "label_folder", "label_csv", "lm_rds", "label_rds"))
  stopifnot(format %in% c("fasta", "fastq", "rds"))
  stopifnot(is.null(proportion_per_seq) || proportion_per_seq <= 1)
  if (!is.null(exact_num_samples) & evaluate_all_files) {
    warning(paste("Will evaluate number of samples as specified in exact_num_samples argument. Setting evaluate_all_files to FALSE."))
    evaluate_all_files <- FALSE
  }
  eval_exact_num_samples <- !is.null(exact_num_samples) | evaluate_all_files
  if (is.null(activations)) activations <- get_output_activations(model)
  if (is.null(path_pred_list) & include_seq) {
    stop("Can only store input, if path_pred_list is specified.")
  }
  if (is.null(vocabulary_label)) vocabulary_label <- list(vocabulary)
  if (!is.list(vocabulary_label)) vocabulary_label <- list(vocabulary_label)
  if (mode == "label_folder") {
    number_batches <- rep(ceiling(number_batches/length(path_input)), length(path_input))
  }
  num_classes <- ifelse(mode == "label_folder", length(path_input), 1)
  num_out_layers <- length(activations)
  
  # extract maxlen from model
  num_in_layers <- length(model$inputs)
  if (num_in_layers == 1) {
    maxlen <- model$input$shape[[2]]
  } else {
    if (!target_middle) {
      maxlen <- model$input[[num_in_layers]]$shape[[2]]
    } else {
      maxlen <- model$input[[num_in_layers - 1]]$shape[[2]] + model$input[[num_in_layers]]$shape[[2]]
    }
  }
  
  if (evaluate_all_files & (format %in% c("fasta", "fastq"))) {
    
    number_batches <- NULL
    num_samples <- rep(0, length(path_input))
    
    for (i in 1:num_classes) {
      if (mode == "label_folder") {
        files <- list_fasta_files(path_input[[i]], format = format, file_filter = NULL)
      } else {
        files <- list_fasta_files(path_input, format = format, file_filter = NULL)
      }
      
      # remove files not in csv table
      if (mode == "label_csv") {
        csv_file <- utils::read.csv2(target_from_csv, header = TRUE, stringsAsFactors = FALSE)
        if (dim(csv_file)[2] == 1) {
          csv_file <- utils::read.csv(target_from_csv, header = TRUE, stringsAsFactors = FALSE)
        }
        index <- basename(files) %in% csv_file$file
        files <- files[index]
        if (length(files) == 0) {
          stop("No files from path_input have label in target_from_csv file.")
        }
      }
      
      for (file in files) {
        if (format == "fasta") {
          fasta_file <- microseq::readFasta(file)
        } else {
          fasta_file <- microseq::readFastq(file)
        }
        
        # remove entries with wrong header
        if (mode == "label_header") {
          index <- fasta_file$Header %in% vocabulary_label
          fasta_file <- fasta_file[index, ]
        }
        
        seq_vector <- fasta_file$Sequence
        
        if (!is.null(concat_seq)) {
          seq_vector <- paste(seq_vector, collapse = concat_seq)
        }
        
        if (!is.null(proportion_per_seq)) {
          fasta_width <- nchar(seq_vector)
          sample_range <- floor(fasta_width - (proportion_per_seq * fasta_width))
          start <- mapply(sample_range, FUN = sample, size = 1)
          perc_length <- floor(fasta_width * proportion_per_seq)
          stop <- start + perc_length
          seq_vector <- mapply(seq_vector, FUN = substr, start = start, stop = stop)
        }
        
        if (mode == "lm") {
          if (!padding) {
            seq_vector <- seq_vector[nchar(seq_vector) >= (maxlen + 1)]
          } else {
            length_vector <- nchar(seq_vector)
            short_seq_index <- which(length_vector < (maxlen + 1))
            for (ssi in short_seq_index) {
              seq_vector[ssi] <- paste0(paste(rep("0", (maxlen + 1) - length_vector[ssi]), collapse = ""), seq_vector[ssi])
            }
          }
        } else {
          if (!padding) {
            seq_vector <- seq_vector[nchar(seq_vector) >= (maxlen)]
          } else {
            length_vector <- nchar(seq_vector)
            short_seq_index <- which(length_vector < (maxlen))
            for (ssi in short_seq_index) {
              seq_vector[ssi] <- paste0(paste(rep("0", (maxlen) - length_vector[ssi]), collapse = ""), seq_vector[ssi])
            }
          }
        }
        
        if (length(seq_vector) == 0) next
        new_samples <- get_start_ind(seq_vector = seq_vector,
                                     length_vector = nchar(seq_vector),
                                     maxlen = maxlen,
                                     step = step,
                                     train_mode = ifelse(mode == "lm", "lm", "label"),
                                     discard_amb_nuc = ifelse(ambiguous_nuc == "discard", TRUE, FALSE),
                                     vocabulary = vocabulary
        ) %>% length()
        
        if (is.null(max_samples)) {
          num_samples[i] <- num_samples[i] + new_samples
        } else {
          num_samples[i] <- num_samples[i] + min(new_samples, max_samples)
        }
      }
      number_batches[i] <- ceiling(num_samples[i]/batch_size)
      
    }
    if (mode == "label_folder") {
      message_string <- paste0("Evaluate ", num_samples, " samples for class ", vocabulary_label[[1]], ".\n")
    } else {
      message_string <- paste0("Evaluate ", sum(num_samples), " samples.")
    }
    message(message_string)
  }
  
  if (evaluate_all_files & format == "rds") {
    rds_files <- list_fasta_files(path_corpus = path_input,
                                  format = "rds",
                                  file_filter = NULL)
    num_samples <- 0
    for (file in rds_files) {
      rds_file <- readRDS(file)
      x <- rds_file[[1]]
      while (is.list(x)) {
        x <- x[[1]]
      }
      num_samples <- dim(x)[1] + num_samples
    }
    number_batches <- ceiling(num_samples/batch_size)
    message_string <- paste0("Evaluate ", num_samples, " samples.")
    message(message_string)
  }
  
  if (!is.null(exact_num_samples)) {
    num_samples <- exact_num_samples
    number_batches <- ceiling(num_samples/batch_size)
  }
  
  overall_num_batches <- sum(number_batches)
  
  if (mode == "lm") {
    gen <- generator_fasta_lm(path_corpus = path_input,
                              format = format,
                              batch_size = batch_size,
                              maxlen = maxlen,
                              max_iter = max_iter,
                              vocabulary = vocabulary,
                              verbose = FALSE,
                              shuffle_file_order = shuffle_file_order,
                              step = step,
                              concat_seq = concat_seq,
                              padding = padding,
                              shuffle_input = FALSE,
                              reverse_complement = FALSE,
                              output_format = output_format,
                              ambiguous_nuc = ambiguous_nuc,
                              proportion_per_seq = proportion_per_seq,
                              max_samples = max_samples,
                              seed = seed,
                              ...)
  }
  
  if (mode == "label_header" | mode == "label_csv") {
    gen <- generator_fasta_label_header_csv(path_corpus = path_input,
                                            format = format,
                                            batch_size = batch_size,
                                            maxlen = maxlen,
                                            max_iter = max_iter,
                                            vocabulary = vocabulary,
                                            verbose = FALSE,
                                            shuffle_file_order = shuffle_file_order,
                                            step = step,
                                            padding = padding,
                                            shuffle_input = FALSE,
                                            concat_seq = concat_seq,
                                            vocabulary_label = vocabulary_label[[1]],
                                            reverse_complement = FALSE,
                                            ambiguous_nuc = ambiguous_nuc,
                                            target_from_csv = target_from_csv,
                                            proportion_per_seq = proportion_per_seq,
                                            max_samples = max_samples,
                                            seed = seed, ...)
  }
  
  if (mode == "label_rds" | mode == "lm_rds") {
    gen <- generator_rds(rds_folder = path_input, batch_size = batch_size, path_file_log = NULL, ...)
  }
  
  batch_index <- 1
  start_time <- Sys.time()
  ten_percent_steps <- seq(overall_num_batches/10, overall_num_batches, length.out = 10)
  percentage_index <- 1
  count <- 1
  y_conf_list <- vector("list", overall_num_batches)
  y_list <- vector("list", overall_num_batches)
  if (include_seq) {
    x_list <- vector("list", overall_num_batches)
  }
  
  for (k in 1:num_classes) {
    
    index <- NULL
    if (mode == "label_folder") {
      gen <- generator_fasta_label_folder(path_corpus = path_input[[k]],
                                          format = format,
                                          batch_size = batch_size,
                                          maxlen = maxlen,
                                          max_iter = max_iter,
                                          vocabulary = vocabulary,
                                          step = step,
                                          padding = padding,
                                          concat_seq = concat_seq,
                                          reverse_complement = FALSE,
                                          num_targets = length(path_input),
                                          ones_column = k,
                                          ambiguous_nuc = ambiguous_nuc,
                                          proportion_per_seq = proportion_per_seq,
                                          max_samples = max_samples,
                                          seed = seed, ...)
    }
    
    for (i in 1:number_batches[k]) {
      z <- gen()
      x <- z[[1]]
      y <- z[[2]]
      
      y_conf <- model(x)
      batch_index <- batch_index + 1
      
      # remove double predictions
      if (eval_exact_num_samples & (i == number_batches[k])) {
        double_index <- (i * batch_size) - num_samples[k]
        
        if (double_index > 0) {
          index <- 1:(nrow(y_conf) - double_index)
          
          if (is.list(y_conf)) {
            for (m in 1:length(y_conf)) {
              y_conf[[m]] <- y_conf[[m]][index, ]
              y[[m]] <- y[[m]][index, ]
            }
          } else {
            y_conf <- y_conf[index, ]
            y <- y[index, ]
          }
          
          # vector to matrix
          if (length(index) == 1) {
            if (is.list(y_conf)) {
              for (m in 1:length(y_conf)) {
                y_conf[[m]] <- array(as.array(y_conf[[m]]), dim = c(1, length(y_conf[[m]])))
                y[[m]] <- matrix(y[[m]], ncol = length(y[[m]]))
              }
            } else {
              y_conf <- array(as.array(y_conf), dim = c(1, length(y_conf)))
              y <- matrix(y, ncol = length(y))
            }
          }
          
        }
      }
      
      if (include_seq) {
        x_list[[count]] <- x
      }
      y_conf_list[[count]] <- y_conf
      if (batch_size == 1 | (!is.null(index) && length(index == 1))) {
        col_num <- ncol(y_conf)
        if (is.na(col_num)) col_num <- length(y_conf)
        y_list[[count]] <- matrix(y, ncol = col_num)
      } else {
        y_list[[count]] <- y
      }
      count <- count + 1
      
      if (verbose & (batch_index == 10)) {
        time_passed <- as.double(difftime(Sys.time(), start_time, units = "hours"))
        time_estimation <- (overall_num_batches/10) * time_passed
        cat("Evaluation will take approximately", round(time_estimation, 3), "hours. Starting time:", format(Sys.time(), "%F %R."), " \n")
        
      }
      
      if (verbose & (batch_index > ten_percent_steps[percentage_index]) & percentage_index < 10) {
        cat("Progress: ", percentage_index * 10 ,"% \n")
        time_passed <- as.double(difftime(Sys.time(), start_time, units = "hours"))
        cat("Time passed: ", round(time_passed, 3), "hours \n")
        percentage_index <- percentage_index + 1
      }
      
    }
  }
  
  if (verbose) {
    cat("Progress: 100 % \n")
    time_passed <- as.double(difftime(Sys.time(), start_time, units = "hours"))
    cat("Time passed: ", round(time_passed, 3), "hours \n")
  }
  
  y_conf_list <- reshape_y_list(y_conf_list, num_out_layers = num_out_layers, tf_format = TRUE)
  y_list <- reshape_y_list(y_list, num_out_layers = num_out_layers, tf_format = FALSE)
  
  if (!is.null(path_pred_list)) {
    if (include_seq) {
      if (is.list(x_list[[1]])) {
        num_layers <- length(x_list[[1]])
      } else {
        num_layers <- 1 
      }
      x_list <- reshape_y_list(x_list, num_out_layers = num_layers, tf_format = FALSE)
      saveRDS(list(pred = y_conf_list, true = y_list, x = x_list), path_pred_list)
    } else {
      saveRDS(list(pred = y_conf_list, true = y_list), path_pred_list)
    }
  } 
  
  eval_list <- list()
  for (i in 1:num_out_layers) {
    
    if (activations[i] == "softmax") {
      eval_list[[i]] <- evaluate_softmax(y = y_list[[i]], y_conf = y_conf_list[[i]],
                                         auc = auc, auprc = auprc,
                                         label_names = vocabulary_label[[i]])
    }
    
    if (activations[i] == "sigmoid") {
      eval_list[[i]] <- evaluate_sigmoid(y = y_list[[i]], y_conf = y_conf_list[[i]],
                                         auc = auc, auprc = auprc,
                                         label_names = vocabulary_label[[i]])
    }
    
    if (activations[i] == "linear") {
      eval_list[[i]] <- evaluate_linear(y_true = y_list[[i]], y_pred = y_conf_list[[i]], label_names = vocabulary_label[[i]])
    }
    
  }
  
  return(eval_list)
}


reshape_y_list <- function(y, num_out_layers, tf_format = TRUE) {
  
  if (num_out_layers > 1) {
    y <- do.call(c, y)
  }
  
  reshaped_list <- vector("list", num_out_layers)
  
  for (i in 1:num_out_layers) {
    index <- seq(i, length(y), by = num_out_layers)
    if (tf_format) {
      reshaped_list[[i]] <- y[index] %>%
        tensorflow::tf$concat(axis = 0L) %>%
        keras::k_eval()
    } else {
      reshaped_list[[i]] <- do.call(rbind, y[index])
    }
  }
  return(reshaped_list)
}

#' Evaluate matrices of true targets and predictions from layer with softmax activation. 
#' 
#' Compute confusion matrix, accuracy, categorical crossentropy and (optionally) AUC or AUPRC, given predictions and
#' true targets. AUC and AUPRC only possible for 2 targets. 
#' 
#' @param y Matrix of true target.
#' @param y_conf Matrix of predictions.
#' @param auc Whether to include AUC metric. Only possible for 2 targets. 
#' @param auprc Whether to include AUPRC metric. Only possible for 2 targets. 
#' @param label_names Names of corresponding labels. Length must be equal to number of columns of \code{y}.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' y <- matrix(c(1, 0, 0, 0, 1, 1), ncol = 2)
#' y_conf <- matrix(c(0.3, 0.5, 0.1, 0.7, 0.5, 0.9), ncol = 2)
#' evaluate_softmax(y, y_conf, auc = TRUE, auprc = TRUE, label_names = c("A", "B")) 
#' 
#' @returns A list of evaluation results. 
#' @export    
evaluate_softmax <- function(y, y_conf, auc = FALSE, auprc = FALSE, label_names = NULL) {
  
  if (ncol(y) != 2 & (auc | auprc)) {
    message("Can only compute AUC or AUPRC if output layer with softmax acticvation has two neurons.")
    auc <- FALSE
    auprc <- FALSE
  }
  
  y_pred <- apply(y_conf, 1, which.max)
  y_true <- apply(y, 1, FUN = which.max) - 1
  
  df_true_pred <- data.frame(
    true = factor(y_true + 1, levels = 1:(length(label_names)), labels = label_names),
    pred = factor(y_pred, levels = 1:(length(label_names)), labels = label_names)
  )
  
  loss_per_class <- list()
  for (i in 1:ncol(y)) {
    index <- (y_true + 1) == i
    if (any(index)) {
      cce_loss_class <- tensorflow::tf$keras$losses$categorical_crossentropy(y[index, ], y_conf[index, ])
      loss_per_class[[i]] <- cce_loss_class$numpy()
    } else {
      loss_per_class[[i]] <- NA
    }
  }
  
  cm <- yardstick::conf_mat(df_true_pred, true, pred)
  confMat <- cm[[1]]
  
  acc <- sum(diag(confMat))/sum(confMat)
  loss <- mean(unlist(loss_per_class))
  
  for (i in 1:length(loss_per_class)) {
    loss_per_class[[i]] <- mean(unlist(loss_per_class[[i]]), na.rm = TRUE)
  }
  
  loss_per_class <- unlist(loss_per_class)
  m <- as.matrix(confMat)
  class_acc <- vector("numeric")
  for (i in 1:ncol(m)) {
    if (sum(m[ , i]) == 0) {
      class_acc[i] <- NA
    } else {
      class_acc[i] <- m[i, i]/sum(m[ , i])
    }
  }
  names(class_acc) <- label_names
  names(loss_per_class) <- label_names
  balanced_acc <- mean(class_acc)
  
  if (auc) {
    auc_list <- PRROC::roc.curve(
      scores.class0 = y_conf[ , 2],
      weights.class0 = y_true)
  } else {
    auc_list <- NULL
  }
  
  if (auprc) {
    auprc_list <- PRROC::pr.curve(
      scores.class0 = y_conf[ , 2],
      weights.class0 = y_true)
  } else {
    auprc_list <- NULL
  }
  
  return(list(confusion_matrix = confMat,
              accuracy = acc,
              categorical_crossentropy_loss = loss,
              #balanced_accuracy = balanced_acc,
              #loss_per_class = loss_per_class,
              #accuracy_per_class = class_acc,
              AUC = auc_list$auc,
              AUPRC = auprc_list$auc.integral))
}

#' Evaluate matrices of true targets and predictions from layer with sigmoid activation. 
#' 
#' Compute accuracy, binary crossentropy and (optionally) AUC or AUPRC, given predictions and
#' true targets. Outputs columnwise average.  
#' 
#' @inheritParams evaluate_model
#' @inheritParams evaluate_softmax
#' @param auc Whether to include AUC metric.
#' @param auprc Whether to include AUPRC metric. 
#' @examplesIf reticulate::py_module_available("tensorflow")
#' y <- matrix(sample(c(0, 1), 30, replace = TRUE), ncol = 3)
#' y_conf <- matrix(runif(n = 30), ncol = 3)
#' evaluate_sigmoid(y, y_conf, auc = TRUE, auprc = TRUE)
#' 
#' @returns A list of evaluation results. 
#' @export    
evaluate_sigmoid <- function(y, y_conf, auc = FALSE, auprc = FALSE, label_names = NULL) {
  
  y_pred <- ifelse(y_conf > 0.5, 1, 0)
  
  loss_per_class <- list()
  for (i in 1:ncol(y)) {
    bce_loss_class <- tensorflow::tf$keras$losses$binary_crossentropy(y[ , i], y_conf[ , i])
    loss_per_class[[i]] <- bce_loss_class$numpy()
  }
  
  loss_per_class <- unlist(loss_per_class)
  names(loss_per_class) <- label_names
  loss <- mean(unlist(loss_per_class))
  
  class_acc <- vector("numeric", ncol(y))
  for (i in 1:ncol(y)) {
    num_true_pred <-  sum(y[ , i] == y_pred[ , i])
    class_acc[i] <- num_true_pred /nrow(y)
  }
  names(class_acc) <- label_names
  acc <- mean(class_acc)
  
  if (auc) {
    auc_list <- purrr::map(1:ncol(y_conf), ~PRROC::roc.curve(
      scores.class0 = y_conf[ , .x],
      weights.class0 = y[ , .x]))
    auc_vector <- vector("numeric", ncol(y))
    for (i in 1:length(auc_vector)) {
      auc_vector[i] <- auc_list[[i]]$auc
    }
    
    na_count <- sum(is.na(auc_vector))
    if (na_count > 0) {
      message(paste(sum(na_count), ifelse(na_count > 1, "columns", "column"),
                    "removed from AUC evaluation since they contain only one label"))
    }
    AUC <- mean(auc_vector, na.rm = TRUE)
  } else {
    AUC <- NULL
  }
  
  if (auprc) {
    auprc_list <- purrr::map(1:ncol(y_conf), ~PRROC::pr.curve(
      scores.class0 = y_conf[ , .x],
      weights.class0 = y[ , .x]))
    auprc_vector <- vector("numeric", ncol(y))
    for (i in 1:length(auprc_vector)) {
      auprc_vector[i] <- auprc_list[[i]]$auc.integral
    }
    AUPRC <- mean(auprc_vector, na.rm = TRUE) 
  } else {
    AUPRC <- NULL
  }
  
  return(list(accuracy = acc,
              binary_crossentropy_loss = loss,
              #loss_per_class = loss_per_class,
              #accuracy_per_class = class_acc,
              AUC = AUC,
              AUPRC = AUPRC))
  
}

#' Evaluate matrices of true targets and predictions from layer with linear activation. 
#' 
#' Compute MAE and MSE, given predictions and
#' true targets. Outputs columnwise average.  
#' 
#' @inheritParams evaluate_model
#' @inheritParams evaluate_softmax
#' @param y_true Matrix of true labels.
#' @param y_pred Matrix of predictions.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' y_true <- matrix(rnorm(n = 12), ncol = 3)
#' y_pred <- matrix(rnorm(n = 12), ncol = 3)
#' evaluate_linear(y_true, y_pred)
#' 
#' @returns A list of evaluation results. 
#' @export    
evaluate_linear <- function(y_true, y_pred, label_names = NULL) {
  
  loss_per_class_mse <- list()
  loss_per_class_mae <- list()
  for (i in 1:ncol(y_true)) {
    mse_loss_class <- tensorflow::tf$keras$losses$mean_squared_error(y_true[ ,i], y_pred[ , i])
    mae_loss_class <- tensorflow::tf$keras$losses$mean_absolute_error(y_true[ ,i], y_pred[ , i])
    loss_per_class_mse[[i]] <- mse_loss_class$numpy()
    loss_per_class_mae[[i]] <- mae_loss_class$numpy()
  }
  
  return(list(mse = mean(unlist(loss_per_class_mse)),
              mae = mean(unlist(loss_per_class_mae))))
  
}


#' Plot ROC
#' 
#' Compute ROC and AUC from target and prediction matrix and plot ROC. Target/prediction matrix should 
#' have one column if output of layer with sigmoid activation and two columns for softmax activation. 
#' 
#' @inheritParams evaluate_softmax
#' @inheritParams evaluate_linear
#' @param path_roc_plot Where to store ROC plot.
#' @param return_plot Whether to return plot.
#' @examples
#' y_true <- matrix(c(1, 0, 0, 0, 1, 1), ncol = 1)
#' y_conf <- matrix(runif(n = nrow(y_true)), ncol = 1)
#' p <- plot_roc(y_true, y_conf, return_plot = TRUE)
#' p
#' 
#' @returns A ggplot of ROC curve.
#' @export    
plot_roc <- function(y_true, y_conf, path_roc_plot = NULL,
                     return_plot = TRUE) {
  
  if (!all(y_true == 0 | y_true == 1)) {
    stop("y_true should only contain 0 and 1 entries")
  }
  
  if (is.matrix(y_true) && ncol(y_true) > 2) {
    stop("y_true can contain 1 or 2 columns")
  }
  
  if (is.matrix(y_true) && ncol(y_true) == 2) {
    y_true <- y_true[ , 1] 
    y_conf <- y_conf[ , 2]
  }
  
  if (stats::var(y_true) == 0) {
    stop("y_true contains just one label")
  }
  
  y_true <- as.vector(y_true)
  y_conf <- as.vector(y_conf)
  
  rocobj <-  pROC::roc(y_true, y_conf, quiet = TRUE)
  auc <- round(pROC::auc(y_true, y_conf, quiet = TRUE), 4)
  p <- pROC::ggroc(rocobj,  size = 1, color = "black")
  p <- p + ggplot2::theme_classic() + ggplot2::theme(aspect.ratio = 1) 
  p <- p + ggplot2::ggtitle(paste0('ROC Curve ', '(AUC = ', auc, ')'))
  p <- p + ggplot2::geom_abline(intercept = 1, linetype = 2, color = "grey50")
  p <- p + ggplot2::geom_vline(xintercept = 1, linetype = 2, color = "grey50")
  p <- p + ggplot2::geom_hline(yintercept = 1,  linetype = 2, color = "grey50")
  
  if (!is.null(path_roc_plot)) {
    ggplot2::ggsave(path_roc_plot, p)
  }
  
  if (return_plot) {
    return(p)
  } else {
    return(NULL)
  }
  
}

# plot_roc_auprc <- function(y_true, y_conf, path_roc_plot = NULL, path_auprc_plot = NULL,
#                            return_plot = TRUE, layer_activation = "softmax") {
#   
#   if (layer_activation == "softmax") {
#     
#     if (!all(y_true == 0 | y_true == 1)) {
#       stop("y_true should only contain 0 and 1 entries")
#     }
#     
#     if (ncol(y_true) != 2 & (auc | auprc)) {
#       message("Can only compute AUC or AUPRC if output layer with softmax acticvation has two neurons.")
#     }
#     
#     auc_list <- PRROC::roc.curve(
#       scores.class0 = y_conf[ , 2],
#       weights.class0 = y_true[ , 2], curve = TRUE)
#     
#     
#     auprc_list <- PRROC::pr.curve(
#       scores.class0 = y_conf[ , 2],
#       weights.class0 = y_true[ , 2], curve = TRUE)
#     
#     #auc_plot <- NULL
#     #auprc_plot <- NULL  
#     
#   }
#   
#   if (layer_activation == "sigmoid") {
#     
#     auc_list <- purrr::map(1:ncol(y_conf), ~PRROC::roc.curve(
#       scores.class0 = y_conf[ , .x],
#       weights.class0 = y[ , .x], curve = TRUE))
#     auc_vector <- vector("numeric", ncol(y))
#     
#     
#     auprc_list <- purrr::map(1:ncol(y_conf), ~PRROC::pr.curve(
#       scores.class0 = y_conf[ , .x],
#       weights.class0 = y[ , .x], curve = TRUE))
#     auprc_vector <- vector("numeric", ncol(y))
#     
#   }
#   
#   if (!is.null(path_roc_plot)) {
#     
#   }
#   
#   if (!is.null(path_auprc_plot)) {
#     
#   }
#   
# }