IntegratedLearner / Git / Diff of /R/IL

Models:
AlyssaS/
IntegratedLearner
Downloads: 1
Diff of /R/IL_Utilities.R [000000] .. [a4ee51]
Switch to side-by-side view

--- a
+++ b/R/IL_Utilities.R
@@ -0,0 +1,1485 @@
+ ###############################
+#### Print learner summary ####
+###############################
+
+print.learner <- function(res,...){
+  num_layers <- length(res$X_train_layers)
+  
+  cat("Time for model fit :",res$time,"minutes \n")
+  if(res$family=="binomial"){
+    
+    cat("========================================\n")
+    cat("Model fit for individual layers:",res$base_learner,"\n")
+    if(res$run_stacked==TRUE){cat("Model fit for stacked layer:",res$meta_learner,"\n")}
+    if(res$run_concat==TRUE){cat("Model fit for concatenated layer:",res$base_learner,"\n")}
+    
+    cat("========================================\n")
+    cat("AUC metric for training data: \n")
+    cat("Individual layers: \n")
+    print(res$AUC.train[1:num_layers])
+    cat("======================\n")
+    
+    if(res$run_stacked==TRUE){
+      cat("Stacked model:")
+      cat(as.numeric(res$AUC.train["stacked"]),"\n")
+      cat("======================\n")
+      
+    }
+    if(res$run_concat==TRUE){
+      cat("Concatenated model:")
+      cat(as.numeric(res$AUC.train["concatenated"]),"\n")
+      cat("======================\n")
+      
+    }
+    cat("========================================\n")
+    if(res$test==TRUE){
+      
+      #cat("======================\n")
+      cat("AUC metric for test data: \n")
+      cat("Individual layers: \n")
+      print(res$AUC.test[1:num_layers])
+      cat("======================\n")
+      
+      if(res$run_stacked==TRUE){
+        cat("Stacked model:")
+        cat(as.numeric(res$AUC.test["stacked"]),"\n")
+        cat("======================\n")
+        
+      }
+      if(res$run_concat==TRUE){
+        cat("Concatenated model:")
+        cat(as.numeric(res$AUC.test["concatenated"]),"\n")
+        cat("======================\n")
+        
+      }
+      cat("========================================\n")
+      
+    }
+    
+  } else if(res$family=="gaussian"){
+    
+    
+    cat("========================================\n")
+    cat("Model fit for individual layers:",res$base_learner,"\n")
+    if(res$run_stacked==TRUE){cat("Model fit for stacked layer:",res$meta_learner,"\n")}
+    if(res$run_concat==TRUE){cat("Model fit for concatenated layer:",res$base_learner,"\n")}
+    
+    cat("========================================\n")
+    cat("R^2 for training data: \n")
+    cat("Individual layers: \n")
+    print(res$R2.train[1:num_layers])
+    cat("======================\n")
+    if(res$run_stacked==TRUE){
+      cat("Stacked model:")
+      cat(as.numeric(res$R2.train["stacked"]),"\n")
+      cat("======================\n")
+      
+    }
+    if(res$run_concat==TRUE){
+      cat("Concatenated model:")
+      cat(as.numeric(res$R2.train["concatenated"]),"\n")
+      cat("======================\n")
+    }
+    cat("========================================\n")
+    if(res$test==TRUE){
+      #cat("======================\n")
+      cat("R^2 for test data: \n")
+      cat("Individual layers: \n")
+      print(res$R2.test[1:num_layers])
+      cat("======================\n")
+      if(res$run_stacked==TRUE){
+        cat("Stacked model:")
+        cat(as.numeric(res$R2.test["stacked"]),"\n")
+        cat("======================\n")
+        
+      }
+      if(res$run_concat==TRUE){
+        cat("Concatenated model:")
+        cat(as.numeric(res$R2.test["concatenated"]),"\n")
+        cat("======================\n")
+      }
+      cat("========================================\n")
+      
+    }
+    
+  }
+  
+  if(res$meta_learner=="SL.nnls.auc" & res$run_stacked){
+    
+    cat("Weights for individual layers predictions in IntegratedLearner: \n")
+    print(round(res$weights,digits=3))
+    cat("========================================\n")
+    
+  }
+  
+}
+
+expit <- function(x){
+  return(1/(1+exp(-x)))
+}
+
+#####################################################################
+# Rename after adding serialize = TRUE in bartMachine2 (from ck37r) #
+#####################################################################
+
+# Temporary wrapper that needs to be fixed in SuperLearner
+#' Wrapper for bartMachine learner
+#'
+#' Support bayesian additive regression trees via the bartMachine package.
+#'
+#' @param Y Outcome variable
+#' @param X Covariate dataframe
+#' @param newX Optional dataframe to predict the outcome
+#' @param obsWeights Optional observation-level weights (supported but not tested)
+#' @param id Optional id to group observations from the same unit (not used
+#'   currently).
+#' @param family "gaussian" for regression, "binomial" for binary
+#'   classification
+#' @param num_trees The number of trees to be grown in the sum-of-trees model.
+#' @param num_burn_in Number of MCMC samples to be discarded as "burn-in".
+#' @param num_iterations_after_burn_in Number of MCMC samples to draw from the
+#'   posterior distribution of f(x).
+#' @param alpha Base hyperparameter in tree prior for whether a node is
+#'   nonterminal or not.
+#' @param beta Power hyperparameter in tree prior for whether a node is
+#'   nonterminal or not.
+#' @param k For regression, k determines the prior probability that E(Y|X) is
+#'   contained in the interval (y_{min}, y_{max}), based on a normal
+#'   distribution. For example, when k=2, the prior probability is 95\%. For
+#'   classification, k determines the prior probability that E(Y|X) is between
+#'   (-3,3). Note that a larger value of k results in more shrinkage and a more
+#'   conservative fit.
+#' @param q Quantile of the prior on the error variance at which the data-based
+#'   estimate is placed. Note that the larger the value of q, the more
+#'   aggressive the fit as you are placing more prior weight on values lower
+#'   than the data-based estimate. Not used for classification.
+#' @param nu Degrees of freedom for the inverse chi^2 prior. Not used for
+#'   classification.
+#' @param verbose Prints information about progress of the algorithm to the
+#'   screen.
+#' @param serialize If TRUE, bartMachine results can be saved to a file, but
+#'   will require additional RAM.
+#' @param ... Additional arguments (not used)
+#'
+#' @encoding utf-8
+#' @export
+SL.BART <- function(Y, X, newX, family, obsWeights, id,
+                    num_trees = 50, num_burn_in = 250, verbose = F,
+                    alpha = 0.95, beta = 2, k = 2, q = 0.9, nu = 3,
+                    num_iterations_after_burn_in = 1000,
+                    serialize = TRUE, seed=5678,
+                    ...) {
+  #.SL.require("bartMachine")
+  
+  ################
+  ### CK changes:
+  if (family$family == "binomial") {
+    # Need to convert Y to a factor, otherwise bartMachine does regression.
+    # And importantly, bartMachine expects the first level to be the positive
+    # class, so we have to specify levels.
+    Y = factor(Y, levels = c("1", "0"))
+  }
+  model = bartMachine::bartMachine(X, Y, num_trees = num_trees,
+                                   num_burn_in = num_burn_in, verbose = verbose,
+                                   alpha = alpha, beta = beta, k = k, q = q, nu = nu,
+                                   num_iterations_after_burn_in = num_iterations_after_burn_in,
+                                   serialize = serialize,seed=seed)
+  # pred returns predicted responses (on the scale of the outcome)
+  #pred <- bartMachine:::predict.bartMachine(model, newX)
+  pred <- predict(model, newX)
+  
+  fit <- list(object = model)
+  class(fit) <- c("SL.BART")
+  
+  out <- list(pred = pred, fit = fit)
+  return(out)
+}
+
+#' Predict function for SL.BART
+#'
+#' @param object object
+#' @param newdata newdata
+#'
+#' @return Prediction from the SL.BART
+#' @export
+predict.SL.BART <- function(object, newdata, family, X = NULL, Y = NULL,...) {
+  #.SL.require("bartMachine")
+  pred <- predict(object$object, newdata)
+  return(pred)
+}
+
+
+
+##########################################
+# Mixed BART implementation using mxBART #
+##########################################
+SL.mxBART <- function(Y, X, newX, family, obsWeights, id,
+                      sparse=FALSE,ntree=50,
+                      ndpost=1000,nskip=100,keepevery=10,
+                      mxps=list(list(prior=1,df=3,scale=1)),
+                      ...) {
+  #.SL.require("mxBART")
+  
+  if(family$famil=="gaussian"){
+    type='wbart'
+  }else if(family$family=="binomial"){
+    type='pbart'
+  }
+  
+  model = mxBART::mxbart(y.train=Y,
+                         x.train=X,
+                         x.test = newX,
+                         id.train=list(id),
+                         sparse=FALSE,ntree=ntree,type=type,
+                         ndpost=ndpost,nskip=nskip,keepevery=keepevery,
+                         mxps = mxps)
+  
+  # pred returns predicted responses (on the scale of the outcome)
+  if(family$family=="gaussian"){
+    pred <- model$fhat.test.mean
+    
+  }else if(family$family=="binomial"){
+    pred <- model$prob.test.mean
+  }
+  
+  fit <- list(object = model)
+  class(fit) <- c("SL.mxBART")
+  
+  out <- list(pred = pred, fit = fit)
+  return(out)
+}
+
+
+predict.SL.mxBART <- function(object, newdata,family=family, X=X, Y=Y,
+                              obsWeights, id,
+                              sparse=FALSE,ntree=50,
+                              ndpost=1000,nskip=100,keepevery=10,
+                              mxps=list(list(prior=1,df=3,scale=1)),
+                              ...) {
+  #.SL.require("mxbart")
+  if(family$famil=="gaussian"){
+    type='wbart'
+  }else if(family$family=="binomial"){
+    type='pbart'
+  }
+  
+  model = mxBART::mxbart(y.train=Y,
+                         x.train=X,
+                         x.test = newdata,
+                         id.train=list(id),
+                         sparse=FALSE,ntree=ntree,type=type,
+                         ndpost=ndpost,nskip=nskip,keepevery=keepevery,
+                         mxps = mxps)
+  
+  if(family$family=="gaussian"){
+    pred <- model$fhat.test.mean
+    
+  }else if(family$family=="binomial"){
+    pred <- model$prob.test.mean
+  }
+  
+  
+  return(pred)
+}
+
+
+
+
+##########################################
+# Elastic net wrapper with a fixed alpha #
+##########################################
+
+#' @title Elastic net regression, including lasso and ridge with a fixed alpha
+#'
+#' @description
+#' Penalized regression using elastic net. Alpha = 0 corresponds to ridge
+#' regression and alpha = 1 corresponds to Lasso.
+#'
+#' See \code{vignette("glmnet_beta", package = "glmnet")} for a nice tutorial on
+#' glmnet.
+#'
+#' @param Y Outcome variable
+#' @param X Covariate dataframe
+#' @param newX Dataframe to predict the outcome
+#' @param obsWeights Optional observation-level weights
+#' @param id Optional id to group observations from the same unit (not used
+#'   currently).
+#' @param family "gaussian" for regression, "binomial" for binary
+#'   classification. Untested options: "multinomial" for multiple classification
+#'   or "mgaussian" for multiple response, "poisson" for non-negative outcome
+#'   with proportional mean and variance, "cox".
+#' @param alpha Elastic net mixing parameter, range [0, 1]. 0 = ridge regression
+#'   and 1 = lasso.
+#' @param nfolds Number of folds for internal cross-validation to optimize lambda.
+#' @param nlambda Number of lambda values to check, recommended to be 100 or more.
+#' @param loss Loss function, can be "deviance", "mse", or "mae". If family =
+#'   binomial can also be "auc" or "class" (misclassification error).
+#' @param useMin If TRUE use lambda that minimizes risk, otherwise use 1
+#'   standard-error rule which chooses a higher penalty with performance within
+#'   one standard error of the minimum (see Breiman et al. 1984 on CART for
+#'   background).
+#' @param ... Any additional arguments are passed through to cv.glmnet.
+#'
+#' @examples
+#'
+#' # Load a test dataset.
+#' data(PimaIndiansDiabetes2, package = "mlbench")
+#' data = PimaIndiansDiabetes2
+#'
+#' # Omit observations with missing data.
+#' data = na.omit(data)
+#'
+#' Y = as.numeric(data$diabetes == "pos")
+#' X = subset(data, select = -diabetes)
+#'
+#' set.seed(1, "L'Ecuyer-CMRG")
+#'
+#' sl = SuperLearner(Y, X, family = binomial(),
+#'                   SL.library = c("SL.mean", "SL.glm", "SL.glmnet"))
+#' sl
+#'
+#' @references
+#'
+#' Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization paths for
+#' generalized linear models via coordinate descent. Journal of statistical
+#' software, 33(1), 1.
+#'
+#' Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased estimation
+#' for nonorthogonal problems. Technometrics, 12(1), 55-67.
+#'
+#' Tibshirani, R. (1996). Regression shrinkage and selection via the lasso.
+#' Journal of the Royal Statistical Society. Series B (Methodological), 267-288.
+#'
+#' Zou, H., & Hastie, T. (2005). Regularization and variable selection via the
+#' elastic net. Journal of the Royal Statistical Society: Series B (Statistical
+#' Methodology), 67(2), 301-320.
+#'
+#' @seealso \code{\link{predict.SL.glmnet}} \code{\link[glmnet]{cv.glmnet}}
+#'   \code{\link[glmnet]{glmnet}}
+#'
+#' @export
+SL.glmnet2 <- function(Y, X, newX, family, obsWeights, id,
+                       alpha = 0.5, nfolds = 10, nlambda = 100, useMin = TRUE,
+                       loss = "deviance",
+                       ...) {
+  #.SL.require('glmnet')
+  
+  # X must be a matrix, should we use model.matrix or as.matrix
+  # TODO: support sparse matrices.
+  if (!is.matrix(X)) {
+    X <- model.matrix(~ -1 + ., X)
+    newX <- model.matrix(~ -1 + ., newX)
+  }
+  
+  # Use CV to find optimal lambda.
+  fitCV <- glmnet::cv.glmnet(x = X, y = Y, weights = obsWeights,
+                             lambda = NULL,
+                             type.measure = loss,
+                             nfolds = nfolds,
+                             family = family$family,
+                             alpha = alpha,
+                             nlambda = nlambda,
+                             intercept = FALSE,
+                             ...)
+  
+  # If we predict with the cv.glmnet object we can specify lambda using a
+  # string.
+  pred <- predict(fitCV, newx = newX, type = "response",
+                  s = ifelse(useMin, "lambda.min", "lambda.1se"))
+  
+  fit <- list(object = fitCV, useMin = useMin)
+  class(fit) <- "SL.glmnet"
+  
+  out <- list(pred = pred, fit = fit)
+  return(out)
+}
+
+########################################
+# LASSO wrapper with intercept = FALSE #
+########################################
+
+#' @title Elastic net regression, including lasso and ridge with a fixed alpha
+#'
+#' @description
+#' Penalized regression using elastic net. Alpha = 0 corresponds to ridge
+#' regression and alpha = 1 corresponds to Lasso.
+#'
+#' See \code{vignette("glmnet_beta", package = "glmnet")} for a nice tutorial on
+#' glmnet.
+#'
+#' @param Y Outcome variable
+#' @param X Covariate dataframe
+#' @param newX Dataframe to predict the outcome
+#' @param obsWeights Optional observation-level weights
+#' @param id Optional id to group observations from the same unit (not used
+#'   currently).
+#' @param family "gaussian" for regression, "binomial" for binary
+#'   classification. Untested options: "multinomial" for multiple classification
+#'   or "mgaussian" for multiple response, "poisson" for non-negative outcome
+#'   with proportional mean and variance, "cox".
+#' @param alpha Elastic net mixing parameter, range [0, 1]. 0 = ridge regression
+#'   and 1 = lasso.
+#' @param nfolds Number of folds for internal cross-validation to optimize lambda.
+#' @param nlambda Number of lambda values to check, recommended to be 100 or more.
+#' @param loss Loss function, can be "deviance", "mse", or "mae". If family =
+#'   binomial can also be "auc" or "class" (misclassification error).
+#' @param useMin If TRUE use lambda that minimizes risk, otherwise use 1
+#'   standard-error rule which chooses a higher penalty with performance within
+#'   one standard error of the minimum (see Breiman et al. 1984 on CART for
+#'   background).
+#' @param ... Any additional arguments are passed through to cv.glmnet.
+#'
+#' @examples
+#'
+#' # Load a test dataset.
+#' data(PimaIndiansDiabetes2, package = "mlbench")
+#' data = PimaIndiansDiabetes2
+#'
+#' # Omit observations with missing data.
+#' data = na.omit(data)
+#'
+#' Y = as.numeric(data$diabetes == "pos")
+#' X = subset(data, select = -diabetes)
+#'
+#' set.seed(1, "L'Ecuyer-CMRG")
+#'
+#' sl = SuperLearner(Y, X, family = binomial(),
+#'                   SL.library = c("SL.mean", "SL.glm", "SL.glmnet"))
+#' sl
+#'
+#' @references
+#'
+#' Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization paths for
+#' generalized linear models via coordinate descent. Journal of statistical
+#' software, 33(1), 1.
+#'
+#' Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased estimation
+#' for nonorthogonal problems. Technometrics, 12(1), 55-67.
+#'
+#' Tibshirani, R. (1996). Regression shrinkage and selection via the lasso.
+#' Journal of the Royal Statistical Society. Series B (Methodological), 267-288.
+#'
+#' Zou, H., & Hastie, T. (2005). Regularization and variable selection via the
+#' elastic net. Journal of the Royal Statistical Society: Series B (Statistical
+#' Methodology), 67(2), 301-320.
+#'
+#' @seealso \code{\link{predict.SL.glmnet}} \code{\link[glmnet]{cv.glmnet}}
+#'   \code{\link[glmnet]{glmnet}}
+#'
+#' @export
+SL.LASSO <- function(Y, X, newX, family, obsWeights, id,
+                     alpha = 1, nfolds = 10, nlambda = 100, useMin = TRUE,
+                     loss = "deviance",
+                     ...) {
+  #.SL.require('glmnet')
+  
+  # X must be a matrix, should we use model.matrix or as.matrix
+  # TODO: support sparse matrices.
+  if (!is.matrix(X)) {
+    X <- model.matrix(~ -1 + ., X)
+    newX <- model.matrix(~ -1 + ., newX)
+  }
+  
+  # Use CV to find optimal lambda.
+  fitCV <- glmnet::cv.glmnet(x = X, y = Y, weights = obsWeights,
+                             lambda = NULL,
+                             type.measure = loss,
+                             nfolds = nfolds,
+                             family = family$family,
+                             alpha = alpha,
+                             nlambda = nlambda,
+                             intercept = FALSE,
+                             ...)
+  
+  # If we predict with the cv.glmnet object we can specify lambda using a
+  # string.
+  pred <- predict(fitCV, newx = newX, type = "response",
+                  s = ifelse(useMin, "lambda.min", "lambda.1se"))
+  
+  fit <- list(object = fitCV, useMin = useMin)
+  class(fit) <- "SL.glmnet"
+  
+  out <- list(pred = pred, fit = fit)
+  return(out)
+}
+
+
+############################################
+# Elastic net wrapper with optimized alpha #
+############################################
+
+# NOTE: COMPUTATIONALLY INTENSIVE
+
+#' @title Elastic net regression, including lasso and ridge with optimized alpha and lambda
+#'
+#' @description
+#' Penalized regression using elastic net. Alpha = 0 corresponds to ridge
+#' regression and alpha = 1 corresponds to Lasso.
+#'
+#' See \code{vignette("glmnet_beta", package = "glmnet")} for a nice tutorial on
+#' glmnet.
+#'
+#' @param Y Outcome variable
+#' @param X Covariate dataframe
+#' @param newX Dataframe to predict the outcome
+#' @param obsWeights Optional observation-level weights
+#' @param id Optional id to group observations from the same unit (not used
+#'   currently).
+#' @param family "gaussian" for regression, "binomial" for binary
+#'   classification. Untested options: "multinomial" for multiple classification
+#'   or "mgaussian" for multiple response, "poisson" for non-negative outcome
+#'   with proportional mean and variance, "cox".
+#' @param alpha Elastic net mixing parameter, range [0, 1]. 0 = ridge regression
+#'   and 1 = lasso.
+#' @param nfolds Number of folds for internal cross-validation to optimize lambda.
+#' @param nlambda Number of lambda values to check, recommended to be 100 or more.
+#' @param loss Loss function, can be "deviance", "mse", or "mae". If family =
+#'   binomial can also be "auc" or "class" (misclassification error).
+#' @param useMin If TRUE use lambda that minimizes risk, otherwise use 1
+#'   standard-error rule which chooses a higher penalty with performance within
+#'   one standard error of the minimum (see Breiman et al. 1984 on CART for
+#'   background).
+#' @param ... Any additional arguments are passed through to cv.glmnet.
+#'
+#' @examples
+#'
+#' # Load a test dataset.
+#' data(PimaIndiansDiabetes2, package = "mlbench")
+#' data = PimaIndiansDiabetes2
+#'
+#' # Omit observations with missing data.
+#' data = na.omit(data)
+#'
+#' Y = as.numeric(data$diabetes == "pos")
+#' X = subset(data, select = -diabetes)
+#'
+#' set.seed(1, "L'Ecuyer-CMRG")
+#'
+#' sl = SuperLearner(Y, X, family = binomial(),
+#'                   SL.library = c("SL.mean", "SL.glm", "SL.glmnet"))
+#' sl
+#'
+#' @references
+#'
+#' Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization paths for
+#' generalized linear models via coordinate descent. Journal of statistical
+#' software, 33(1), 1.
+#'
+#' Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased estimation
+#' for nonorthogonal problems. Technometrics, 12(1), 55-67.
+#'
+#' Tibshirani, R. (1996). Regression shrinkage and selection via the lasso.
+#' Journal of the Royal Statistical Society. Series B (Methodological), 267-288.
+#'
+#' Zou, H., & Hastie, T. (2005). Regularization and variable selection via the
+#' elastic net. Journal of the Royal Statistical Society: Series B (Statistical
+#' Methodology), 67(2), 301-320.
+#'
+#' @seealso \code{\link{predict.SL.glmnet}} \code{\link[glmnet]{cv.glmnet}}
+#'   \code{\link[glmnet]{glmnet}}
+#'
+#' @export
+SL.enet <- function(Y, X, newX, family, obsWeights, id,
+                    alpha = seq(0, 1, 0.1), nfolds = 10, nlambda = 100, useMin = TRUE,
+                    loss = "deviance",
+                    ...) {
+  #.SL.require('glmnet')
+  
+  # X must be a matrix, should we use model.matrix or as.matrix
+  # TODO: support sparse matrices.
+  if (!is.matrix(X)) {
+    X <- model.matrix(~ -1 + ., X)
+    newX <- model.matrix(~ -1 + ., newX)
+  }
+  
+  # Use CV to find optimal alpha.
+  fit0 <- glmnetUtils::cva.glmnet(x = X, 
+                                  y = Y, 
+                                  weights = obsWeights,
+                                  lambda = NULL,
+                                  type.measure = loss,
+                                  nfolds = nfolds,
+                                  family = family$family,
+                                  alpha = alpha,
+                                  nlambda = nlambda,
+                                  intercept = FALSE,
+                                  ...)
+  
+  # Extract best alpha
+  enet_performance <- data.frame(alpha = fit0$alpha)
+  models <- fit0$modlist
+  enet_performance$cvm <- vapply(models, get_cvm, numeric(1))
+  minix <- which.min(enet_performance$cvm)
+  best_alpha <- fit0$alpha[minix]
+  
+  # Use CV to find optimal lambda.
+  fitCV <- glmnet::cv.glmnet(x = X, y = Y, weights = obsWeights,
+                             lambda = NULL,
+                             type.measure = loss,
+                             nfolds = nfolds,
+                             family = family$family,
+                             alpha = best_alpha,
+                             nlambda = nlambda,
+                             intercept=FALSE,
+                             ...)
+  fitCV$alpha <- best_alpha
+  # If we predict with the cv.glmnet object we can specify lambda using a
+  # string.
+  pred <- predict(fitCV, newx = newX, type = "response",
+                  s = ifelse(useMin, "lambda.min", "lambda.1se"))
+  
+  fit <- list(object = fitCV, useMin = useMin)
+  class(fit) <- "SL.glmnet"
+  
+  out <- list(pred = pred, fit = fit)
+  return(out)
+}
+
+# Helper function for extracting best alpha from cva.glmnet
+get_cvm <- function(model) {
+  index <- match(model$lambda.1se, model$lambda)
+  model$cvm[index]
+}
+
+
+
+#' Horseshoe regression
+#'
+#' @param Y Outcome variable
+#' @param X Covariate data frame
+#' @param newX Dataframe to predict the outcome
+#' @param family "gaussian" for regression, "binomial" for binary
+#'   classification. Untested options: "poisson" for for integer or count data
+#' @param prior prior for regression coefficients to use. "Horseshoe" by default. 
+#' Untested options: ridge regression (prior="rr" or prior="ridge"), 
+#' lasso regression (prior="lasso") and horseshoe+ regression (prior="hs+" 
+#' or prior="horseshoe+")
+#' @param N Number of posterior samples to generate.
+#' @param burnin Number of burn-in samples.
+#' @param thinning Desired level of thinning.
+#' @param ... other parameters passed to bayesreg function
+#'
+#' @return SL object
+#' @export
+SL.horseshoe <- function(Y, X, newX, family, prior = "horseshoe", N = 20000L, burnin = 1000L,
+                         thinning = 1L, ...){
+  if (family$family == "binomial") {
+    # Need to convert Y to a factor, otherwise bartMachine does regression.
+    # And importantly, bayesreg expects the second level to be the positive
+    # class, so we have to specify levels.
+    Y = factor(Y, levels = c("0", "1"))
+  }
+  
+  #.SL.require('bayesreg') 
+  df <- data.frame(X,Y)
+  model.HS=bayesreg(Y~.,df,model = family$family,prior=prior,
+                    n.samples=N,burnin = burnin,thin = thinning)  
+  newX <- as.matrix(newX)
+  ynew.samp <- rep(1,nrow(newX)) %*% model.HS$beta0+ newX %*% model.HS$beta
+  if(family$family=="binomial"){
+    ynew.samp <- expit(ynew.samp)
+  }
+  pred <- apply(ynew.samp, 1, median)
+  
+  fit <- list(object = model.HS)
+  class(fit) <- c("SL.horseshoe")
+  
+  out <- list(pred = pred, fit = fit)
+  return(out)
+  
+  #}
+  
+}
+
+predict.SL.horseshoe <- function(object, newdata, family, X = NULL, Y = NULL,...) {
+  #.SL.require("bayesreg")
+  model.HS <- object$object
+  newdata <- as.matrix(newdata)
+  ynew.samp <- rep(1,nrow(newdata)) %*% model.HS$beta0+ newdata %*% model.HS$beta
+  if(family$family=="binomial"){
+    ynew.samp <- expit(ynew.samp)
+  }
+  pred <- apply(ynew.samp, 1, median)
+  
+  return(pred)
+}
+
+
+
+
+########################################################
+# Non-negative least squares or rank loss minimization #
+########################################################
+
+#' Title Meta level Objective function: NNLS for gaussian; Rank loss for binary observations
+#'
+#' @param b Weights vector
+#' @param X Design matrix (data frame)
+#' @param Y Outcome variable
+#'
+#' @return 1 - AUC
+#' @export
+auc.obj <- function(b,X,Y){
+  #Doesn't use observation weights in this part right now
+  wavg <- as.matrix(X) %*% b
+  pred = ROCR::prediction(wavg, Y)
+  AUC = ROCR::performance(pred, "auc")@y.values[[1]]
+  return((1-AUC))
+  
+}
+
+#' NNLS function to optimize weights of several base learners
+#'
+#' @param x x
+#' @param y y
+#' @param wt wt
+#'
+#' @return Solution of the quadratic programming problem
+#' @export
+NNLS <- function(x, y, wt) {
+  wX <- sqrt(wt) * x
+  wY <- sqrt(wt) * y
+  # Z'Z = n * cov(Z)
+  D <- t(wX) %*% wX
+  d <- t(t(wY) %*% wX)
+  A <- rbind(rep(1,ncol(wX)),diag(ncol(wX)))
+  b <- c(1,rep(0, ncol(wX)))
+  # This will give an error if cov(Z) is singular, meaning at least two
+  # columns are linearly dependent.
+  # TODO: This will also error if any learner failed. Fix this.
+  fit <- quadprog::solve.QP(Dmat = D, dvec = d, Amat = t(A), bvec = b, meq=1)
+  return(fit)
+}
+
+#' Combined SuperLearner function for both NNLS/AUC maximization
+#'
+#' @param Y Y
+#' @param X X
+#'
+#' @return Estimated meta-learner coefficients and predictions
+#' @export
+SL.nnls.auc <- function(Y, X, newX, family, obsWeights,bounds = c(0, Inf), ...) {
+  if(family$family=="gaussian"){
+    fit.nnls <- NNLS(x=as.matrix(X),y=Y,wt=obsWeights)
+    initCoef <- fit.nnls$solution
+    initCoef[is.na(initCoef)] <- 0
+    if (sum(initCoef) > 0) {
+      coef <- initCoef/sum(initCoef)
+    } else {
+      warning("All algorithms have zero weight", call. = FALSE)
+      coef <- initCoef
+    }
+    pred <- crossprod(t(as.matrix(newX)), coef)
+    fit <- list(object = fit.nnls)
+    class(fit) <- "SL.nnls.auc"
+    out <- list(pred = pred, fit = fit)
+  }else if (family$family=="binomial"){
+    
+    
+    nmethods = ncol(X)
+    coef_init <- runif(nmethods)
+    coef_init <- coef_init/sum(coef_init)
+    fit.rankloss <- nloptr::nloptr(x0=coef_init,
+                                   eval_f = auc.obj,
+                                   lb=rep(bounds[1],nmethods),
+                                   ub=rep(bounds[2],nmethods),
+                                   opts = list(algorithm="NLOPT_LN_COBYLA",xtol_rel = 1e-8,maxeval=10000),
+                                   X = X,
+                                   Y = Y)
+    if (fit.rankloss$status < 1 || fit.rankloss$status > 4) {
+      warning(fit.rankloss$message)
+    }
+    coef <- fit.rankloss$solution
+    if (anyNA(coef)) {
+      warning("Some algorithms have weights of NA, setting to 0.")
+      coef[is.na(coef)] <- 0
+    }
+    if (!sum(coef) > 0) warning("All algorithms have zero weight", call. = FALSE)
+    
+    coef <- coef/sum(coef)
+    
+    # solution stores normalized coefficients
+    fit.rankloss$solution <- coef
+    
+    pred <- crossprod(t(as.matrix(newX)), coef)
+    fit <- list(object = fit.rankloss)
+    class(fit) <- "SL.nnls.auc"
+    out <- list(pred = pred, fit = fit)
+  }
+  return(out)
+}
+
+#' Predict function for SL.nnls.auc
+#'
+#' @param object object
+#' @param newdata newdata
+#'
+#' @return Prediction from the meta-learner
+#' @export
+predict.SL.nnls.auc <- function(object, newdata, ...) {
+  initCoef <- object$object$solution
+  initCoef[is.na(initCoef)] <- 0
+  if (sum(initCoef) > 0) {
+    coef <- initCoef/sum(initCoef)
+  } else {
+    warning("All algorithms have zero weight", call. = FALSE)
+    coef <- initCoef
+  }
+  pred <- crossprod(t(as.matrix(newdata)), coef)
+  return(pred)
+}
+
+
+# predict.learner <- function(fit,
+#                             feature_table_valid = NULL, # Feature table from validation set. Must have the exact same structure as feature_table. If missing, uses feature_table for feature_table_valid.
+#                             sample_metadata_valid = NULL, # Optional: Sample-specific metadata table from independent validation set. Must have the exact same structure as sample_metadata.
+#                             feature_metadata=NULL){
+#   
+#   if(all(fit$feature.names==rownames(feature_metadata))==FALSE){
+#     stop("Both training feature_table and feature_metadata should have the same rownames.")
+#   }
+#   
+#   
+#   if(is.null(feature_table_valid)){
+#     stop("Feature table for validation set cannot be empty")
+#   } 
+#   # if(is.null(sample_metadata_valid)){
+#   #   stop("Sample metadata for validation set cannot be empty")
+#   # }
+#   
+#   if (!is.null(feature_table_valid)){
+#     if(all(fit$feature.names==rownames(feature_table_valid))==FALSE)
+#       stop("Both feature_table and feature_table_valid should have the same rownames.")
+#   }
+#   
+#   if (!is.null(sample_metadata_valid)){
+#     if(all(colnames(feature_table_valid)==rownames(sample_metadata_valid))==FALSE)
+#       stop("Row names of sample_metadata_valid must match the column names of feature_table_valid")
+#   }
+#   
+#   
+#   
+#   if (!'featureID' %in% colnames(feature_metadata)){
+#     stop("feature_metadata must have a column named 'featureID' describing per-feature unique identifiers.")
+#   }
+#   
+#   if (!'featureType' %in% colnames(feature_metadata)){
+#     stop("feature_metadata must have a column named 'featureType' describing the corresponding source layers.")
+#   }
+#   
+#   if (!is.null(sample_metadata_valid)){
+#     if (!'subjectID' %in% colnames(sample_metadata_valid)){
+#       stop("sample_metadata_valid must have a column named 'subjectID' describing per-subject unique identifiers.")
+#     }
+#     
+#     if (!'Y' %in% colnames(sample_metadata_valid)){
+#       stop("sample_metadata_valid must have a column named 'Y' describing the outcome of interest.")
+#     }
+#   }
+#   
+#   #############################################################################################
+#   # Extract validation Y right away (will not be used anywhere during the validation process) #
+#   #############################################################################################
+#   
+#   if (!is.null(sample_metadata_valid)){validY<-sample_metadata_valid['Y']}
+#   
+#   #####################################################################
+#   # Stacked generalization input data preparation for validation data #
+#   #####################################################################
+#   feature_metadata$featureType<-as.factor(feature_metadata$featureType)
+#   name_layers<-with(droplevels(feature_metadata), list(levels = levels(featureType)), 
+#                     nlevels = nlevels(featureType))$levels
+#   
+#   X_test_layers <- vector("list", length(name_layers)) 
+#   names(X_test_layers) <- name_layers
+#   
+#   layer_wise_prediction_valid<-vector("list", length(name_layers))
+#   names(layer_wise_prediction_valid)<-name_layers
+#   
+#   for(i in seq_along(name_layers)){
+#     
+#     ############################################################
+#     # Prepare single-omic validation data and save predictions #
+#     ############################################################
+#     include_list<-feature_metadata %>% filter(featureType == name_layers[i]) 
+#     t_dat_slice_valid<-feature_table_valid[rownames(feature_table_valid) %in% include_list$featureID, ]
+#     dat_slice_valid<-as.data.frame(t(t_dat_slice_valid))
+#     X_test_layers[[i]] <- dat_slice_valid
+#     layer_wise_prediction_valid[[i]]<-predict.SuperLearner(fit$SL_fits$SL_fit_layers[[i]], newdata = dat_slice_valid)$pred
+#     rownames(layer_wise_prediction_valid[[i]])<-rownames(dat_slice_valid)
+#     rm(dat_slice_valid); rm(include_list)
+#   }
+#   
+#   combo_valid <- as.data.frame(do.call(cbind, layer_wise_prediction_valid))
+#   names(combo_valid)<-name_layers
+#   
+#   if(fit$run_stacked==TRUE){
+#     stacked_prediction_valid<-predict.SuperLearner(fit$SL_fits$SL_fit_stacked, newdata = combo_valid)$pred
+#     rownames(stacked_prediction_valid)<-rownames(combo_valid)  
+#   }
+#   if(fit$run_concat==TRUE){
+#     fulldat_valid<-as.data.frame(t(feature_table_valid))
+#     concat_prediction_valid<-predict.SuperLearner(fit$SL_fits$SL_fit_concat, 
+#                                                   newdata = fulldat_valid)$pred
+#     rownames(concat_prediction_valid)<-rownames(fulldat_valid)
+#   }
+#   
+#   res=list()
+#   
+#   if (!is.null(sample_metadata_valid)){
+#     Y_test=validY$Y
+#     res$Y_test =Y_test
+#   }
+#   
+#   if(fit$run_concat & fit$run_stacked){
+#     yhat.test <- cbind(combo_valid, stacked_prediction_valid , concat_prediction_valid)
+#     colnames(yhat.test) <- c(colnames(combo_valid),"stacked","concatenated")  
+#   }else if(fit$run_concat & !fit$run_stacked){
+#     yhat.test <- cbind(combo_valid,  concat_prediction_valid)
+#     colnames(yhat.test) <- c(colnames(combo_valid),"concatenated")  
+#   }else if(!fit$run_concat & fit$run_stacked){
+#     yhat.test <- cbind(combo_valid, stacked_prediction_valid )
+#     colnames(yhat.test) <- c(colnames(combo_valid),"stacked")  
+#   }else{
+#     yhat.test <- combo_valid   
+#   }
+#   
+#   res$yhat.test <- yhat.test
+#   if (!is.null(sample_metadata_valid)){
+#     if(fit$family=='binomial'){
+#       # Calculate AUC for each layer, stacked and concatenated 
+#       pred=apply(res$yhat.test, 2, ROCR::prediction, labels=res$Y_test)
+#       AUC=vector(length = length(pred))
+#       names(AUC)=names(pred)
+#       for(i in seq_along(pred)){
+#         AUC[i] = round(ROCR::performance(pred[[i]], "auc")@y.values[[1]], 3)
+#       }
+#       res$AUC.test <- AUC  
+#       
+#     }
+#     
+#     if(fit$family=='gaussian'){
+#       # Calculate R^2 for each layer, stacked and concatenated 
+#       R2=vector(length = ncol(res$yhat.test))
+#       names(R2)=names(res$yhat.test)
+#       for(i in seq_along(R2)){
+#         R2[i] = as.vector(cor(res$yhat.test[ ,i], res$Y_test)^2)
+#       }
+#       res$R2.test <- R2
+#     }
+#   }
+#   
+#   return(res)
+#   
+# }
+# 
+# 
+# update.learner <- function(fit,                                     
+#                            feature_table_valid, # Feature table from validation set. Must have the exact same structure as feature_table. If missing, uses feature_table for feature_table_valid.
+#                            sample_metadata_valid=NULL, # OPTIONAL (can provide feature_table_valid and not this):  Sample-specific metadata table from independent validation set. Must have the exact same structure as sample_metadata.
+#                            feature_metadata_valid,
+#                            seed = 1234, # Specify the arbitrary seed value for reproducibility. Default is 1234.
+#                            verbose=FALSE
+# ){
+#   # Check that feature table and feature meta data valid is not empty here
+#   if(is.null(feature_table_valid | is.null(feature_metadata_valid))){
+#     stop("feature table/ feature metadata cannot be NULL for validation set in update learner")
+#   }
+#   
+#   if(fit$family=="gaussian"){
+#     family=gaussian()
+#   }else if(fit$family=="binomial"){
+#     family=binomial()
+#   }
+#   
+#   if (!is.null(sample_metadata_valid)){
+#     validY<-sample_metadata_valid['Y']
+#   }
+#   
+#   
+#   feature_metadata_valid$featureType<-as.factor(feature_metadata_valid$featureType)
+#   name_layers_valid<-with(droplevels(feature_metadata_valid), list(levels = levels(featureType)), nlevels = nlevels(featureType))$levels
+#   
+#   
+#   name_layers <- names(fit$model_fits$model_layers)
+#   
+#   # If layers in validation match layers in train
+#   # Just run predict function and return its object
+#   if(length(intersect(name_layers_valid,name_layers))==length(name_layers)){
+#     
+#     # Check if feature names are same for the train and test 
+#     
+#     return(predict.learner(fit, 
+#                            feature_table_valid = feature_table_valid,
+#                            sample_metadata_valid = sample_metadata_valid,
+#                            feature_metadata = feature_metadata_valid))
+#   }else if(length(intersect(name_layers_valid,name_layers))==0){
+#     
+#     stop("Validation set has no layers in common with model fit")
+#     
+#   }else{
+#     
+#     name_layers_common <- intersect(name_layers_valid,name_layers)
+#     
+#     
+#     
+#     # Extract only common name layers part of the fit object 
+#     fit$model_fits$model_layers <- fit$model_fits$model_layers[name_layers_common]
+#     fit$SL_fits$SL_fit_layers <-  fit$SL_fits$SL_fit_layers[name_layers_common]
+#     fit$X_train_layers <- fit$X_train_layers[name_layers_common]
+#     
+#     # Use common layers to get layer wise predictions for validation set
+#     X_test_layers <- vector("list", length(name_layers_common)) 
+#     names(X_test_layers) <- name_layers_common
+#     
+#     if (!is.null(feature_table_valid)){
+#       layer_wise_prediction_valid<-vector("list", length(name_layers_common))
+#       names(layer_wise_prediction_valid)<-name_layers_common
+#     } 
+#     
+#     
+#     for(i in seq_along(name_layers_common)){
+#       include_list<-feature_metadata_valid %>% filter(featureType == name_layers_common[i]) 
+#       
+#       # check if feature names in common layers match for train and test set 
+#       if(!all(include_list$featureID==colnames(fit$X_train_layers[name_layers_common[i]]))){
+#         stop(paste0("Validation set feature names for layer ", name_layers_common[i]," do not match with training data" ))
+#       }
+#       
+#       
+#       if (!is.null(feature_table_valid)){
+#         t_dat_slice_valid<-feature_table_valid[rownames(feature_table_valid) %in% include_list$featureID, ]
+#         dat_slice_valid<-as.data.frame(t(t_dat_slice_valid))
+#         X_test_layers[[i]] <- dat_slice_valid
+#         layer_wise_prediction_valid[[i]]<-predict.SuperLearner(fit$SL_fits$SL_fit_layers[[i]], newdata = dat_slice_valid)$pred
+#         rownames(layer_wise_prediction_valid[[i]])<-rownames(dat_slice_valid)
+#         fit$SL_fits$SL_fit_layers[[i]]$validX<-dat_slice_valid
+#         fit$SL_fits$SL_fit_layers[[i]]$validPrediction<-layer_wise_prediction_valid[[i]]
+#         colnames(fit$SL_fits$SL_fit_layers[[i]]$validPrediction)<-'validPrediction'
+#         rm(dat_slice_valid); rm(include_list)
+#       }
+#     }
+#     
+#     combo <- fit$yhat.train[ ,name_layers_common]
+#     
+#     if (!is.null(feature_table_valid)){
+#       combo_valid <- as.data.frame(do.call(cbind, layer_wise_prediction_valid))
+#       names(combo_valid)<-name_layers_valid
+#     }
+#     
+#     
+#     if(fit$run_stacked){
+#       
+#       cat('Running new stacked model...\n')
+#       #}
+#       
+#       ###################################
+#       # Run user-specified meta learner #
+#       ###################################
+#       
+#       SL_fit_stacked<-SuperLearner::SuperLearner(Y = fit$Y_train, 
+#                                                  X = combo, 
+#                                                  cvControl = fit$cvControl,    
+#                                                  verbose = verbose, 
+#                                                  SL.library = fit$meta_learner,
+#                                                  family=family,
+#                                                  id=fit$id)
+#       
+#       # Extract the fit object from superlearner
+#       model_stacked <- SL_fit_stacked$fitLibrary[[1]]$object
+#       
+#       ###################################################
+#       # Append the corresponding y and X to the results #
+#       ###################################################
+#       
+#       SL_fit_stacked$Y<-fit$Y_train
+#       SL_fit_stacked$X<-combo
+#       if (!is.null(sample_metadata_valid)) SL_fit_stacked$validY<-validY
+#       
+#       #################################################################
+#       # Prepate stacked input data for validation and save prediction #
+#       #################################################################
+#       
+#       if (!is.null(feature_table_valid)){
+#         stacked_prediction_valid<-predict.SuperLearner(SL_fit_stacked, newdata = combo_valid)$pred
+#         rownames(stacked_prediction_valid)<-rownames(combo_valid)
+#         SL_fit_stacked$validX<-combo_valid
+#         SL_fit_stacked$validPrediction<-stacked_prediction_valid
+#         colnames(SL_fit_stacked$validPrediction)<-'validPrediction'
+#       }
+#       
+#       fit$model_fits$model_stacked <- model_stacked
+#       fit$SL_fits$SL_fit_stacked <- SL_fit_stacked
+#       fit$yhat.train$stacked <- SL_fit_stacked$Z
+#       
+#       
+#     }
+#     
+#     
+#     if(fit$run_concat){
+#       #if (verbose) {
+#       cat('Running new concatenated model...\n')
+#       #}
+#       ###################################
+#       # Prepate concatenated input data #
+#       ###################################
+#       feature_table <-  Reduce(cbind.data.frame,fit$X_train_layers)
+#       feature_table <- feature_table[ ,feature_metadata_valid$featureID]
+#       fulldat<-as.data.frame(feature_table)
+#       
+#       ###################################
+#       # Run user-specified base learner #
+#       ###################################
+#       
+#       SL_fit_concat<-SuperLearner::SuperLearner(Y = fit$Y_train, 
+#                                                 X = fulldat, 
+#                                                 cvControl = fit$cvControl,    
+#                                                 verbose = verbose, 
+#                                                 SL.library = fit$base_learner,
+#                                                 family=family,
+#                                                 id=fit$id)
+#       
+#       # Extract the fit object from superlearner
+#       model_concat <- SL_fit_concat$fitLibrary[[1]]$object
+#       
+#       ###################################################
+#       # Append the corresponding y and X to the results #
+#       ###################################################
+#       
+#       SL_fit_concat$Y<-fit$Y_train
+#       SL_fit_concat$X<-fulldat
+#       if (!is.null(sample_metadata_valid)) SL_fit_concat$validY<-validY
+#       
+#       #########################################################################
+#       # Prepate concatenated input data for validaton set and save prediction #
+#       #########################################################################
+#       
+#       if (!is.null(feature_table_valid)){
+#         fulldat_valid<-as.data.frame(t(feature_table_valid))
+#         concat_prediction_valid<-predict.SuperLearner(SL_fit_concat, newdata = fulldat_valid)$pred
+#         SL_fit_concat$validX<-fulldat_valid
+#         rownames(concat_prediction_valid)<-rownames(fulldat_valid)
+#         SL_fit_concat$validPrediction<-concat_prediction_valid
+#         colnames(SL_fit_concat$validPrediction)<-'validPrediction'
+#       }
+#       
+#       fit$model_fits$model_concat <- model_concat
+#       fit$SL_fits$SL_fit_concat <- SL_fit_concat
+#       fit$yhat.train$concatenated <- SL_fit_concat$Z
+#     }
+#     
+#     
+#     if(fit$run_concat & fit$run_stacked){
+#       fit$yhat.train <- fit$yhat.train[ ,c(name_layers_common,"stacked","concatenated")]
+#       
+#     }else if(fit$run_concat & !fit$run_stacked){
+#       fit$yhat.train <- fit$yhat.train[ ,c(name_layers_common,"concatenated")]
+#       
+#     }else if(!fit$run_concat & fit$run_stacked){
+#       fit$yhat.train <- fit$yhat.train[ ,c(name_layers_common,"stacked")]
+#       
+#     }else if(!fit$run_concat & !fit$run_stacked){
+#       fit$yhat.train <- fit$yhat.train[ ,name_layers_common]
+#       
+#     }
+#     
+#     
+#     if(!is.null(feature_table_valid)){
+#       
+#       if(fit$run_concat & fit$run_stacked){
+#         yhat.test <- cbind(combo_valid, SL_fit_stacked$validPrediction,SL_fit_concat$validPrediction)
+#         colnames(yhat.test) <- c(colnames(combo_valid),"stacked","concatenated")
+#         
+#       }else if(fit$run_concat & !fit$run_stacked){
+#         yhat.test <- cbind(combo_valid, SL_fit_concat$validPrediction)
+#         colnames(yhat.test) <- c(colnames(combo_valid),"concatenated")
+#         
+#       }else if(!fit$run_concat & fit$run_stacked){
+#         yhat.test <- cbind(combo_valid, SL_fit_stacked$validPrediction)
+#         colnames(yhat.test) <- c(colnames(combo_valid),"stacked")
+#         
+#       }else if(!fit$run_concat & !fit$run_stacked){
+#         yhat.test <- cbind(combo_valid)
+#         colnames(yhat.test) <- c(colnames(combo_valid))
+#         
+#       }
+#       fit$yhat.test <- yhat.test
+#       fit$X_test_layers <- X_test_layers
+#     }
+#     if(is.null(sample_metadata_valid)){
+#       fit$test=FALSE
+#     }else{
+#       fit$test=TRUE
+#     }
+#     if(fit$meta_learner=="SL.nnls.auc" & fit$run_stacked){
+#       fit$weights <- fit$model_fits$model_stacked$solution
+#       names(fit$weights) <- colnames(combo)
+#     }
+#     
+#     if(!is.null(sample_metadata_valid)){fit$Y_test=validY$Y}
+#     
+#     if(fit$family=="binomial"){
+#       # Calculate AUC for each layer, stacked and concatenated 
+#       pred=apply(fit$yhat.train, 2, ROCR::prediction, labels=fit$Y_train)
+#       AUC=vector(length = length(pred))
+#       names(AUC)=names(pred)
+#       for(i in seq_along(pred)){
+#         AUC[i] = round(ROCR::performance(pred[[i]], "auc")@y.values[[1]], 3)
+#       }
+#       fit$AUC.train <- AUC
+#       
+#       if(fit$test==TRUE){
+#         # Calculate AUC for each layer, stacked and concatenated 
+#         pred=apply(fit$yhat.test, 2, ROCR::prediction, labels=fit$Y_test)
+#         AUC=vector(length = length(pred))
+#         names(AUC)=names(pred)
+#         for(i in seq_along(pred)){
+#           AUC[i] = round(ROCR::performance(pred[[i]], "auc")@y.values[[1]], 3)
+#         }
+#         fit$AUC.test <- AUC  
+#       }
+#     }
+#     if(fit$family=="gaussian"){
+#       
+#       # Calculate R^2 for each layer, stacked and concatenated 
+#       R2=vector(length = ncol(fit$yhat.train))
+#       names(R2)=names(fit$yhat.train)
+#       for(i in seq_along(R2)){
+#         R2[i] = as.vector(cor(fit$yhat.train[ ,i], fit$Y_train)^2)
+#       }
+#       fit$R2.train <- R2
+#       if(fit$test==TRUE){
+#         # Calculate R^2 for each layer, stacked and concatenated 
+#         R2=vector(length = ncol(fit$yhat.test))
+#         names(R2)=names(fit$yhat.test)
+#         for(i in seq_along(R2)){
+#           R2[i] = as.vector(cor(fit$yhat.test[ ,i], fit$Y_test)^2)
+#         }
+#         fit$R2.test <- R2
+#       }
+#       
+#     }  
+#     fit$feature.names <- rownames(feature_table_valid)
+#     print.learner(fit)
+#     return(fit)
+#   }
+#   
+#   
+# }
+# 
+# plot.learner <- function(fit,label_size=8, label_x=0.3,vjust=0.1,rowwise=TRUE){
+#   
+#   clean_base_learner <- str_remove_all(fit$base_learner, 'SL.')
+#   clean_meta_learner <- str_remove_all(fit$meta_learner, 'SL.')  
+#   method <- paste(clean_base_learner,clean_meta_learner,sep=' + ')
+#   
+#   if(fit$family=='binomial'){
+#     
+#     # Extract ROC plot data 
+#     list.ROC<-vector("list", length = ncol(fit$yhat.train))
+#     names(list.ROC)<-colnames(fit$yhat.train)
+#     
+#     y <- fit$Y_train
+#     # Loop over layers 
+#     for(k in 1:length(list.ROC)){
+#       preds<-fit$yhat.train[ ,k]
+#       pred = ROCR::prediction(preds, y)
+#       AUC = round(ROCR::performance(pred, "auc")@y.values[[1]], 2)
+#       perf = ROCR::performance(pred, "sens", "spec") 
+#       list.ROC[[k]] <- data.frame(sensitivity = methods::slot(perf, "y.values")[[1]],
+#                                   specificity = 1 - methods::slot(perf, "x.values")[[1]],
+#                                   AUC = AUC,
+#                                   layer = names(list.ROC)[k],
+#                                   method = method)
+#     }
+#     
+#     # Combine
+#     ROC_table<-do.call('rbind', list.ROC)
+#     
+#     # Prepare data for plotting
+#     plot_data<-ROC_table
+#     plot_data$displayItem<-paste(plot_data$layer, " AUC = ", plot_data$AUC, sep="")
+#     plot_data$displayItem<-factor(plot_data$displayItem,
+#                                   levels = unique(plot_data$displayItem))
+#     
+#     # ROC curves
+#     p1<-ggplot(plot_data,
+#                aes(x=specificity,
+#                    y=sensitivity,
+#                    group=displayItem)) + 
+#       geom_line(aes(x=specificity,y=sensitivity,color=displayItem)) +
+#       #ggtitle(paste('Training data: ', method, sep=''))+
+#       theme(legend.position="bottom", 
+#             legend.background=element_blank(),
+#             legend.box.background=element_rect(colour="black")) + 
+#       theme_bw() +
+#       xlab("False Positive Rate") +
+#       ylab("True Positive Rate") +
+#       theme(legend.position = "right", legend.direction = "vertical") +
+#       labs(color='') 
+#     
+#     if(fit$test==TRUE){
+#       
+#       # Extract ROC plot data 
+#       list.ROC.valid<-vector("list", length = ncol(fit$yhat.test))
+#       names(list.ROC.valid)<-colnames(fit$yhat.test)
+#       
+#       y <- fit$Y_test
+#       # Loop over layers 
+#       for(k in 1:length(list.ROC.valid)){
+#         preds<-fit$yhat.test[ ,k]
+#         pred = ROCR::prediction(preds, y)
+#         AUC = round(ROCR::performance(pred, "auc")@y.values[[1]], 2)
+#         perf = ROCR::performance(pred, "sens", "spec") 
+#         list.ROC.valid[[k]] <- data.frame(sensitivity = methods::slot(perf, "y.values")[[1]],
+#                                           specificity = 1 - methods::slot(perf, "x.values")[[1]],
+#                                           AUC = AUC,
+#                                           layer = names(list.ROC.valid)[k],
+#                                           method = method)
+#       }
+#       
+#       # Combine
+#       ROC_table_valid<-do.call('rbind', list.ROC.valid)
+#       
+#       # Prepare data for plotting
+#       plot_data<-ROC_table_valid
+#       plot_data$displayItem<-paste(plot_data$layer, " AUC = ", plot_data$AUC, sep="")
+#       plot_data$displayItem<-factor(plot_data$displayItem,
+#                                     levels = unique(plot_data$displayItem))
+#       
+#       # ROC curves
+#       p2<-ggplot(plot_data,
+#                  aes(x=specificity,
+#                      y=sensitivity,
+#                      group=displayItem)) + 
+#         geom_line(aes(x=specificity,y=sensitivity,color=displayItem)) +
+#         #ggtitle(paste('Test data: ', method, sep=''))+
+#         theme(legend.position="bottom", 
+#               legend.background=element_blank(),
+#               legend.box.background=element_rect(colour="black")) + 
+#         theme_bw() +
+#         xlab("False Positive Rate") +
+#         ylab("True Positive Rate") +
+#         theme(legend.position = "right", legend.direction = "vertical") +
+#         labs(color='') 
+#       
+#       p<-plot_grid(p1, 
+#                    p2, 
+#                    ifelse(rowwise,nrow = 2,ncol=2), 
+#                    labels = c(paste('A. ', fit$folds,'-fold CV',sep = ''), 
+#                               'B. Independent Validation'),
+#                    label_size = label_size, label_x = label_x,vjust = vjust)+
+#         theme(plot.margin = unit(c(1,1,1,1), "cm"))  
+#       print(p)
+#       return(list('plot'=p,'ROC_table'=ROC_table,'ROC_table_valid'=ROC_table_valid))
+#     }
+#     p <- plot_grid(p1, 
+#                    nrow = 1, 
+#                    labels = c(paste('A. ', fit$folds,'-fold CV',sep = '')), 
+#                    label_size = label_size, label_x = label_x,vjust = vjust)+
+#       theme(plot.margin = unit(c(1,1,1,1), "cm"))
+#     print(p)
+#     return(list('plot'=p,'ROC_table'=ROC_table)) 
+#   }
+#   else if(fit$family=='gaussian'){
+#     
+#     
+#     # Extract R2 plot data 
+#     list.R2<-vector("list", length = ncol(fit$yhat.train))
+#     names(list.R2)<-colnames(fit$yhat.train)
+#     
+#     y <- fit$Y_train
+#     # Loop over layers 
+#     for(k in 1:length(list.R2)){
+#       preds<-fit$yhat.train[ ,k]
+#       R2<- as.vector(cor(preds, y)^2)
+#       list.R2[[k]] <- data.frame(R2 = R2,
+#                                  layer = names(list.R2)[k],
+#                                  method = method)
+#     }
+#     
+#     # Combine 
+#     R2_table<-do.call('rbind', list.R2)
+#     
+#     # Plot
+#     p1<-ggplot(R2_table, aes(x = method, y = R2)) +
+#       geom_bar(position="dodge", stat="identity", aes(fill=layer)) +
+#       xlab("") + 
+#       ylab(expression(paste("Prediction accuracy (", R^2, ")"))) +
+#       scale_fill_discrete(name="") + 
+#       theme(legend.position="bottom", 
+#             legend.background=element_blank(),
+#             legend.box.background=element_rect(colour="black")) + 
+#       theme_bw() +
+#       guides(fill=guide_legend(title="")) +
+#       theme(legend.position = "right", legend.direction = "vertical",
+#             strip.background = element_blank()) +
+#       labs(fill='') 
+#     
+#     
+#     
+#     if(fit$test==TRUE){
+#       
+#       
+#       # Extract R2 plot data 
+#       list.R2.valid<-vector("list", length = ncol(fit$yhat.test))
+#       names(list.R2.valid)<-colnames(fit$yhat.test)
+#       
+#       y <- fit$Y_test
+#       # Loop over layers 
+#       for(k in 1:length(list.R2.valid)){
+#         preds<-fit$yhat.test[ ,k]
+#         R2<- as.vector(cor(preds, y)^2)
+#         list.R2.valid[[k]] <- data.frame(R2 = R2,
+#                                          layer = names(list.R2.valid)[k],
+#                                          method = method)
+#       }
+#       
+#       # Combine 
+#       R2_table_valid<-do.call('rbind', list.R2.valid)
+#       
+#       # Plot
+#       p2<-ggplot(R2_table_valid, aes(x = method, y = R2)) +
+#         geom_bar(position="dodge", stat="identity", aes(fill=layer)) +
+#         xlab("") + 
+#         ylab(expression(paste("Prediction accuracy (", R^2, ")"))) +
+#         scale_fill_discrete(name="") + 
+#         theme(legend.position="bottom", 
+#               legend.background=element_blank(),
+#               legend.box.background=element_rect(colour="black")) + 
+#         theme_bw() +
+#         guides(fill=guide_legend(title="")) +
+#         theme(legend.position = "right", legend.direction = "vertical",
+#               strip.background = element_blank()) +
+#         labs(fill='') 
+#       
+#       p<-plot_grid(p1, 
+#                    p2, 
+#                    ifelse(rowwise,nrow = 2,ncol=2), 
+#                    labels = c(paste('A. ', fit$folds,'-fold CV',sep = ''), 
+#                               'B. Independent Validation'),
+#                    label_size = label_size, label_x = label_x,vjust = vjust)+
+#         theme(plot.margin = unit(c(1,1,1,1), "cm"))  
+#       print(p)
+#       return(list('plot'=p,'R2_table'=R2_table,'R2_table_valid'=R2_table_valid))
+#       
+#     }
+#     p <- plot_grid(p1, 
+#                    ncol = 1, 
+#                    labels = c(paste('A. ', fit$folds,'-fold CV',sep = '')), 
+#                    label_size = label_size, label_x = label_x,vjust = vjust)+
+#       theme(plot.margin = unit(c(1,1,1,1), "cm"))
+#     print(p)
+#     return(list('plot'=p,'R2_table'=R2_table)) 
+#     
+#   }
+# }
+
+# Borrowed from mia package
+.require_package <- function(pkg){
+  if(!requireNamespace(pkg, quietly = TRUE)){
+    stop("'",pkg,"' package not found. Please install the '", pkg,
+         "' package to use this function.", call. = FALSE)
+  }
+  return(NULL)
+}