[3172cb]: / man / IntegratedLearner.Rd

Download this file

94 lines (77 with data), 4.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/IntegratedLearner.R
\name{IntegratedLearner}
\alias{IntegratedLearner}
\title{Integrated machine learning for multi-omics prediction and classification}
\usage{
IntegratedLearner(
feature_table,
sample_metadata,
feature_metadata,
feature_table_valid = NULL,
sample_metadata_valid = NULL,
folds = 5,
seed = 1234,
base_learner = "SL.BART",
base_screener = "All",
meta_learner = "SL.nnls.auc",
run_concat = TRUE,
run_stacked = TRUE,
verbose = FALSE,
print_learner = TRUE,
refit.stack = FALSE,
family = gaussian(),
...
)
}
\arguments{
\item{feature_table}{An R data frame containing multiview features (in rows) and samples (in columns).
Column names of \code{feature_table} must match the row names of \code{sample_metadata}.}
\item{sample_metadata}{An R data frame of metadata variables (in columns).
Must have a column named \code{subjectID} describing per-subject unique identifiers.
For longitudinal designs, this variable is expected to have non-unique values.
Additionally, a column named \code{Y} must be present which is the outcome of interest (can be binary or continuous).
Row names of \code{sample_metadata} must match the column names of \code{feature_table}.}
\item{feature_metadata}{An R data frame of feature-specific metadata across views (in columns) and features (in rows).
Must have a column named \code{featureID} describing per-feature unique identifiers.
Additionally, a column named \code{featureType} should describe the corresponding source layers.
Row names of \code{feature_metadata} must match the row names of \code{feature_table}.}
\item{feature_table_valid}{Feature table from validation set for which prediction is desired.
Must have the exact same structure as \code{feature_table}. If missing, uses \code{feature_table} for \code{feature_table_valid}.}
\item{sample_metadata_valid}{Sample-specific metadata table from independent validation set when available.
Must have the exact same structure as \code{sample_metadata}.}
\item{folds}{How many folds in the V-fold nested cross-validation? Default is 5.}
\item{seed}{Specify the arbitrary seed value for reproducibility. Default is 1234.}
\item{base_learner}{Base learner for late fusion and early fusion.
Check out the \href{https://cran.r-project.org/web/packages/SuperLearner/vignettes/Guide-to-SuperLearner.html}{SuperLearner user manual} for all available options. Default is \code{SL.BART}.}
\item{base_screener}{Should variables be screened before fitting base models? \code{All} (the default) means no screening.
Check out the \href{https://cran.r-project.org/web/packages/SuperLearner/vignettes/Guide-to-SuperLearner.html}{SuperLearner user manual} for all available options.}
\item{meta_learner}{Meta-learner for late fusion (stacked generalization). Defaults to \code{SL.nnls.auc}.
Check out the \href{https://cran.r-project.org/web/packages/SuperLearner/vignettes/Guide-to-SuperLearner.html}{SuperLearner user manual} for all available options.}
\item{run_concat}{Should early fusion be run? Default is TRUE. Uses the specified \code{base_learner} as the learning algorithm.}
\item{run_stacked}{Should stacked model (late fusion) be run? Default is TRUE.}
\item{verbose}{logical; TRUE for \code{SuperLearner} printing progress (helpful for debugging). Default is FALSE.}
\item{print_learner}{logical; Should a detailed summary be printed? Default is TRUE.}
\item{refit.stack}{logical; For late fusion, post-refit predictions on the entire data are returned if specified. Default is FALSE.}
\item{family}{Currently allows \code{gaussian()} for continuous or \code{binomial()} for binary outcomes.}
\item{...}{Additional arguments. Not used currently.}
}
\value{
A \code{SuperLearner} object containing the trained model fits.
}
\description{
Performs integrated machine learning to predict a binary or continuous outcome based on two or more omics layers (views).
The \code{IntegratedLearner} function takes a training set (Y, X1, X2,...,Xn) and returns the predicted values based on a validation set.
It also performs V-fold nested cross-validation to estimate the prediction accuracy of various fusion algorithms.
Three types of integration paradigms are supported: early, late, and intermediate.
The software includes multiple ML models based on the \code{\link[SuperLearner]{SuperLearner}} R package as well as several data exploration capabilities and visualization modules in a unified estimation framework.
}
\author{
Himel Mallick, \email{him4004@med.cornell.edu}
}
\keyword{metagenomics}
\keyword{microbiome}
\keyword{multiomics}
\keyword{scRNASeq}
\keyword{singlecell}
\keyword{tweedie}