Switch to unified view

a b/R/IntegratedLearnerFromMAE.R
1
#' MultiAssayExperiment wrapper for IntegratedLearner
2
#' 
3
#' This function offers a simple interface to the \code{IntegratedLearner()}
4
#' from a \code{\link{MultiAssayExperiment}} object. Instead of wrangling the
5
#' data, the function does it for the user and feeds it to
6
#' \code{IntegratedLearner()}.
7
#'
8
#' @param mae \code{\link{MultiAssayExperiment}}
9
#' 
10
#' @param experiment \code{Character vector} or \code{Integer vector}. Specifies
11
#' experiments from \code{mae}.
12
#' 
13
#' @param assay.type \code{Character vector}. Specified the name of assay for
14
#' each experiment. The length must be equal to \code{experiment}.
15
#' 
16
#' @param outcome.col \code{Character scalar}. Specifies the name of column
17
#' from \code{colData} that includes the outcome variable.
18
#' 
19
#' @param valid.col \code{Character scalar}. Specifies the name of column
20
#' from \code{colData} that includes the information on which sample belongs to
21
#' validation set. The variable must be \code{logical}. (Default: \code{NULL})
22
#' 
23
#' @param ... additional arguments passed to \code{IntegratedLearner()}
24
#' \itemize{
25
#'   \item \code{na.rm}: \code{Logical scalar}. Should features with missing
26
#'   values be dropped? (Default: \code{FALSE})
27
#' }
28
#' 
29
#' @return A \code{SuperLearner} object containing the trained model fits.
30
#' 
31
#' @export
32
33
IntegratedLearnerFromMAE <- function(
34
        mae, experiment, assay.type, outcome.col, valid.col = NULL, ...){
35
    ############################### INPUT CHECK ################################
36
    # MultiAssayExperiment and mia are required to run this function. However,
37
    # they are not direct dependency of the package; they are only needed when
38
    # this function is run. tidyr is needed for tidyr::pivot_wider
39
    .require_package("MultiAssayExperiment")
40
    .require_package("mia")
41
    .require_package("tidyr")
42
    # The object must be MAE
43
    if( !is(mae, "MultiAssayExperiment") ){
44
        stop("'mae' must be MultiAssayExperiment.", call. = FALSE)
45
    }
46
    # Experiments inside MAR must be SE objects
47
    if( !all(vapply(MultiAssayExperiment::experiments(mae), function(x)
48
        is(x, "SummarizedExperiment"), FUN.VALUE = logical(1))) ){
49
        stop("Experiments in 'mae' must be SummarizedExperiment objects.",
50
             call. = FALSE)
51
    }
52
    # experiment must be either integers specifying the index of experiments or
53
    # characters specifying the names of experiments
54
    is_index <- mia:::.is_integer(experiment) &&
55
        all(experiment <= length(experiments(mae)))
56
    is_character <- is.character(experiment) && all(experiment %in% names(mae))
57
    if( !(is_index || is_character) ){
58
        stop("'experiment' must be a vector specifying either names or ",
59
             "indices of experiments.", call. = FALSE)
60
    }
61
    # assay.type must be a vector of character values. At this point, we do not
62
    # check if the assays can be found. Moreover, there must be as many
63
    # assay.types as there are experiments.
64
    if( !(is.character(assay.type) &&
65
          length(assay.type) == length(experiment)) ){
66
        stop("'assay.type' must be a character vector specifying assay for ",
67
             "each experiment.", call. = FALSE)
68
    }
69
    # outcome.col and valid.col must be character values but we dot check
70
    # here whether they can be found since they can be inside experiments.
71
    if( !mia:::.is_a_string(outcome.col) ){
72
        stop("'outcome.col' must be a single character value.")
73
    }
74
    if( !(is.null(valid.col) || mia:::.is_a_string(valid.col)) ){
75
        stop("'valid.col' must be a single character value.")
76
    }
77
    ############################# INPUT CHECK END ##############################
78
    # Subset the MAE
79
    mae <- mae[ , , experiment]
80
    # Get data from experiments as a single large table
81
    data <- .get_data_from_MAE(
82
        mae, experiment, assay.type, outcome.col, valid.col)
83
    # Divide data into feature table, and metadata
84
    data <- .wrangle_data(data, outcome.col, valid.col, ...)
85
    # Fit the model
86
    fit <- do.call(IntegratedLearner, data)
87
    return(fit)
88
}
89
90
################################ HELP FUNCTIONS ################################
91
92
# This function fetches data from single experiments and create a single table.
93
.get_data_from_MAE <- function(
94
        mae, experiment, assay.type, outcome.col, valid.col){
95
    # Loop through experiments
96
    feature_table <- lapply(seq_len(length(experiment)), function(i){
97
        # Get experiment and assay name for this experiment
98
        exp <- experiment[[i]]
99
        assay_name <- assay.type[[i]]
100
        # Get SE object
101
        tse <- MultiAssayExperiment::getWithColData(mae, exp, verbose = FALSE)
102
        # Check if the assay name is correct
103
        mia:::.check_assay_present(assay_name, tse)
104
        # Outcome column must be found from colData
105
        if( !(mia:::.is_a_string(outcome.col) &&
106
              outcome.col %in% colnames(colData(tse))) ){
107
            stop("'outcome.col' must specify a column from colData.",
108
                 call. = FALSE)
109
        }
110
        if( !(is.null(valid.col) || (mia:::.is_a_string(valid.col) &&
111
                                     valid.col %in% colnames(colData(tse)))) ){
112
            stop("'valid.col' must be NULL or specify a column from colData.",
113
                 call. = FALSE)
114
        }
115
        # Also check that the values in validation set are correct
116
        if( !is.null(valid.col) && !is.logical(tse[[valid.col]]) ){
117
            stop("'valid.col' must specify a column that has boolean values.",
118
                 call. = FALSE)
119
        }
120
        # Melt data to single table
121
        df <- mia::meltSE(
122
            tse, assay.type = assay_name,
123
            row.name = "featureID", col.name = "subjectID",
124
            add.col = c(outcome.col, valid.col))
125
        # Replace the assay name with general name
126
        colnames(df)[ colnames(df) == assay_name ] <- "value"
127
        # Add omic type
128
        df[["featureType"]] <- as.character(exp)
129
        return(df)
130
    })
131
    # Combine data
132
    feature_table <- do.call(rbind, feature_table)
133
    # Convert outcome to numeric
134
    feature_table[[outcome.col]] <- as.numeric(
135
        as.factor(feature_table[[outcome.col]]))
136
    return(feature_table)
137
}
138
139
# This function gets single table as input and outputs an arguments ready for
140
# IntegratedLearner()
141
.wrangle_data <- function(
142
        feature_table, outcome.col, valid.col, na.rm = FALSE, ...){
143
    #
144
    if( !mia:::.is_a_bool(na.rm) ){
145
        stop("'na.rm' must be TRUE or FALSE.", call. = FALSE)
146
    }
147
    #
148
    # Divide data into metadata and feature table
149
    sample_metadata <- feature_table[
150
        , c("subjectID", outcome.col, valid.col), drop = FALSE]
151
    sample_metadata <- sample_metadata[ !duplicated(sample_metadata), ]
152
    feature_metadata <- feature_table[
153
        , c("featureID", "featureType"), drop = FALSE]
154
    feature_metadata <- feature_metadata[ !duplicated(feature_metadata), ]
155
    feature_table <- feature_table[
156
        , c("featureID", "subjectID", "value"), drop = FALSE]
157
    
158
    # Rename outcome column to Y which is require by IntegratedLearner
159
    colnames(sample_metadata)[ colnames(sample_metadata) == outcome.col ] <- "Y"
160
    # Convert feature table into wide format where rows represent features
161
    # and column samples
162
    feature_table <- tidyr::pivot_wider(
163
        feature_table, names_from = "subjectID", values_from = "value")
164
    
165
    # Convert datasets into data.frame and add rownames
166
    sample_metadata <- sample_metadata |> as.data.frame()
167
    rownames(sample_metadata) <- sample_metadata[["subjectID"]]
168
    feature_metadata <- feature_metadata |> as.data.frame()
169
    rownames(feature_metadata) <- feature_metadata[["featureID"]]
170
    feature_table <- feature_table |> as.data.frame()
171
    rownames(feature_table) <- feature_table[["featureID"]]
172
    # Feature table must have only numeric values
173
    feature_table[["featureID"]] <- NULL
174
    
175
    # Check if there are missing values. Drop them if specified. SuperLearner
176
    # does not support missing values.
177
    if( any(is.na(feature_table)) && na.rm ){
178
        keep <- complete.cases(feature_table)
179
        feature_table <- feature_table[keep, ]
180
        feature_metadata <- feature_metadata[keep, ]
181
        # Give error or warning depending on if there are still features left
182
        # after removing those that are not complete.
183
        FUN <- if( sum(keep) == 0 ) stop else warning
184
        FUN(sum(!keep), "/", length(keep),
185
            " features dropped due to missing data.", call. = FALSE)
186
    }
187
    
188
    # If user specified validation set, divide data into training and validation
189
    # datasets
190
    if( !is.null(valid.col) ){
191
        # Divide feature table
192
        feature_table_valid <- feature_table[
193
            , sample_metadata[[valid.col]], drop = FALSE]
194
        feature_table <- feature_table[
195
            , !sample_metadata[[valid.col]], drop = FALSE]
196
        # Divide sample metadata
197
        sample_metadata_valid <- sample_metadata[sample_metadata[[valid.col]], ]
198
        sample_metadata <- sample_metadata[!sample_metadata[[valid.col]], ]
199
        # Create an argument list
200
        args <- list(
201
            feature_table = feature_table,
202
            sample_metadata = sample_metadata,
203
            feature_metadata = feature_metadata,
204
            feature_table_valid = feature_table_valid,
205
            sample_metadata_valid = sample_metadata_valid
206
        )
207
    } else{
208
        # Create an argument list
209
        args <- list(
210
            feature_table = feature_table,
211
            sample_metadata = sample_metadata,
212
            feature_metadata = feature_metadata
213
        )
214
    }
215
    # Add those arguments that are passed with "..."
216
    args <- c(args, list(...))
217
    return(args)
218
}