#' Extract sPLS-DA feature selection stability
#'
#' @description
#' Extract feature selection stabilities for a given component from a
#' performance validated mixOmics sPLS-DA model Feature stabilities are an
#' important indicator of the confidence that a selected feature is predictive
#' for the outcome classes in the model, and hence (in combination with the
#' loading weight) is likely to be of biological significance.
#'
#' @param splsda.model Trained mixOmics sPLS-DA model.
#' @param splsda.perf Performance evaluation of the sPLS-DA model generated by
#' mixOmics perf.
#' @param comp Component number of which to retrieve feature selection
#' stabilities.
#'
#' @return A data frame containing the features selected for the specificed
#' component and their relative stability, as a proportion of trained models
#' 0-1.
#' @export
#'
#' @examples
#' \dontrun{
#' feature.selection.stability(splsda.analysis, perf.splsda.analysis, comp=1)
#' }
feature.selection.stability <- function(splsda.model, splsda.perf, comp) {
ind.match = match(selectVar(splsda.model, comp = comp)$name,
names(splsda.perf$features$stable[[comp]]))
freq = as.numeric(splsda.perf$features$stable[[comp]][ind.match])
stability <- data.frame(selectVar(splsda.model, comp = comp)$value, freq)
colnames(stability) <- c("value", "stability")
return(stability)
}
#' Get an sPLS-DA loadings table
#'
#' @description
#' Get a dataframe indicating the loading factors for a trained mixOmics sPLS-DA
#' model. Limited to two components.
#'
#' @param splsda.analysis Trained mixOmics sPLS-DA model
#'
#' @return Data frame including ordered loadings over the first two components
#' of the model. Selected features are ordered by component one followed by
#' component two, and then from highest to lowest (in absolute terms).
#' @export
#'
#' @examples
#' \dontrun{
#' get.loadings.table(splsda.analysis)
#' }
get.loadings.table <- function(splsda.analysis) {
rna.loadings <- splsda.analysis$loadings$X
rna.loadings <- rna.loadings[rowSums(rna.loadings) != 0,]
rna.loadings[rna.loadings == 0] <- NA
rna.loadings <- as.data.frame(rna.loadings)
# Sort by components columns in reverse order
for (column.id in rev(colnames(rna.loadings))) {
rna.loadings <-
rna.loadings[order(abs(rna.loadings[, column.id]), decreasing = TRUE), ,
drop = FALSE]
}
return(rna.loadings)
}
#' Extract DIABLO feature selection stability
#'
#' @description
#' Extract feature selection stabilities for a given component from a
#' performance validated mixOmics multi-omics DIABLO model. Feature stabilities
#' are an important indicator of the confidence that a selected feature is
#' predictive for the outcome classes in the model, and hence (in combination
#' with the loading weight) is likely to be of biological significance. Features
#' are retrieved for a single specified block and component.
#'
#' @param diablo.perf Performance evaluation of the DIABLO model generated by
#' mixOmics perf.
#' @param comp Component number for which to retrieve feature selection
#' stabilities.
#' @param block Block number for which to retrieve feature selection
#' stabilities.
#'
#' @return A data frame containing the the features selected for the specificed
#' component and their relative stability, as a proportion of trained models
#' 0-1.
#' @export
#'
#' @examples
#' \dontrun{
#' diablo.selection.stability(perf.diablo.analysis, comp=1, block=1)
#' }
diablo.selection.stability <- function(diablo.perf, comp, block) {
stable <- diablo.perf$features$stable
stable.wms <- lapply(stable, "[[", block)
stable.wms <- lapply(stable.wms, "[[", comp)
melted <- reshape2::melt(stable.wms)
stability.mean <- melted %>%
group_by(Var1) %>%
summarise(stability = mean(value)) %>%
arrange(desc(stability)) %>%
dplyr::filter(!is.na(Var1)) %>%
as.data.frame()
colnames(stability.mean) <- c('feature', 'stability')
rownames(stability.mean) <- stability.mean$feature
return(stability.mean)
}
#' Get a DIABLO loadings table
#'
#' @description
#' Get a dataframe indicating the top ranking loading factors for a trained
#' mixOmics multi-omics DIABLO model for a provided block from the model.
#'
#' @param diablo.loadings Loadings from the trained model.
#' @param feature.count The number of top loadings from each component to
#' include.
#'
#' @return A data frame containing the top (by absolute magnitude) factor
#' loadings for the first, then second component, ordered by absolute weight.
#' @export
#'
#' @examples
#' \dontrun{
#' get.diablo.top.loadings(diablo.tuned.analysis$loadings$metab)
#' }
get.diablo.top.loadings <- function(diablo.loadings, feature.count = 20) {
rna.loadings <- diablo.loadings
rna.loadings <- rna.loadings[rowSums(rna.loadings) != 0,]
rna.loadings[rna.loadings == 0] <- NA
rna.loadings <- as.data.frame(rna.loadings)
# Construct new data frame with each component's top features included
loadings <- data.frame()
for (col.index in 1:ncol(rna.loadings)) {
rna.loadings <-
rna.loadings[order(abs(rna.loadings[, col.index]), decreasing = TRUE), ,
drop = FALSE]
col.name <- colnames(rna.loadings)[col.index]
rows.to.copy <- min(feature.count, nrow(rna.loadings))
for (row.index in 1:rows.to.copy) {
row.name <- rownames(rna.loadings)[row.index]
loadings[row.name, col.name] <- rna.loadings[row.name, col.name]
}
}
# Sort the columns, going from right to left
for (column.id in rev(colnames(loadings))) {
loadings <-
loadings[order(abs(loadings[, column.id]), decreasing = TRUE), ,
drop = FALSE]
}
return(loadings)
}
#' Get top loadings for a single component DIABLO model
#'
#' @description
#' Get a dataframe indicating the top ranking loading factors for a trained
#' mixOmics multi-omics DIABLO model for a provided block from the model.
#'
#' @param diablo.loadings DIABLO loadings for a single component model.
#' @param feature.count The number of top loadings from each component to
#' include.
#'
#' @return A data frame containing the top (by absolute magnitude) factor
#' loadings for the single component, ordered by absolute weight.
#' @export
#'
#' @examples
#' \dontrun{
#' get.diablo.top.loadings.1comp(diablo.tuned.analysis$loadings$metab)
#' }
get.diablo.top.loadings.1comp <- function(diablo.loadings, feature.count = 20) {
return(get.diablo.top.loadings(diablo.loadings, feature.count))
}
#' Get DIABLO top loadings and stabilities
#'
#' @description
#' Get a dataframe showing the top ranked loading factors for a trained mixOmics
#' multi-omics DIABLO model along with selection stability values. Where a
#' feature appears in more than one component, the selection stability value
#' shown is for the lowest numbered component.
#'
#' @param trained.model A multiomics DIABLO model.
#' @param perf.result The results from a perf test on the above model.
#' @param block The name or index of the block to get the data for.
#' @param feature.count The number of top loadings from each component to
#' include.
#'
#' @return A dataframe containing the top (by absolute magnitude) factor
#' loadings for each component and selection stability values for those
#' features.
#' @export
#'
#' @examples
#' \dontrun{
#' get.diablo.top.loadings.with.stability(diablo.model, diablo.perf.result, 'species', 100)
#' }
get.diablo.top.loadings.with.stability <- function(trained.model, perf.result,
block, feature.count = 20) {
loadings <- get.diablo.top.loadings(trained.model$loadings[[block]],
feature.count = feature.count)
# Bind on stability values in reverse component order
for (comp in rev(colnames(loadings))) {
selection.stability <-
diablo.selection.stability(perf.result, comp = comp, block = block)
component.features <-
rownames(loadings[!is.na(loadings[comp]), comp, drop = FALSE])
loadings[component.features, "stability"] <-
selection.stability[component.features, "stability"]
}
# Remove any features with only NA values
loadings <- loadings[apply(loadings, 1, function(x) { !all(is.na(x)) }) ,]
return(loadings)
}
#' Merge DIABLO stability onto a data frame of features
#'
#' @description
#' Utility function to combine top loading factors extracted from a trained
#' multi-omics mixOmics (DIABLO) model with the feature stability, previously
#' retrieved from the performance evaluation using e.g.
#' diablo.selection.stability and create a nicely formatted data frame.
#'
#' @param features Data frame of top features, e.g. provided by
#' get.diablo.top.loadings().
#' @param stability Data frame of feature stability, e.g. provided by
#' diablo.selection.stability().
#'
#' @return Data frame combining top features with their measured stability.
#' @export merge.feature.stability
#'
#' @examples
#' \dontrun{
#' merge.feature.stability(get.diablo.top.loadings(diablo.tuned.analysis$loadings$metab), diablo.selection.stability(perf.diablo.analysis, comp=1, block=1))
#' }
merge.feature.stability <- function(features, stability) {
merged <- merge(x = features, y = stability[, "stability", drop = FALSE],
by.x = "row.names", by.y = "row.names", all.x = TRUE)
for (column.id in rev(colnames(features))) {
merged <- merged[order(abs(merged[,column.id]), decreasing = TRUE),]
}
merged <- as.data.frame(merged)
colnames(merged)[1] <- "feature"
return(merged)
}
#' Merge stability onto a data frame of features in a single component DIABLO
#' model
#'
#' @description
#' Utility function to combine top loading factors extracted from a trained
#' multi-omics mixOmics (DIABLO) model with the feature stability, previously
#' retrieved from the performance evaluation using e.g.
#' diablo.selection.stability and create a nicely formatted data frame.
#' Specialised for DIABLO models with a single component, which otherwise
#' behaves differently in R's data structures.
#'
#' @param features Data frame of top features, e.g. provided by
#' get.diablo.top.loadings.1comp().
#' @param stability Data frame of feature stability, e.g. provided by
#' diablo.selection.stability().
#'
#' @return Data frame combining top features with their measured stability.
#' @export merge.feature.stability.1comp
#'
#' @examples
#' \dontrun{
#' merge.feature.stability.1comp(get.diablo.top.loadings.1comp(diablo.tuned.analysis$loadings$metab), diablo.selection.stability(perf.diablo.analysis, comp=1, block=1))
#' }
merge.feature.stability.1comp <- function(features, stability) {
return(
merge.feature.stability(features, stability)
)
}
#' Plot feature stability
#'
#' @description
#' Plot feature stability for a summary overview.
#'
#' @param selection.stability Stability data for features selected for a
#' component.
#'
#' @return Plotted stability data.
#' @export plot.feature.stability
#'
#' @examples
#' \dontrun{
#' plot.feature.stability(diablo.selection.stability(perf.diablo.analysis, comp=1, block=1))
#' }
plot.feature.stability <- function(selection.stability) {
return(
plot(selection.stability, type = 'h', ylab = 'Stability', xlab = 'Features',
main = 'Selection stability')
)
}
#' Export a covariance matrix as a network CSV file
#'
#' @description
#' Export a covariance matrix, e.g. one generated using
#' `find.feature.associations` as data describing the association network and
#' suitable for e.g. loading into Cytoscape. The method can optionally take a
#' cutoff value, which is used to threshold (absolute) association scores. The
#' output is written to disk as a CSV file using the filename given.
#'
#' @param m Symmetrical matrix of association scores.
#' @param filename Filename to which to write network output.
#' @param cutoff Optional value of which to filter associations.
#' @param block.association If the matrix does not contain equal number of
#' variables from each block, then a custom list of block associations can be
#' passed in.
#' @param block.feature.count The number of features selected from each block,
#' defaulting to 20.
#'
#' @return The long-form network data that is also written to disk
#' @export
#'
#' @examples
#' \dontrun{
#' export.matrix.as.network(mat, "network.csv")
#' export.matrix.as.network(find.feature.associations(diablo.trained.analysis, 3), filename="network.csv", cutoff=0.7)
#' }
export.matrix.as.network <- function(m, filename, cutoff=NA,
block.association=NULL,
block.feature.count=20) {
block.labels <- as.data.frame(rownames(m))
if (is.null(block.association)) {
num.blocks <- length(rownames(m)) / block.feature.count
block.association <- rep(1:num.blocks, each=block.feature.count)
}
block.labels$block <- block.association
colnames(block.labels) <- c("feature", "block")
melted <- reshape2::melt(replace(m, lower.tri(m, TRUE), NA), na.rm = TRUE)
melted <- merge(x = melted, y = block.labels,
by.x = "Var1", by.y = "feature", all.x = TRUE)
colnames(melted) <- c("feature.1", "feature.2", "value", "block")
if (!is.na(cutoff)) {
melted <- melted[abs(melted$value) >= cutoff,]
}
write.csv(melted, filename, row.names=FALSE, quote=FALSE)
return(melted)
}
#' Get model variance
#'
#' @description
#' Get percent of variance explained by each component for a tuned model. Works
#' with sPLS-DA models or DIABLO models as input, but the block parameter must
#' be specified for DIABLO models.
#'
#' @param tuned.model Either an sPLS-DA model from singleomics or a DIABLO model
#' from multiomics.
#' @param block Where the model type is DIABLO, the block to get model variance
#' for.
#'
#' @return A data frame with the model variance for the specified block.
#' @export
#'
#' @examples
#' \dontrun{
#' get.model.variance(splsda.model)
#' get.model.variance(diablo.model, block = 'transcriptomics')
#' }
get.model.variance <- function(tuned.model, block = "X"){
model.var <- cor(as.numeric(as.factor(tuned.model$Y)),
tuned.model$variates[[block]][, 1:tuned.model$ncomp[block]])
model.var <- apply(model.var^2, 2, sum)
model.var <- cbind(model.var, cumsum(model.var))
colnames(model.var) <- c("Proportion", "Cumulative")
return(model.var)
}
#' Center truncate long strings
#'
#' @description
#' Truncate a string to no longer than 43 characters. Where a string is longer
#' than 43 characters, the first and last 20 characters sandwich three periods.
#' This can be useful for shortening long feature names before plotting.
#'
#' @param x A string to truncate.
#'
#' @return A string where each has been reduced to 43 characters or left as is.
#' @export
#'
#' @examples
#' \dontrun{
#' center.truncate('A string to truncate if long enough')
#' }
center.truncate <- function(x) {
string.length <- nchar(x)
if (string.length < 43) {
return(x)
} else {
return(paste0(substr(x, 1, 20),
"...",
substr(x, string.length - 20, string.length)))
}
}