#' A null device for running plot functions without outputting graphics.
.nulldev <- function(file = nullfile(), ...) {
type <- getOption("R.devices.nulldev", "pdf")
do.call(type, args = list(nullfile()))
}
#' Get block centroids for a DIABLO model
#'
#' @description
#' Function to get the centroids of samples transformed into the same model
#' space across all blocks for a trained multi-omics mixOmics (DIABLO) model.
#' Prepares plots and raw data of consensus sample classification, which (for
#' some reason) isn't provided in the base package.
#'
#' @param diablo.trained Trained multi-omics mixOmics DIABLO model.
#'
#' @return Simple object containing consensus data (as `consensus.summary`) and
#' a ggplot plot of the distribution in the first two components (as `plot`).
#' @export
#'
#' @examples
#' \dontrun{
#' consensus.centroids <- get.block.centroids(diablo.trained.analysis, 20, 3)
#' print(consensus.centroids$plot)
#' }
get.block.centroids <- function(diablo.trained) {
# Establish dimensionality
block.count <- length(diablo.trained$X)
sample.count <- length(diablo.trained$Y)
comp.count <- diablo.trained$ncomp[1]
# Perform the same analysis as the arrow plot -- sending graphics output to
# the null device
.nulldev()
# Set up the initial columns
arrow <- mixOmics::plotArrow(diablo.trained)
consensus <- data.frame(arrow$Block, arrow$group)
consensus$sample <- rep(1:sample.count, block.count + 1)
# Add component columns 1 at a time using arrow plots
comp.col.names <- vector()
for (comp in 1:comp.count) {
comp.col.name <- sprintf("comp%i", comp)
comp.col.names[comp] <- comp.col.name
arrow <- mixOmics::plotArrow(diablo.trained, comp = c(1, comp))
consensus[,comp.col.name] <- arrow$y
}
# Remove the outcome block
consensus <- subset(consensus, !(arrow.Block == "Block: Y"))
# Perform a group by on each sample, finding the centroid of each
consensus.summary <- consensus %>%
dplyr::group_by(sample, arrow.group) %>%
dplyr::summarise_at(comp.col.names, mean)
colnames(consensus.summary)[2] <- "label"
dev.off()
# Plot the consensus plot
p <- ggplot2::ggplot(consensus.summary,
ggplot2::aes(comp1, comp2, color = label))
p <- p + ggplot2::geom_point() + ggplot2::stat_ellipse()
# Return the consensus values
return(list("consensus" = consensus.summary, "plot" = p))
}
#' Plot projections for a prediction result
#'
#' @description
#' Function to plot a single-omics sPLS-DA prediction projected into the model
#' space. While confusion matrices of predicted data can easily be obtained,
#' there is no built-in function to plot the projection into the model space and
#' hence give a visualisation of the quality of the prediction. The plot will
#' show the centroids of the classes as large points, surrounded by the sample
#' projection as small points, coloured according to class. A good prediction
#' will cluster each class around the appropriate centroid.
#'
#' @param prediction mixOmics sPLS-DA prediction object, generated by the
#' `predict` method.
#' @param classes.new Factor indicating the classes of the new data.
#'
#' @return ggplot plot of the predicted data in the sPLS-DA model space.
#' @export plot.predicted.projection
#'
#' @examples
#' \dontrun{
#' prediction.projection <- plot.predicted.projection(prediction.replicate.data, replicate.data.classes)
#' print(prediction.projection)
#' }
plot.predicted.projection <- function(prediction, classes.new) {
variates <- as.data.frame(prediction$variates[,1:2])
variates$label <- classes.new
colnames(variates) <- c("Component1", "Component2", "label")
p <- ggplot2::ggplot(variates, ggplot2::aes(x=Component1, y=Component2)) +
ggplot2::geom_point(ggplot2::aes(color=label)) +
ggplot2::theme_bw()
classes.count <- length(levels(classes.new))
pal <- scales::hue_pal()(classes.count)
for (i in seq(1, classes.count)) {
p <- p + annotate("point", x = prediction$centroids[i,1],
y = prediction$centroids[i,2], color = pal[[i]], cex = 6)
}
return(p)
}
#' Extract feature associations from a multi-omics DIABLO model
#'
#' @description
#' Extract feature vs. feature association (mutual information) data from a
#' multi-omics mixOmics (DIABLO) model. This is the
#' same data used to create the circos and network plots, but includes all
#' interactions for the top features according to either blockrank or loading scores.
#'
#' @param diablo.model Trained mixOmics multi-omics (DIABLO) model.
#' @param feature_number Number of top features to select (default set to 20). If using loading scores,
#' top features will be evenly divided across the number of blocks.
#' @param score_type Type of score to select top features by. Accepted options: "blockrank" or "loading"
#' (default set to "blockrank")
#'
#' @return Matrix including the associations of top selected features in DIABLO model according
#' to either blockrank scores or loading values
#'
#' @export
#'
#' @examples
#' \dontrun{
#' find.feature.associations(diablo.model, feature_number=50, score_type="blockrank")
#' }
find.feature.associations <- function(diablo.model, feature_number=20, score_type="blockrank") {
#Extract the covariance matrix from the circosPlot function. Disable
#graphical output to avoid it being overwhelmed
circos <- mixOmics::circosPlot(diablo.model, cutoff=0.7,
line = TRUE, size.labels = 1.5)
dev.off()
if (score_type == "blockrank"){
#find top features across all blocks by ranking blockrank scores high to low
#run diablo blockrank
blockrank.i <- blockrank.diablo(diablo.model)
#compile blockrank results
plot.data <- vector(mode = "list", length = length(blockrank.i))
for (q in seq_along(blockrank.i)){
block <- names(blockrank.i)[q]
feature <- names(blockrank.i[[q]])
blockrank.score <- blockrank.i[[q]]
plot.data[[q]] <- data.frame(block, feature, blockrank.score)
}
plot.data <- bind_rows(plot.data)
plot.data <- arrange(plot.data, desc(blockrank.score))
#filter to desired number of features
plot.data <- head(plot.data,n=feature_number)
#Find the top factors across all blocks
selected.features <- plot.data$feature
} else if (score_type == "loading") {
#Find the top n features across all blocks according to loading score.
#Divide by feature_number by number of blocks to evenly attain number of scores per block
blocks <- length(diablo.model$loadings)-1
feature_number_per_block <- round(feature_number/blocks, digits = 0)
loading_scores <- vector(mode = "list", length = length(blocks))
for (i in 1:blocks){
dev.new(width = 3000, height = 3000, unit = "px")
loadings <- mixOmics::plotLoadings(diablo.model, block=i, comp = 1,
contrib = 'max', method = 'median',
ndisplay=feature_number_per_block)
loading_scores[[i]] <- data.frame(rownames(loadings))
dev.off()
}
loading_scores <- bind_rows(loading_scores)
selected.features <- loading_scores$rownames.loadings.
} else{
stop("score_type must be one of the following: blockrank, loading", call.=FALSE)
}
#Filter the covariance matrix for the top features based on blockrank scores
circos.selected <- circos[rownames(circos) %in% selected.features,
colnames(circos) %in% selected.features]
diag(circos.selected) <- 1
# Return the covariance matrix
return(circos.selected)
}
#' Filter and reformat feature association matrix
#'
#' @description
#' Reformats association matrix into network dataframe.
#' Filters feature network to remove associations that do not meet a correlation cut off.
#' Filters feature network to remove associations between features from the same block.
#' Filters feature network to only include associations to a list of selected features.
#'
#'
#' @param diablo.model Trained mixOmics multi-omics (DIABLO) model
#' @param associations Matrix consisting the associations of top discriminative features from find.feature.associations()
#' @param feature_list List of features of interest to filter network by. If argument not supplied, network will not be filtered
#' @param cutoff Correlation cut off to filter feature network. (default set to 0.7)
#' @param remove_intrablock If set to TRUE, removes associations between features from the same block (default set to FALSE)
#'
#' @return Dataframe describing associations between features and their blocks
#'
#' @export
#'
#' @examples
#' \dontrun{
#' filter.network(diablo.model=diablo.model, associations=associations, cutoff=0.7, feature_list= feature_list, remove_intrablock = FALSE)
#' }
filter.network <- function(diablo.model, associations, feature_list=NULL, cutoff=0.7, remove_intrablock=FALSE){
'%!in%' <- function(x,y)!('%in%'(x,y))
#generate block names for all features in model
feature_block_names <- data.frame()
number_of_blocks <- length(diablo.model$loadings)-1
for (i in 1:number_of_blocks) {
active.block <- diablo.model$loadings[[i]]
active.block.names <- data.frame(rownames(active.block), names(diablo.model$loadings[i]))
feature_block_names <- rbind(feature_block_names, active.block.names)
}
#identify blocks for top features
block_association <- feature_block_names[feature_block_names$rownames.active.block. %in% rownames(associations),]$names.diablo.model.loadings.i..
#export network file with correlation cut off
network <- export.matrix.as.network(associations, cutoff= cutoff,
filename = "network.csv", block.association = block_association)
if (is.null(feature_list)){
#not filtering network by top features
#merge in block names for nodes
network_subset_merged <- merge(network, feature_block_names, by.x="feature.2", by.y="rownames.active.block.", all.x = TRUE)
names(network_subset_merged)[4] <- "feature.1_block"
names(network_subset_merged)[5] <- "feature.2_block"
network <- network_subset_merged %>% select("feature.1", "feature.2", "value", "feature.1_block", "feature.2_block")
}
else{
#subset network to only include associations with features of interest
network_subset_switch <- network[network$feature.1 %in% feature_list | network$feature.2 %in% feature_list,]
#conditionally switching features where the feature of interest node is in second column
#identify which rows need to be switched and do not have the correct source node
i <- which(network_subset_switch$feature.2 %in% feature_list)
#create vectors with new row values
new_feature_1 <- as.character(network_subset_switch$feature.2[i])
new_feature_2 <- as.character(network_subset_switch$feature.1[i])
#change row values for the rows that need to be switched
network_subset_switch$feature.1[i] <- new_feature_1
network_subset_switch$feature.2[i] <- new_feature_2
#merge in block names for nodes
network_subset_merged <- merge(network_subset_switch, feature_block_names, by.x="feature.2", by.y="rownames.active.block.", all.x = TRUE)
names(network_subset_merged)[4] <- "feature.1_block"
names(network_subset_merged)[5] <- "feature.2_block"
network <- network_subset_merged %>% select("feature.1", "feature.2", "value", "feature.1_block", "feature.2_block")
#add error message if a feature of interest is not in top n features by blockrank score or loading score
feature_found <- FALSE
for (i in seq_along(feature_list)){
if (feature_list[i] %in% network$feature.1) {
feature_found <- TRUE
}
else if (feature_list[i] %!in% network$feature.1) {
message(paste0(feature_list[i], " is not present in top features selected"))
}
}
if(feature_found == FALSE) {
stop("None of the entered features are in the top selected features", call.=FALSE)
}
}
#remove intrablock connections if TRUE
if (remove_intrablock == TRUE){
#create primary key for each id
network$id <- rownames(network)
#identify intrablock connections to remove
network_to_remove <- network[network$feature.1_block == network$feature.2_block,]
#remove the unwanted intrablock connections
network <- network[network$id %!in% network_to_remove$id,]
#remove primary key
network$id <- NULL
}
#stop function is network has been filtered to to 0 associations
if (nrow(network) < 1){
stop("Final network is empty. Please try again with different parameters.", call.=FALSE)
}
return(network)
}
#' Visualize feature interaction network
#'
#' @description
#' Plots feature interaction network output dataframe from filter.network(). Nodes represent biological features and edges
#' represents correlations between nodes. Nodes are colored by block and edges are colored by correlation value.
#' Uses Kamada-Kawai layout algorithm to position nodes. More strongly connected nodes will be pulled together,
#' while more weakly connected nodes will pushed away from other nodes.
#'
#'
#' @param omicsfold_network Network dataframe generated by filter.network() that describes
#' associations between features and their associated blocks
#' @param diablo.model Trained mixOmics multi-omics (DIABLO) model
#'
#' @return Visualization of feature interaction network
#'
#' @export
#'
#' @examples
#' \dontrun{
#' plot.network(omicsfold_network=omicsfold_network, diablo.model=diablo.model)
#' }
#visualize network
plot.network <- function(omicsfold_network, diablo.model){
#plot network
thm <- theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
)
theme_set(thm)
final_network_graph <- as_tbl_graph(omicsfold_network)
final_network_graph <- final_network_graph %>%
activate(nodes) %>%
mutate(
title = str_to_title(name),
label = str_replace_all(title, " ", "\n")
)
#generate block names for all features in model
feature_block_names <- data.frame()
number_of_blocks <- length(diablo.model$loadings)-1
for (i in 1:number_of_blocks) {
active.block <- diablo.model$loadings[[i]]
active.block.names <- data.frame(rownames(active.block), names(diablo.model$loadings[i]))
feature_block_names <- rbind(feature_block_names, active.block.names)
}
names(feature_block_names)[2] <- "block"
#merge in block data
final_network_graph <- left_join(final_network_graph %>%
activate(nodes), feature_block_names, by= c("name" = "rownames.active.block."))
network_plot <- final_network_graph %>%
ggraph(layout = "kk") +
geom_edge_link(aes(color = value), edge_width=3, alpha = 0.7) +
scale_edge_colour_continuous(
low = "orange",
high = "red",
space = "Lab",
na.value = "grey50",
guide = "edge_colourbar")+
geom_node_point(size=6,show.legend = FALSE)+
geom_node_label(aes(label=name, fill=block), repel = T, show.legend = TRUE, alpha=0.9,
color = "white", # text
size = 5, # font size
label.r = unit(20, "pt"), # corner radius of label box
label.size = .1, # label border size
label.padding = unit(1, "lines"))
return(network_plot)
}
.order.circos.selected <- function(circos.selected, circos.order) {
circos.order.vars <- rownames(circos.selected)[circos.order]
circos.ordered <-
circos.selected[match(rownames(circos.selected), circos.order.vars),
match(colnames(circos.selected), circos.order.vars)]
return(circos.ordered)
}
#' Perform a permanova signficance test
#'
#' @description
#' Perform a permanova significance testing using the adonis function (from
#' vegan) upon consensus classes, as returned from the get.block.centroids
#' method. Limited to first two components.
#'
#' @param consensus.summary Consensus summary as generated by
#' get.block.centroids().
#'
#' @return Measurement of p-value for separation of clusters.
#' @export
permanova.clusters <- function(consensus.summary) {
consensus.points <- data.frame(consensus.summary$comp1,
consensus.summary$comp2)
return(vegan::adonis(consensus.points ~ sample(consensus.summary$arrow.group),
method='euclidean'))
}