# File: R/getMoCluster.R

#' @name getMoCluster
#' @title Get subtypes from MoCluster
#' @description This function wraps the MoCluster (Multiple omics data
#'   integrative clustering) algorithm and provides standard output for
#'   `getMoHeatmap()` and `getConsensusMOIC()`.
#' @param data List of matrices. Between 2 and 6 matrices are accepted. Sample
#'   IDs are taken from the column names of the first matrix.
#' @param N.clust Number of clusters used to cut the sample dendrogram.
#'   Required.
#' @param ncomp An integer value to indicate the number of components to
#'   calculate. To calculate more components requires longer computational
#'   time. When `NULL` (the default), `N.clust` is used.
#' @param method A string value can be one of CPCA, GCCA and MCIA; CPCA by
#'   default.
#' @param option A string value could be one of c('lambda1', 'inertia',
#'   'uniform') to indicate how the different matrices should be normalized.
#' @param k A numeric value to indicate the absolute number (if k >= 1) or the
#'   proportion (if 0 < k < 1) of non-zero coefficients for the variable
#'   loading vectors. It could be a single value or a vector that has the same
#'   length as `data` so the sparsity of individual matrix could be different.
#' @param center A logical value to indicate if the variables should be
#'   centered. TRUE by default.
#' @param scale A logical value to indicate if the variables should be scaled.
#'   TRUE by default.
#' @param clusterAlg A string value to indicate the cluster algorithm for
#'   distance; passed as `method` to `hclust()`. "ward.D" by default.
#' @param type Data type corresponding to the list of matrices, which can be
#'   gaussian, binomial or poisson. Currently not used by this function; it is
#'   kept in the signature so that existing calls passing it keep working.
#' @return A list with the following components:
#'
#'         \code{fit}        an object returned by \link[mogsa]{mbpca}.
#'
#'         \code{clust.res}  a data.frame storing sample ID and corresponding clusters.
#'
#'         \code{feat.res}   the results of features selection process
#'         (features whose loading on the first component is non-zero, grouped
#'         by dataset); \code{NULL} if no feature is selected.
#'
#'         \code{clust.dend} a dendrogram of sample clustering.
#'
#'         \code{mo.method}  a string value indicating the method used for multi-omics integrative clustering.
#' @export
#' @examples # There is no example and please refer to vignette.
#' @import mogsa
#' @importFrom dplyr %>%
#' @references Meng C, Helm D, Frejno M, Kuster B (2016). moCluster: Identifying Joint Patterns Across Multiple Omics Data Sets. J Proteome Res, 15(3):755-765.
getMoCluster <- function(data       = NULL,
                         N.clust    = NULL,
                         type       = rep("gaussian", length(data)),
                         ncomp      = NULL,
                         method     = "CPCA",
                         option     = "lambda1",
                         k          = 10,
                         center     = TRUE,
                         scale      = TRUE,
                         clusterAlg = "ward.D") {

  # check data
  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 datasets.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # map the user-facing method name onto the value handed to mbpca()
  mbpca_method <- c(CPCA = "globalScore",
                    GCCA = "blockScore",
                    MCIA = "blockLoading")
  if (length(method) != 1L || !(method %in% names(mbpca_method))) {
    stop("method should be one of CPCA [consensus PCA], GCCA [generalized canonical correlation analysis], or MCIA [multiple co-inertia analysis]!")
  }

  # N.clust is needed by cutree() below (and as the ncomp fallback); fail fast
  # here instead of after the expensive decomposition has already run
  if (is.null(N.clust)) {
    stop("N.clust must be specified: the number of clusters is required to cut the sample dendrogram.")
  }

  if (is.null(ncomp)) {
    ncomp <- N.clust
  }

  moas <- mogsa::mbpca(data,
                       ncomp      = ncomp,
                       k          = k,
                       method     = mbpca_method[[method]],
                       option     = option,
                       center     = center,
                       scale      = scale,
                       moa        = TRUE,
                       svd.solver = "fast",
                       maxiter    = 1000,
                       verbose    = FALSE)

  # hierarchical clustering of samples on the score matrix
  scrs       <- moaScore(moas)
  samp.dist  <- dist(scrs)
  clust.dend <- hclust(samp.dist, method = clusterAlg)

  sam.id   <- colnames(data[[1]])
  clustres <- data.frame(samID            = sam.id,
                         clust            = cutree(clust.dend, k = N.clust),
                         row.names        = sam.id,
                         stringsAsFactors = FALSE)
  message("clustering done...")

  # keep features with a non-zero loading on the first component;
  # drop = FALSE keeps a matrix even when a single row or column remains
  loading <- moas@loading
  featres <- loading[which(loading[, 1] != 0), , drop = FALSE]

  # row names are split at the last underscore: <feature>_<dataset>
  feat.name <- sub('_[^_]*$', '', rownames(featres))
  feat.data <- sub('.*_', '', rownames(featres))
  featres <- data.frame(feature          = feat.name,
                        dataset          = feat.data,
                        load             = featres[, 1],
                        stringsAsFactors = FALSE)

  # group rows by dataset in order of first appearance; order() leaves ties in
  # their original order, so the within-dataset row order is unchanged
  feat.res <- NULL
  if (nrow(featres) > 0) {
    dataset.rank <- match(featres$dataset, unique(featres$dataset))
    feat.res     <- featres[order(dataset.rank), , drop = FALSE]
  }
  message("feature selection done...")

  list(fit        = moas,
       clust.res  = clustres,
       feat.res   = feat.res,
       clust.dend = clust.dend,
       mo.method  = "MoCluster")
}