Diff of /R/amaretto_run.R [000000] .. [2ab972]

Switch to side-by-side view

--- a
+++ b/R/amaretto_run.R
@@ -0,0 +1,331 @@
+#' AMARETTO_LarsenBased
+#' 
+#' @param Data 
+#' @param Clusters 
+#' @param RegulatorData 
+#' @param Parameters 
+#' @param NrCores 
+#' @param random_seeds 
+#' @param convergence_cutoff 
+#'
+#' @import MultiAssayExperiment
+#' @import graphics
+#' @importFrom Matrix nnzero
+#' @importFrom doParallel registerDoParallel
+#' 
+#' @return result
+#' @keywords internal
+AMARETTO_LarsenBased <- function(Data, Clusters, RegulatorData, Parameters, NrCores, random_seeds, convergence_cutoff) {
+    registerDoParallel(cores = NrCores)
+    ptm1 <- proc.time()
+    RegulatorData_rownames = rownames(RegulatorData)
+    Data_rownames = rownames(Data)
+    AutoRegulation = Parameters$AutoRegulation
+    RegulatorSign = array(0, length(RegulatorData_rownames))
+    Lambda = Parameters$Lambda2
+    OneRunStop = Parameters$OneRunStop
+    random_seeds = Parameters$random_seeds
+    convergence_cutoff = Parameters$convergence_cutoff
+    if (AutoRegulation == 1) {
+        cat("\tAutoregulation is turned ON.\n")
+    } else if (AutoRegulation == 2) {
+        cat("\tAutoregulation is turned ON.\n")
+    } else {
+        cat("\tAutoregulation is turned OFF.\n")
+    }
+    NrReassignGenes = length(Data_rownames)
+    NrReassignGenes_history <- NrReassignGenes
+    error_history<-list()
+    index=1
+    while (NrReassignGenes > convergence_cutoff * length(Data_rownames)) {
+        ptm <- proc.time()
+        switch(Parameters$Mode, larsen = {
+            regulatoryPrograms <- AMARETTO_LearnRegulatoryProgramsLarsen(Data, Clusters, RegulatorData, RegulatorSign, Lambda, AutoRegulation, alpha = Parameters$alpha, pmax = Parameters$pmax, random_seeds)
+        })
+        error_history[[index]]<-(rowMeans(regulatoryPrograms$error * regulatoryPrograms$error))^0.5
+        index<-index+1
+        ptm <- proc.time() - ptm
+        printf("Elapsed time is %f seconds\n", ptm[3])
+        NrClusters = length(unique(Clusters))
+        sum = 0
+        for (i in 1:NrClusters) {
+            sum = sum + Matrix::nnzero(regulatoryPrograms$Beta[i,])
+        }
+        avg = sum/NrClusters
+        printf("Average nr of regulators per module: %f \n", avg)
+        PreviousClusters = Clusters
+        if (OneRunStop == 1) {
+            break
+        }
+        ptm <- proc.time()
+        ReassignGenesToClusters <- AMARETTO_ReassignGenesToClusters(Data, RegulatorData, regulatoryPrograms$Beta, Clusters, AutoRegulation)
+        ptm <- proc.time() - ptm
+        printf("Elapsed time is %f seconds\n", ptm[3])
+        NrReassignGenes = ReassignGenesToClusters$NrReassignGenes
+        Clusters = ReassignGenesToClusters$Clusters
+        printf("Nr of reassignments is: %i \n", NrReassignGenes)
+        NrReassignGenes_history <- c(NrReassignGenes_history, NrReassignGenes)
+    }
+    ptm1 <- proc.time() - ptm1
+    printf("Elapsed time is %f seconds\n", ptm1[3])
+    ModuleMembership = as.matrix(PreviousClusters)
+    rownames(ModuleMembership) = rownames(Data)
+    colnames(ModuleMembership) = c("ModuleNr")
+    result <- list(NrModules = length(unique(Clusters)), RegulatoryPrograms = regulatoryPrograms$Beta, AllRegulators = rownames(RegulatorData),
+                   AllGenes = rownames(Data), ModuleMembership = ModuleMembership, AutoRegulationReport = regulatoryPrograms$AutoRegulationReport,
+                   run_history = list(NrReassignGenes_history = NrReassignGenes_history, error_history = error_history))
+    return(result)
+}
+
+#' AMARETTO_LearnRegulatoryProgramsLarsen
+#' @importFrom foreach foreach
+#' @importFrom glmnet cv.glmnet
+#' @importFrom stats coef
+#' @return result
+#' @keywords internal
+AMARETTO_LearnRegulatoryProgramsLarsen <- function(Data, Clusters, RegulatorData, RegulatorSign, Lambda, AutoRegulation, alpha, pmax, random_seeds) {
+    `%dopar%` <- foreach::`%dopar%`
+    RegulatorData_rownames = rownames(RegulatorData)
+    Data_rownames = rownames(Data)
+    trace = 0
+    NrFolds = 10
+    NrClusters = length(unique(Clusters))
+    NrGenes = nrow(Data)
+    NrSamples = ncol(Data)
+    NrInterpolateSteps = 100
+    if (AutoRegulation >= 1) {}
+    else if (AutoRegulation == 0) {
+        BetaSpecial = list(NrClusters, 1)
+        RegulatorPositions = list(NrClusters, 1)
+    }
+    y_all = mat.or.vec(NrClusters, NrSamples)
+    ClusterIDs = unique(Clusters)
+    ClusterIDs = sort(ClusterIDs, decreasing = FALSE)
+    cnt <- 1:NrClusters
+    ptm1 <- proc.time()
+    BetaY_all <- foreach(i = 1:NrClusters, .combine = cbind, .init = list(list(), list(), list()), .packages = "glmnet") %dopar% {
+            if (length(which(Clusters == ClusterIDs[i])) > 1) {
+                y = apply((Data[which(Clusters == ClusterIDs[i]),]), 2, mean)
+            } else {
+                y = Data[which(Clusters == ClusterIDs[i]),]
+            }
+            CurrentClusterPositions = which(Clusters %in% ClusterIDs[i])
+            nrGenesInClusters = length(CurrentClusterPositions)
+            if (AutoRegulation >= 1) {
+                X = RegulatorData
+            } else if (AutoRegulation == 0) {
+                X = RegulatorData[setdiff(RegulatorData_rownames, Data_rownames[CurrentClusterPositions]),]
+            }
+            suppressWM = function(...) suppressWarnings(suppressMessages(...))
+            
+            if(!is.null(random_seeds)){
+              seed<-ifelse (length(random_seeds)==2,random_seeds[2],random_seeds[1])
+              set.seed(seed)
+            }
+            fit = suppressWM(cv.glmnet(t(X), y, alpha = alpha, pmax = pmax, lambda = Lambda_Sequence(t(X), y)))
+            nonZeroLambdas <- fit$lambda[which(fit$nzero > 0)]
+            nonZeroCVMs <- fit$cvm[which(fit$nzero > 0)]
+            if (length(which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))) == 0) {
+                warnMessage <- paste0("\nOn cluster ", i, " there were no cv.glm results that gave non-zero coefficients.")
+                message(warnMessage)
+            }
+            bestNonZeroLambda <- nonZeroLambdas[which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))]
+            b_o = coef(fit, s = bestNonZeroLambda)
+            b_opt <- c(b_o[2:length(b_o)])
+            if (AutoRegulation == 2) {
+                CurrentUsedRegulators = RegulatorData_rownames[which(b_opt != 0, arr.ind = TRUE)]
+                CurrentClusterMembers = Data_rownames[CurrentClusterPositions]
+                nrIterations = 0
+                while (length(CurrentClusterMembers[CurrentClusterMembers %in% CurrentUsedRegulators]) != 0) {
+                  CurrentClusterMembers = setdiff(CurrentClusterMembers, CurrentUsedRegulators)
+                  nrCurrentClusterMembers = length(CurrentClusterMembers)
+                  if (nrCurrentClusterMembers > 0) {
+                    names = Data_rownames %in% CurrentClusterMembers
+                    if (length(which(names == TRUE)) > 1) {
+                      y = apply((Data[names, ]), 2, mean)
+                    } else {
+                      y = Data[names, ]
+                    }
+                    if(!is.null(random_seeds)){
+                      seed<-ifelse (length(random_seeds)==2,random_seeds[2],random_seeds[1])
+                      set.seed(seed)
+                    }
+                    fit = suppressWM(cv.glmnet(t(X), y, alpha = alpha, pmax = pmax, lambda = Lambda_Sequence(t(X), y)))
+                    nonZeroLambdas <- fit$lambda[which(fit$nzero > 0)]
+                    nonZeroCVMs <- fit$cvm[which(fit$nzero > 0)]
+                    if (length(which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))) == 0) {
+                      warnMessage <- paste0("\nOn cluster ", i, " there were no cv.glm results that gave non-zero coefficients during the Autoregulation step.")
+                      message(warnMessage)
+                    }
+                    bestNonZeroLambda <- nonZeroLambdas[which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))]
+                    new_b_o = coef(fit, s = bestNonZeroLambda)
+                    new_b_opt <- c(new_b_o[2:length(b_o)])
+                    CurrentUsedRegulators = RegulatorData_rownames[which(new_b_opt != 0)]
+                    nrIterations = nrIterations + 1
+                    b_opt = new_b_opt
+                  } else {
+                    b_opt = rep(0, length(RegulatorData_rownames))
+                  }
+                }
+                Report <- c(length(CurrentClusterPositions), length(CurrentClusterMembers), nrIterations)
+            }
+            if (sum(RegulatorSign[which(RegulatorSign != 0)]) > 0) {
+                RegulatorCheck = RegulatorSign * t(b_opt)
+                WrongRegulators = which(RegulatorCheck < 0)
+                if (length(WrongRegulators) == 0) {
+                  b_opt[WrongRegulators] = 0
+                }
+            }
+            if (AutoRegulation >= 1) {
+            } else {
+                BetaSpecial[i] = b_opt
+                RegulatorPositions[i] = (RegulatorData_rownames %in% setdiff(RegulatorData_rownames, Data_rownames[CurrentClusterPositions]))
+            }
+            list(b_opt, y, Report)
+        }
+    if (AutoRegulation == 0) {
+        for (i in 1:NrClusters) {
+            Beta[i, RegulatorPositions[i]] = BetaSpecial[i]
+        }
+    }
+    tmpPos = NrClusters + 1
+    Beta <- do.call(cbind, BetaY_all[1, 2:tmpPos])
+    Beta = t(Beta)
+    colnames(Beta) = RegulatorData_rownames
+    rownames(Beta) = gsub("result.", "Module_", rownames(Beta))
+    y_all <- do.call(cbind, BetaY_all[2, 2:tmpPos])
+    y_all = t(y_all)
+    rownames(y_all) = gsub("result.", "Module_", rownames(y_all))
+    AutoRegulationReport <- do.call(cbind, BetaY_all[3, 2:tmpPos])
+    AutoRegulationReport = t(AutoRegulationReport)
+    rownames(AutoRegulationReport) = gsub("result.", "Module_", rownames(AutoRegulationReport))
+    error = y_all - (Beta %*% RegulatorData)
+    result <- list(Beta = Beta, error = error, AutoRegulationReport = AutoRegulationReport)
+    return(result)
+}
+
+#' AMARETTO_ReassignGenesToClusters
+#'
+#' @return result
+#' @importFrom Matrix nnzero
+#' @importFrom stats cor
+#' @keywords internal
+AMARETTO_ReassignGenesToClusters <- function(Data, RegulatorData, Beta, Clusters, AutoRegulation) {
+    `%dopar%` <- foreach::`%dopar%`
+    RegulatorData_rownames = rownames(RegulatorData)
+    Data_rownames = rownames(Data)
+    NrGenes = nrow(Data)
+    NrSamples = ncol(Data)
+    NrReassignGenes = 0
+    X = RegulatorData
+    X1 = data.matrix(X)
+    ModuleVectors = Beta %*% X1
+    GeneNames = rownames(Data)
+    ptm1 <- proc.time()
+    i <- NULL
+    nc <- foreach(i = 1:NrGenes, .combine = c) %dopar%
+        {
+            OldModule = Clusters[i]
+            CurrentGeneVector = Data[i, , drop = FALSE]
+            Correlations = cor(t(CurrentGeneVector), t(ModuleVectors))
+            corr = data.matrix(Correlations, rownames.force = NA)
+            MaxCorrelation = max(corr, na.rm = TRUE)
+            MaxPosition = which(signif(corr, digits = 7) == signif(MaxCorrelation, digits = 7))
+            MaxPosition = MaxPosition[1]
+            if (AutoRegulation > 0) {
+                if (MaxPosition != OldModule) {
+                  NrReassignGenes = NrReassignGenes + 1
+                }
+                NewClusters = MaxPosition
+            } else {
+                if (nnzero(rownames(RegulatorData_rownames) %in% GeneNames[i]) != 0) {
+                  if (nnzero(which(which(GeneNames %in% rownames(RegulatorData_rownames)) %in% i) %in% which(Beta[MaxPosition,] != 0)) != 0) {
+                    if (MaxPosition != OldModule) {
+                      NrReassignGenes = NrReassignGenes + 1
+                    }
+                    NewClusters = MaxPosition
+                  } else {
+                    NewClusters = OldModule
+                  }
+                } else {
+                  if (MaxPosition != OldModule) {
+                    NrReassignGenes = NrReassignGenes + 1
+                  }
+                  NewClusters = MaxPosition
+                }
+            }
+        }
+    ptm1 <- proc.time() - ptm1
+    NrReassignGenes = length(which(nc != Clusters))
+    result <- list(NrReassignGenes = NrReassignGenes, Clusters = nc)
+    return(result)
+}
+
+#' AMARETTO_CreateModuleData
+#'
+#' @param AMARETTOinit List output from AMARETTO_Initialize().
+#' @param AMARETTOresults List output from AMARETTO_Run()
+#'
+#' @return result
+#' @export
+#' @examples
+#' data('ProcessedDataLIHC')
+#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
+#'                                     NrModules = 2, VarPercentage = 50)
+#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
+#' AMARETTO_MD <- AMARETTO_CreateModuleData(AMARETTOinit, AMARETTOresults)
+AMARETTO_CreateModuleData <- function(AMARETTOinit, AMARETTOresults) {
+    ModuleData = matrix(0, AMARETTOresults$NrModules, length(colnames(AMARETTOinit$MA_matrix_Var)))
+    rownames(ModuleData) = rownames(AMARETTOresults$AutoRegulationReport)
+    colnames(ModuleData) = colnames(AMARETTOinit$MA_matrix_Var)
+    for (ModuleNr in 1:AMARETTOresults$NrModules) {
+        currentModuleData = AMARETTOinit$MA_matrix_Var[AMARETTOresults$ModuleMembership[, 1] == ModuleNr, ]
+        if (length(which(AMARETTOresults$ModuleMembership[, 1] == ModuleNr)) > 1) {
+            ModuleData[ModuleNr, ] = colMeans(currentModuleData)
+        } else {
+            ModuleData[ModuleNr, ] = currentModuleData
+        }
+    }
+    return(ModuleData)
+}
+
+#' AMARETTO_CreateRegulatorPrograms
+#'
+#' @param AMARETTOinit  List output from AMARETTO_Initialize().
+#' @param AMARETTOresults List output from AMARETTO_Run()
+#'
+#' @return result
+#' @export
+#' @examples
+#' data('ProcessedDataLIHC')
+#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
+#'                                     NrModules = 2, VarPercentage = 50)
+#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
+#' AMARETTO_RP <- AMARETTO_CreateRegulatorPrograms(AMARETTOinit,AMARETTOresults)
+AMARETTO_CreateRegulatorPrograms <- function(AMARETTOinit, AMARETTOresults) {
+    RegulatorProgramData = matrix(0, AMARETTOresults$NrModules, length(colnames(AMARETTOinit$MA_matrix_Var)))
+    rownames(RegulatorProgramData) = rownames(AMARETTOresults$AutoRegulationReport)
+    colnames(RegulatorProgramData) = colnames(AMARETTOinit$MA_matrix_Var)
+    RegulatorNames = rownames(AMARETTOinit$RegulatorData)
+    for (ModuleNr in 1:AMARETTOresults$NrModules) {
+        currentRegulators = RegulatorNames[which(AMARETTOresults$RegulatoryPrograms[ModuleNr, ] != 0)]
+        weights = AMARETTOresults$RegulatoryPrograms[ModuleNr, currentRegulators]
+        RegulatorData = AMARETTOinit$RegulatorData[currentRegulators, ]
+        RegulatorProgramData[ModuleNr, ] = weights %*% RegulatorData
+    }
+    return(RegulatorProgramData)
+}
+
+
+#' Lambda_Sequence
+#'
+#' @return result
+#' @keywords internal
+Lambda_Sequence <- function(sx, sy) {
+    n <- nrow(sx)
+    lambda_max <- max(abs(colSums(sx * sy)))/n
+    epsilon <- 1e-04
+    K <- 100
+    lambdaseq <- round(exp(seq(log(lambda_max), log(lambda_max * epsilon), length.out = K)), digits = 10)
+    return(lambdaseq)
+}