Diff of /R/amaretto_run.R [000000] .. [2ab972]

Switch to unified view

a b/R/amaretto_run.R
1
#' AMARETTO_LarsenBased
2
#' 
3
#' @param Data 
4
#' @param Clusters 
5
#' @param RegulatorData 
6
#' @param Parameters 
7
#' @param NrCores 
8
#' @param random_seeds 
9
#' @param convergence_cutoff 
10
#'
11
#' @import MultiAssayExperiment
12
#' @import graphics
13
#' @importFrom Matrix nnzero
14
#' @importFrom doParallel registerDoParallel
15
#' 
16
#' @return result
17
#' @keywords internal
18
AMARETTO_LarsenBased <- function(Data, Clusters, RegulatorData, Parameters, NrCores, random_seeds, convergence_cutoff) {
19
    registerDoParallel(cores = NrCores)
20
    ptm1 <- proc.time()
21
    RegulatorData_rownames = rownames(RegulatorData)
22
    Data_rownames = rownames(Data)
23
    AutoRegulation = Parameters$AutoRegulation
24
    RegulatorSign = array(0, length(RegulatorData_rownames))
25
    Lambda = Parameters$Lambda2
26
    OneRunStop = Parameters$OneRunStop
27
    random_seeds = Parameters$random_seeds
28
    convergence_cutoff = Parameters$convergence_cutoff
29
    if (AutoRegulation == 1) {
30
        cat("\tAutoregulation is turned ON.\n")
31
    } else if (AutoRegulation == 2) {
32
        cat("\tAutoregulation is turned ON.\n")
33
    } else {
34
        cat("\tAutoregulation is turned OFF.\n")
35
    }
36
    NrReassignGenes = length(Data_rownames)
37
    NrReassignGenes_history <- NrReassignGenes
38
    error_history<-list()
39
    index=1
40
    while (NrReassignGenes > convergence_cutoff * length(Data_rownames)) {
41
        ptm <- proc.time()
42
        switch(Parameters$Mode, larsen = {
43
            regulatoryPrograms <- AMARETTO_LearnRegulatoryProgramsLarsen(Data, Clusters, RegulatorData, RegulatorSign, Lambda, AutoRegulation, alpha = Parameters$alpha, pmax = Parameters$pmax, random_seeds)
44
        })
45
        error_history[[index]]<-(rowMeans(regulatoryPrograms$error * regulatoryPrograms$error))^0.5
46
        index<-index+1
47
        ptm <- proc.time() - ptm
48
        printf("Elapsed time is %f seconds\n", ptm[3])
49
        NrClusters = length(unique(Clusters))
50
        sum = 0
51
        for (i in 1:NrClusters) {
52
            sum = sum + Matrix::nnzero(regulatoryPrograms$Beta[i,])
53
        }
54
        avg = sum/NrClusters
55
        printf("Average nr of regulators per module: %f \n", avg)
56
        PreviousClusters = Clusters
57
        if (OneRunStop == 1) {
58
            break
59
        }
60
        ptm <- proc.time()
61
        ReassignGenesToClusters <- AMARETTO_ReassignGenesToClusters(Data, RegulatorData, regulatoryPrograms$Beta, Clusters, AutoRegulation)
62
        ptm <- proc.time() - ptm
63
        printf("Elapsed time is %f seconds\n", ptm[3])
64
        NrReassignGenes = ReassignGenesToClusters$NrReassignGenes
65
        Clusters = ReassignGenesToClusters$Clusters
66
        printf("Nr of reassignments is: %i \n", NrReassignGenes)
67
        NrReassignGenes_history <- c(NrReassignGenes_history, NrReassignGenes)
68
    }
69
    ptm1 <- proc.time() - ptm1
70
    printf("Elapsed time is %f seconds\n", ptm1[3])
71
    ModuleMembership = as.matrix(PreviousClusters)
72
    rownames(ModuleMembership) = rownames(Data)
73
    colnames(ModuleMembership) = c("ModuleNr")
74
    result <- list(NrModules = length(unique(Clusters)), RegulatoryPrograms = regulatoryPrograms$Beta, AllRegulators = rownames(RegulatorData),
75
                   AllGenes = rownames(Data), ModuleMembership = ModuleMembership, AutoRegulationReport = regulatoryPrograms$AutoRegulationReport,
76
                   run_history = list(NrReassignGenes_history = NrReassignGenes_history, error_history = error_history))
77
    return(result)
78
}
79
80
#' AMARETTO_LearnRegulatoryProgramsLarsen
81
#' @importFrom foreach foreach
82
#' @importFrom glmnet cv.glmnet
83
#' @importFrom stats coef
84
#' @return result
85
#' @keywords internal
86
AMARETTO_LearnRegulatoryProgramsLarsen <- function(Data, Clusters, RegulatorData, RegulatorSign, Lambda, AutoRegulation, alpha, pmax, random_seeds) {
87
    `%dopar%` <- foreach::`%dopar%`
88
    RegulatorData_rownames = rownames(RegulatorData)
89
    Data_rownames = rownames(Data)
90
    trace = 0
91
    NrFolds = 10
92
    NrClusters = length(unique(Clusters))
93
    NrGenes = nrow(Data)
94
    NrSamples = ncol(Data)
95
    NrInterpolateSteps = 100
96
    if (AutoRegulation >= 1) {}
97
    else if (AutoRegulation == 0) {
98
        BetaSpecial = list(NrClusters, 1)
99
        RegulatorPositions = list(NrClusters, 1)
100
    }
101
    y_all = mat.or.vec(NrClusters, NrSamples)
102
    ClusterIDs = unique(Clusters)
103
    ClusterIDs = sort(ClusterIDs, decreasing = FALSE)
104
    cnt <- 1:NrClusters
105
    ptm1 <- proc.time()
106
    BetaY_all <- foreach(i = 1:NrClusters, .combine = cbind, .init = list(list(), list(), list()), .packages = "glmnet") %dopar% {
107
            if (length(which(Clusters == ClusterIDs[i])) > 1) {
108
                y = apply((Data[which(Clusters == ClusterIDs[i]),]), 2, mean)
109
            } else {
110
                y = Data[which(Clusters == ClusterIDs[i]),]
111
            }
112
            CurrentClusterPositions = which(Clusters %in% ClusterIDs[i])
113
            nrGenesInClusters = length(CurrentClusterPositions)
114
            if (AutoRegulation >= 1) {
115
                X = RegulatorData
116
            } else if (AutoRegulation == 0) {
117
                X = RegulatorData[setdiff(RegulatorData_rownames, Data_rownames[CurrentClusterPositions]),]
118
            }
119
            suppressWM = function(...) suppressWarnings(suppressMessages(...))
120
            
121
            if(!is.null(random_seeds)){
122
              seed<-ifelse (length(random_seeds)==2,random_seeds[2],random_seeds[1])
123
              set.seed(seed)
124
            }
125
            fit = suppressWM(cv.glmnet(t(X), y, alpha = alpha, pmax = pmax, lambda = Lambda_Sequence(t(X), y)))
126
            nonZeroLambdas <- fit$lambda[which(fit$nzero > 0)]
127
            nonZeroCVMs <- fit$cvm[which(fit$nzero > 0)]
128
            if (length(which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))) == 0) {
129
                warnMessage <- paste0("\nOn cluster ", i, " there were no cv.glm results that gave non-zero coefficients.")
130
                message(warnMessage)
131
            }
132
            bestNonZeroLambda <- nonZeroLambdas[which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))]
133
            b_o = coef(fit, s = bestNonZeroLambda)
134
            b_opt <- c(b_o[2:length(b_o)])
135
            if (AutoRegulation == 2) {
136
                CurrentUsedRegulators = RegulatorData_rownames[which(b_opt != 0, arr.ind = TRUE)]
137
                CurrentClusterMembers = Data_rownames[CurrentClusterPositions]
138
                nrIterations = 0
139
                while (length(CurrentClusterMembers[CurrentClusterMembers %in% CurrentUsedRegulators]) != 0) {
140
                  CurrentClusterMembers = setdiff(CurrentClusterMembers, CurrentUsedRegulators)
141
                  nrCurrentClusterMembers = length(CurrentClusterMembers)
142
                  if (nrCurrentClusterMembers > 0) {
143
                    names = Data_rownames %in% CurrentClusterMembers
144
                    if (length(which(names == TRUE)) > 1) {
145
                      y = apply((Data[names, ]), 2, mean)
146
                    } else {
147
                      y = Data[names, ]
148
                    }
149
                    if(!is.null(random_seeds)){
150
                      seed<-ifelse (length(random_seeds)==2,random_seeds[2],random_seeds[1])
151
                      set.seed(seed)
152
                    }
153
                    fit = suppressWM(cv.glmnet(t(X), y, alpha = alpha, pmax = pmax, lambda = Lambda_Sequence(t(X), y)))
154
                    nonZeroLambdas <- fit$lambda[which(fit$nzero > 0)]
155
                    nonZeroCVMs <- fit$cvm[which(fit$nzero > 0)]
156
                    if (length(which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))) == 0) {
157
                      warnMessage <- paste0("\nOn cluster ", i, " there were no cv.glm results that gave non-zero coefficients during the Autoregulation step.")
158
                      message(warnMessage)
159
                    }
160
                    bestNonZeroLambda <- nonZeroLambdas[which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))]
161
                    new_b_o = coef(fit, s = bestNonZeroLambda)
162
                    new_b_opt <- c(new_b_o[2:length(b_o)])
163
                    CurrentUsedRegulators = RegulatorData_rownames[which(new_b_opt != 0)]
164
                    nrIterations = nrIterations + 1
165
                    b_opt = new_b_opt
166
                  } else {
167
                    b_opt = rep(0, length(RegulatorData_rownames))
168
                  }
169
                }
170
                Report <- c(length(CurrentClusterPositions), length(CurrentClusterMembers), nrIterations)
171
            }
172
            if (sum(RegulatorSign[which(RegulatorSign != 0)]) > 0) {
173
                RegulatorCheck = RegulatorSign * t(b_opt)
174
                WrongRegulators = which(RegulatorCheck < 0)
175
                if (length(WrongRegulators) == 0) {
176
                  b_opt[WrongRegulators] = 0
177
                }
178
            }
179
            if (AutoRegulation >= 1) {
180
            } else {
181
                BetaSpecial[i] = b_opt
182
                RegulatorPositions[i] = (RegulatorData_rownames %in% setdiff(RegulatorData_rownames, Data_rownames[CurrentClusterPositions]))
183
            }
184
            list(b_opt, y, Report)
185
        }
186
    if (AutoRegulation == 0) {
187
        for (i in 1:NrClusters) {
188
            Beta[i, RegulatorPositions[i]] = BetaSpecial[i]
189
        }
190
    }
191
    tmpPos = NrClusters + 1
192
    Beta <- do.call(cbind, BetaY_all[1, 2:tmpPos])
193
    Beta = t(Beta)
194
    colnames(Beta) = RegulatorData_rownames
195
    rownames(Beta) = gsub("result.", "Module_", rownames(Beta))
196
    y_all <- do.call(cbind, BetaY_all[2, 2:tmpPos])
197
    y_all = t(y_all)
198
    rownames(y_all) = gsub("result.", "Module_", rownames(y_all))
199
    AutoRegulationReport <- do.call(cbind, BetaY_all[3, 2:tmpPos])
200
    AutoRegulationReport = t(AutoRegulationReport)
201
    rownames(AutoRegulationReport) = gsub("result.", "Module_", rownames(AutoRegulationReport))
202
    error = y_all - (Beta %*% RegulatorData)
203
    result <- list(Beta = Beta, error = error, AutoRegulationReport = AutoRegulationReport)
204
    return(result)
205
}
206
207
#' AMARETTO_ReassignGenesToClusters
208
#'
209
#' @return result
210
#' @importFrom Matrix nnzero
211
#' @importFrom stats cor
212
#' @keywords internal
213
AMARETTO_ReassignGenesToClusters <- function(Data, RegulatorData, Beta, Clusters, AutoRegulation) {
214
    `%dopar%` <- foreach::`%dopar%`
215
    RegulatorData_rownames = rownames(RegulatorData)
216
    Data_rownames = rownames(Data)
217
    NrGenes = nrow(Data)
218
    NrSamples = ncol(Data)
219
    NrReassignGenes = 0
220
    X = RegulatorData
221
    X1 = data.matrix(X)
222
    ModuleVectors = Beta %*% X1
223
    GeneNames = rownames(Data)
224
    ptm1 <- proc.time()
225
    i <- NULL
226
    nc <- foreach(i = 1:NrGenes, .combine = c) %dopar%
227
        {
228
            OldModule = Clusters[i]
229
            CurrentGeneVector = Data[i, , drop = FALSE]
230
            Correlations = cor(t(CurrentGeneVector), t(ModuleVectors))
231
            corr = data.matrix(Correlations, rownames.force = NA)
232
            MaxCorrelation = max(corr, na.rm = TRUE)
233
            MaxPosition = which(signif(corr, digits = 7) == signif(MaxCorrelation, digits = 7))
234
            MaxPosition = MaxPosition[1]
235
            if (AutoRegulation > 0) {
236
                if (MaxPosition != OldModule) {
237
                  NrReassignGenes = NrReassignGenes + 1
238
                }
239
                NewClusters = MaxPosition
240
            } else {
241
                if (nnzero(rownames(RegulatorData_rownames) %in% GeneNames[i]) != 0) {
242
                  if (nnzero(which(which(GeneNames %in% rownames(RegulatorData_rownames)) %in% i) %in% which(Beta[MaxPosition,] != 0)) != 0) {
243
                    if (MaxPosition != OldModule) {
244
                      NrReassignGenes = NrReassignGenes + 1
245
                    }
246
                    NewClusters = MaxPosition
247
                  } else {
248
                    NewClusters = OldModule
249
                  }
250
                } else {
251
                  if (MaxPosition != OldModule) {
252
                    NrReassignGenes = NrReassignGenes + 1
253
                  }
254
                  NewClusters = MaxPosition
255
                }
256
            }
257
        }
258
    ptm1 <- proc.time() - ptm1
259
    NrReassignGenes = length(which(nc != Clusters))
260
    result <- list(NrReassignGenes = NrReassignGenes, Clusters = nc)
261
    return(result)
262
}
263
264
#' AMARETTO_CreateModuleData
265
#'
266
#' @param AMARETTOinit List output from AMARETTO_Initialize().
267
#' @param AMARETTOresults List output from AMARETTO_Run()
268
#'
269
#' @return result
270
#' @export
271
#' @examples
272
#' data('ProcessedDataLIHC')
273
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
274
#'                                     NrModules = 2, VarPercentage = 50)
275
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
276
#' AMARETTO_MD <- AMARETTO_CreateModuleData(AMARETTOinit, AMARETTOresults)
277
AMARETTO_CreateModuleData <- function(AMARETTOinit, AMARETTOresults) {
278
    ModuleData = matrix(0, AMARETTOresults$NrModules, length(colnames(AMARETTOinit$MA_matrix_Var)))
279
    rownames(ModuleData) = rownames(AMARETTOresults$AutoRegulationReport)
280
    colnames(ModuleData) = colnames(AMARETTOinit$MA_matrix_Var)
281
    for (ModuleNr in 1:AMARETTOresults$NrModules) {
282
        currentModuleData = AMARETTOinit$MA_matrix_Var[AMARETTOresults$ModuleMembership[, 1] == ModuleNr, ]
283
        if (length(which(AMARETTOresults$ModuleMembership[, 1] == ModuleNr)) > 1) {
284
            ModuleData[ModuleNr, ] = colMeans(currentModuleData)
285
        } else {
286
            ModuleData[ModuleNr, ] = currentModuleData
287
        }
288
    }
289
    return(ModuleData)
290
}
291
292
#' AMARETTO_CreateRegulatorPrograms
293
#'
294
#' @param AMARETTOinit  List output from AMARETTO_Initialize().
295
#' @param AMARETTOresults List output from AMARETTO_Run()
296
#'
297
#' @return result
298
#' @export
299
#' @examples
300
#' data('ProcessedDataLIHC')
301
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
302
#'                                     NrModules = 2, VarPercentage = 50)
303
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
304
#' AMARETTO_RP <- AMARETTO_CreateRegulatorPrograms(AMARETTOinit,AMARETTOresults)
305
AMARETTO_CreateRegulatorPrograms <- function(AMARETTOinit, AMARETTOresults) {
306
    RegulatorProgramData = matrix(0, AMARETTOresults$NrModules, length(colnames(AMARETTOinit$MA_matrix_Var)))
307
    rownames(RegulatorProgramData) = rownames(AMARETTOresults$AutoRegulationReport)
308
    colnames(RegulatorProgramData) = colnames(AMARETTOinit$MA_matrix_Var)
309
    RegulatorNames = rownames(AMARETTOinit$RegulatorData)
310
    for (ModuleNr in 1:AMARETTOresults$NrModules) {
311
        currentRegulators = RegulatorNames[which(AMARETTOresults$RegulatoryPrograms[ModuleNr, ] != 0)]
312
        weights = AMARETTOresults$RegulatoryPrograms[ModuleNr, currentRegulators]
313
        RegulatorData = AMARETTOinit$RegulatorData[currentRegulators, ]
314
        RegulatorProgramData[ModuleNr, ] = weights %*% RegulatorData
315
    }
316
    return(RegulatorProgramData)
317
}
318
319
320
#' Lambda_Sequence
321
#'
322
#' @return result
323
#' @keywords internal
324
Lambda_Sequence <- function(sx, sy) {
325
    n <- nrow(sx)
326
    lambda_max <- max(abs(colSums(sx * sy)))/n
327
    epsilon <- 1e-04
328
    K <- 100
329
    lambdaseq <- round(exp(seq(log(lambda_max), log(lambda_max * epsilon), length.out = K)), digits = 10)
330
    return(lambdaseq)
331
}