Diff of /OmicsFold/R/permutation.R [000000] .. [e26484]

Switch to unified view

a b/OmicsFold/R/permutation.R
1
.permute.data.labels <- function (lab) {
2
  return (sample(lab))
3
}
4
5
.calculate.mode <- function(x) {
6
  uniqx <- unique(na.omit(x))
7
  uniqx[which.max(tabulate(match(x, uniqx)))]
8
}
9
10
.calculate.anti.mode <- function(x) {
11
  uniqx <- unique(na.omit(x))
12
  uniqx[which.min(tabulate(match(x, uniqx)))]
13
}
14
15
.find.permutation.extent <- function(permuted, original) {
16
  num.lo <- length(original[original == levels(original)[1]])
17
  num.hi <- length(original[original == levels(original)[2]])
18
19
  lo <- permuted[1:num.lo]
20
  hi <- permuted[num.lo+1:length(permuted)]
21
22
  lo.fac <- .calculate.mode(lo)
23
  hi.fac <- .calculate.anti.mode(lo)
24
25
  lo.match <- length(which(lo == lo.fac))
26
  hi.match <- length(which(hi == hi.fac))
27
28
  return ((lo.match + hi.match) / length(permuted))
29
}
30
31
32
#' Perform a re-fit for a DIABLO model after permuting labels
33
#'
34
#' @description
35
#' Perform a full multi-omics model fitting and performance assessment after
36
#' permuting the labels associated with the classes. This can take some time.
37
#' This is useful when you want to assess whether a model is capable of
38
#' overfitting the data it was given.
39
#'
40
#' @param data Object containing input data blocks.
41
#' @param design DIABLO block relation design matrix.
42
#' @param data.labels Unpermuted data class labels.
43
#' @param test.keepX Array of values to test for sparse model training.
44
#'
45
#' @return Model performance balanced error rate.
46
#' @export
47
#'
48
train.permuted.model <- function(data, design, data.labels, test.keepX) {
49
  permuted.data.labels <- .permute.data.labels(data.labels)
50
  print ("Permuted data labels: ")
51
  print (permuted.data.labels)
52
53
  print ("Evaluating error rate over 6 components")
54
  sgccda.res.permuted <- mixOmics::block.splsda(X = data,
55
                                                Y = permuted.data.labels,
56
                                                ncomp = 6,
57
                                                design = design)
58
  perf.diablo.permuted <- mixOmics::perf(sgccda.res.permuted,
59
                                         validation = 'Mfold',
60
                                         folds = 8,
61
                                         nrepeat = 50,
62
                                         cpus = 4,
63
                                         progressBar = TRUE)
64
  ncomp = perf.diablo.permuted$choice.ncomp$WeightedVote["Overall.BER",
65
                                                         "centroids.dist"]
66
  print (paste("Optimal components: ", ncomp, sep=""))
67
68
  print ("Tuning variable penalization")
69
  tune.diablo.permuted <- mixOmics::tune.block.splsda(X = data,
70
                                                      Y = permuted.data.labels,
71
                                                      ncomp = ncomp,
72
                                                      test.keepX = test.keepX,
73
                                                      design = design,
74
                                                      validation = 'Mfold',
75
                                                      folds = 8,
76
                                                      nrepeat = 2,
77
                                                      cpus = 4,
78
                                                      dist = "centroids.dist",
79
                                                      progressBar = TRUE)
80
  list.keepX.permuted <- tune.diablo.permuted$choice.keepX
81
  print ("Optimal selection: ")
82
  print (list.keepX.permuted)
83
84
  print ("Evaluating model mfold error rate")
85
  sgccda.trained.permuted <- mixOmics::block.splsda(X = data,
86
                                                    Y = permuted.data.labels,
87
                                                    ncomp = ncomp,
88
                                                    keepX = list.keepX.permuted,
89
                                                    design = design)
90
  perf.diablo.permuted <- mixOmics::perf(sgccda.trained.permuted,
91
                                         validation = 'Mfold',
92
                                         M = 8,
93
                                         nrepeat = 200,
94
                                         dist = 'centroids.dist',
95
                                         cpus = 4,
96
                                         progressBar = TRUE)
97
  print ("MFold error rate: ")
98
  print (perf.diablo.permuted$WeightedVote.error.rate)
99
100
  return (perf.diablo.permuted$WeightedVote.error.rate)
101
}
102
103
104
#' Perform a fast model fit with permuted labels
105
#'
106
#' @description
107
#' Perform a quick multi-omic model fit to data with permutated class labels.
108
#' This does not perform the (slow) step of sparse variable or component number
109
#' selection - so may be more approximate.
110
#'
111
#' @param data Object containing input data blocks.
112
#' @param design DIABLO block relation design matrix.
113
#' @param data.labels Unpermuted data class labels.
114
#' @param ncomp Number of components to use in the model.
115
#' @param list.keepX.permuted Set number of variable to select from each block.
116
#'
117
#' @return List containing a representation of the permuted labels, an estimate
118
#' of the permutation degree (as stochastically permutation may scramble the
119
#' labels more or less thoroughly) and the balanced error rate over one and two
120
#' components.
121
#' @export
122
#'
123
quick.permuted.fit <- function(data, design, data.labels, ncomp,
124
                               list.keepX.permuted) {
125
  permuted.data.labels <- .permute.data.labels(data.labels)
126
127
  sgccda.trained.permuted <- mixOmics::block.splsda(X = data,
128
                                                    Y = permuted.data.labels,
129
                                                    ncomp = ncomp,
130
                                                    keepX = list.keepX.permuted,
131
                                                    design = design)
132
  perf.diablo.permuted <- mixOmics::perf(sgccda.trained.permuted,
133
                                         validation = 'Mfold',
134
                                         M = 8,
135
                                         nrepeat = 200,
136
                                         dist = 'centroids.dist',
137
                                         cpus = 4)
138
139
  error.rate.comp1 <-
140
      perf.diablo.permuted$WeightedVote.error.rate$centroids.dist[4,1]
141
  error.rate.comp2 <-
142
      perf.diablo.permuted$WeightedVote.error.rate$centroids.dist[4,2]
143
144
  return(list(
145
    "permuted.labels" = paste(as.character(permuted.data.labels),
146
                              collapse = " "),
147
    "permutation.degree" = .find.permutation.extent(permuted.data.labels),
148
    "error.rate.comp1" = error.rate.comp1,
149
    "error.rate.comp2" = error.rate.comp2
150
  ))
151
}