Diff of /R/internal-functions.R [000000] .. [28e211]

Switch to unified view

a b/R/internal-functions.R
1
#' @importFrom fpc clusterboot cluster.stats calinhara dudahart2
2
#' @importFrom withr with_par with_options local_options
3
clustfun <- function(
4
    x,
5
    clustnr = 20,
6
    bootnr = 50,
7
    metric = "pearson",
8
    do.gap = TRUE,
9
    SE.method = "Tibs2001SEmax",
10
    SE.factor = .25,
11
    B.gap = 50,
12
    cln = 0,
13
    rseed = rseed,
14
    quiet = FALSE) {
15
  if (clustnr < 2) stop("Choose clustnr > 1")
16
  di <- dist.gen(t(x), method = metric)
17
  if (do.gap | cln > 0) {
18
    gpr <- NULL
19
    if (do.gap) {
20
      set.seed(rseed)
21
      gpr <- clusGap(
22
        as.matrix(di),
23
        FUNcluster = kmeans,
24
        K.max = clustnr,
25
        B = B.gap,
26
        verbose = !quiet
27
      )
28
      if (cln == 0) {
29
        cln <- maxSE(
30
          gpr$Tab[, 3],
31
          gpr$Tab[, 4],
32
          method = SE.method,
33
          SE.factor
34
        )
35
      }
36
    }
37
    if (cln <= 1) {
38
      clb <- list(
39
        result   = list(partition = rep(1, dim(x)[2])),
40
        bootmean = 1
41
      )
42
      names(clb$result$partition) <- names(x)
43
      return(list(x = x, clb = clb, gpr = gpr, di = di))
44
    }
45
    clb <- clusterboot(
46
      di,
47
      B             = bootnr,
48
      distances     = FALSE,
49
      bootmethod    = "boot",
50
      clustermethod = KmeansCBI,
51
      krange        = cln,
52
      scaling       = FALSE,
53
      multipleboot  = FALSE,
54
      bscompare     = TRUE,
55
      seed          = rseed,
56
      count         = !quiet
57
    )
58
    return(list(x = x, clb = clb, gpr = gpr, di = di))
59
  }
60
}
61
62
Kmeansruns <- function(
63
    data,
64
    krange = 2:10,
65
    criterion = "ch",
66
    iter.max = 100,
67
    runs = 100,
68
    scaledata = FALSE,
69
    alpha = 0.001,
70
    critout = FALSE,
71
    plot = FALSE,
72
    method = "euclidean",
73
    ...) {
74
  data <- as.matrix(data)
75
  if (criterion == "asw") sdata <- dist(data)
76
  if (scaledata) data <- scale(data)
77
  cluster1 <- 1 %in% krange
78
  crit <- numeric(max(krange))
79
  km <- list()
80
  for (k in krange) {
81
    if (k > 1) {
82
      minSS <- Inf
83
      kmopt <- NULL
84
      for (i in 1:runs) {
85
        opar <- withr::local_options(show.error.messages = FALSE)
86
        repeat {
87
          kmm <- try(kmeans(data, k))
88
          if (!is(kmm, "try-error")) break
89
        }
90
        opar <- withr::local_options(show.error.messages = TRUE)
91
        on.exit(withr::local_options(opar))
92
        swss <- sum(kmm$withinss)
93
        if (swss < minSS) {
94
          kmopt <- kmm
95
          minSS <- swss
96
        }
97
        if (plot) {
98
          opar <- withr::local_par(ask = TRUE)
99
          on.exit(withr::local_par(opar))
100
          pairs(data, col = kmm$cluster, main = swss)
101
        }
102
      }
103
      km[[k]] <- kmopt
104
      crit[k] <- switch(criterion,
105
        asw = cluster.stats(sdata, km[[k]]$cluster)$avg.silwidth,
106
        ch  = calinhara(data, km[[k]]$cluster)
107
      )
108
      if (critout) {
109
        message(k, " clusters ", crit[k], "\n")
110
      }
111
    }
112
  }
113
  if (cluster1) {
114
    cluster1 <- dudahart2(data, km[[2]]$cluster, alpha = alpha)$cluster1
115
  }
116
  k.best <- which.max(crit)
117
  if (cluster1) k.best <- 1
118
  km[[k.best]]$crit <- crit
119
  km[[k.best]]$bestk <- k.best
120
  out <- km[[k.best]]
121
  return(out)
122
}
123
124
KmeansCBI <- function(
125
    data,
126
    krange,
127
    k = NULL,
128
    scaling = FALSE,
129
    runs = 1,
130
    criterion = "ch",
131
    method = "euclidean",
132
    ...) {
133
  if (!is.null(k)) krange <- k
134
  if (!identical(scaling, FALSE)) {
135
    sdata <- scale(data, center = TRUE, scale = scaling)
136
  } else {
137
    sdata <- data
138
  }
139
  c1 <- Kmeansruns(
140
    sdata,
141
    krange,
142
    runs = runs,
143
    criterion = criterion,
144
    method = method,
145
    ...
146
  )
147
  partition <- c1$cluster
148
  cl <- list()
149
  nc <- krange
150
  for (i in 1:nc) cl[[i]] <- partition == i
151
  out <- list(
152
    result = c1,
153
    nc = nc,
154
    clusterlist = cl,
155
    partition = partition,
156
    clustermethod = "kmeans"
157
  )
158
  return(out)
159
}
160
161
dist.gen <- function(x, method = "euclidean") {
162
  if (method %in% c("spearman", "pearson", "kendall")) {
163
    as.dist(1 - cor(t(x), method = method))
164
  } else {
165
    dist(x, method = method)
166
  }
167
}
168
169
binompval <- function(p, N, n) {
170
  pval <- pbinom(n, round(N, 0), p, lower.tail = TRUE)
171
  filter <- !is.na(pval) & pval > 0.5
172
  pval[filter] <- 1 - pval[filter]
173
  return(pval)
174
}
175
176
add_legend <- function(...) {
177
  opar <- withr::local_par(
178
    fig = c(0, 1, 0, 1),
179
    oma = c(0, 0, 0, 0),
180
    mar = c(0, 0, 0, 0),
181
    new = TRUE
182
  )
183
  on.exit(withr::local_par(opar))
184
  plot(
185
    x    = 0,
186
    y    = 0,
187
    type = "n",
188
    bty  = "n",
189
    xaxt = "n",
190
    yaxt =  "n"
191
  )
192
  legend(...)
193
}
194
195
downsample <- function(x, n, dsn) {
196
  x <- round(x[, apply(x, 2, sum, na.rm = TRUE) >= n], 0)
197
  nn <- min(apply(x, 2, sum))
198
  for (j in 1:dsn) {
199
    z <- data.frame(GENEID = rownames(x))
200
    rownames(z) <- rownames(x)
201
    initv <- rep(0, nrow(z))
202
    for (i in seq_len(ncol(x))) {
203
      y <- aggregate(
204
        rep(1, nn), list(sample(
205
          rep(rownames(x), x[, i]), nn
206
        )),
207
        sum
208
      )
209
      na <- names(x)[i]
210
      names(y) <- c("GENEID", na)
211
      rownames(y) <- y$GENEID
212
      z[, na] <- initv
213
      k <- intersect(rownames(z), y$GENEID)
214
      z[k, na] <- y[k, na]
215
      z[is.na(z[, na]), na] <- 0
216
    }
217
    rownames(z) <- as.vector(z$GENEID)
218
    ds <- if (j == 1) z[, -1] else ds + z[, -1]
219
  }
220
  ds <- ds / dsn + .1
221
  return(ds)
222
}
223
224
eval.pred <- function(pred.class, true.class, class1, performance) {
225
  for (index in seq_len(length(pred.class))) {
226
    pred <- pred.class[index]
227
    true <- true.class[index]
228
    if (pred == true && true == class1) {
229
      performance["TP"] <- performance["TP"] + 1
230
    } else if (pred != true && true == class1) {
231
      performance["FN"] <- performance["FN"] + 1
232
    } else if (pred != true && true != class1) {
233
      performance["FP"] <- performance["FP"] + 1
234
    } else if (pred == true && true != class1) {
235
      performance["TN"] <- performance["TN"] + 1
236
    }
237
  }
238
  return(performance)
239
}
240
241
SN <- function(con.mat) {
242
  TP <- con.mat[1, 1]
243
  FN <- con.mat[2, 1]
244
  return(TP / (TP + FN))
245
}
246
247
SP <- function(con.mat) {
248
  TN <- con.mat[2, 2]
249
  FP <- con.mat[1, 2]
250
  return(TN / (TN + FP))
251
}
252
253
ACC <- function(con.mat) {
254
  TP <- con.mat[1, 1]
255
  FN <- con.mat[2, 1]
256
  TN <- con.mat[2, 2]
257
  FP <- con.mat[1, 2]
258
  return((TP + TN) / (TP + FN + TN + FP))
259
}
260
261
MCC <- function(con.mat) {
262
  TP <- con.mat[1, 1]
263
  FN <- con.mat[2, 1]
264
  TN <- con.mat[2, 2]
265
  FP <- con.mat[1, 2]
266
  denom <- sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
267
  denom <- ifelse(denom == 0, NA, denom)
268
  return((TP * TN - FP * FN) / denom)
269
}
270
271
#' @title Reformat Siggenes Table
272
#' @description Reformats the Siggenes table output from the SAMR package
273
#' @param table output from `samr::samr.compute.siggenes.table`
274
#' @seealso replaceDecimals
275
#' @author Waldir Leoncio
276
reformatSiggenes <- function(table) {
277
  if (is.null(table)) {
278
    return(table)
279
  }
280
  table <- as.data.frame(table)
281
  # ==========================================================================
282
  # Replacing decimal separators
283
  # ==========================================================================
284
  table[, "Score(d)"] <- replaceDecimals(table[, "Score(d)"])
285
  table[, "Numerator(r)"] <- replaceDecimals(table[, "Numerator(r)"])
286
  table[, "Denominator(s+s0)"] <- replaceDecimals(table[, "Denominator(s+s0)"])
287
  table[, "Fold Change"] <- replaceDecimals(table[, "Fold Change"])
288
  table[, "q-value(%)"] <- replaceDecimals(table[, "q-value(%)"])
289
  # ==========================================================================
290
  # Changing vector classes
291
  # ==========================================================================
292
  table[, "Row"] <- as.numeric(table[, "Row"])
293
  table[, "Score(d)"] <- as.numeric(table[, "Score(d)"])
294
  table[, "Numerator(r)"] <- as.numeric(table[, "Numerator(r)"])
295
  table[, "Denominator(s+s0)"] <- as.numeric(table[, "Denominator(s+s0)"])
296
  table[, "Fold Change"] <- as.numeric(table[, "Fold Change"])
297
  table[, "q-value(%)"] <- as.numeric(table[, "q-value(%)"])
298
  # ==========================================================================
299
  # Returning output
300
  # ==========================================================================
301
  return(table)
302
}
303
304
#' @title Replace Decimals
305
#' @description Replaces decimals separators between comma and periods on a
306
#' character vector
307
#' @note This function was especially designed to be used with retormatSiggenes
308
#' @param x vector of characters
309
#' @param from decimal separator on input file
310
#' @param to decimal separator for output file
311
#' @seealso reformatSiggenes
312
replaceDecimals <- function(x, from = ",", to = ".") {
313
  x <- gsub(",", ".", x)
314
  return(x)
315
}
316
317
#' @title Prepare Example Dataset
318
#' @description Internal function that prepares a pre-treated dataset for use in
319
#' several examples
320
#' @param dataset Dataset used for transformation
321
#' @param save save results?
322
#' @details This function serves the purpose of treating datasets such as
323
#' valuesG1msReduced to reduce examples of other functions by bypassing some
324
#' analysis steps covered in the vignettes.
325
#' @return Two rda files, ones for K-means clustering and another for
326
#' Model-based clustering.
327
#' @author Waldir Leoncio
328
prepExampleDataset <- function(dataset, save = TRUE) {
329
  # ==========================================================================
330
  # Initial data treatment
331
  # ==========================================================================
332
  message("Treating dataset")
333
  sc <- DISCBIO(dataset)
334
  sc <- NoiseFiltering(
335
    sc,
336
    percentile = 0.9, CV = 0.2, export = FALSE, plot = FALSE, quiet = TRUE
337
  )
338
  sc <- Normalizedata(sc)
339
  sc <- FinalPreprocessing(sc, export = FALSE, quiet = TRUE)
340
  # ==========================================================================
341
  # Clustering
342
  # ==========================================================================
343
  message("K-means clustering")
344
  sc_k <- Clustexp(sc, cln = 3, quiet = TRUE)
345
  sc_k <- comptSNE(sc_k, quiet = TRUE)
346
  valuesG1msReduced_treated_K <- sc_k
347
  message("Model-based clustering")
348
  sc_mb <- Exprmclust(sc, quiet = TRUE)
349
  sc_mb <- comptSNE(sc_mb, rseed = 15555, quiet = TRUE)
350
  valuesG1msReduced_treated_MB <- sc_mb
351
  # ==========================================================================
352
  # Output
353
  # ==========================================================================
354
  message("Saving datasets")
355
  if (save) {
356
    save(
357
      valuesG1msReduced_treated_K,
358
      file = file.path("data", "valuesG1msReduced_treated_K.rda")
359
    )
360
    save(
361
      valuesG1msReduced_treated_MB,
362
      file = file.path("data", "valuesG1msReduced_treated_MB.rda")
363
    )
364
  } else {
365
    message("Not saving dataset because (save == FALSE)")
366
  }
367
}
368
369
#' @title Retries a URL
370
#' @description Retries a URL
371
#' @param data A gene list
372
#' @param species The taxonomy name/id. Default is "9606" for Homo sapiens
373
#' @param outputFormat format of the output. Can be "highres_image", "tsv",
374
#' "json", "tsv-no-header", "xml"
375
#' @param maxRetries maximum number of attempts to connect to the STRING api.
376
#' @param successCode Status code number that represents success
377
#' @return either the output of httr::GET or an error message
378
#' @importFrom httr GET status_code
379
#' @author Waldir Leoncio
380
retrieveURL <- function(
381
    data, species, outputFormat, maxRetries = 3, successCode = 200) {
382
  # ======================================================== #
383
  # Setting up retrieval                                     #
384
  # ======================================================== #
385
  string_api_url <- "https://string-db.org/api/"
386
  method <- "network"
387
  url <- paste0(
388
    string_api_url, outputFormat, "/", method, "?identifiers=",
389
    paste(as.character(data), collapse = "%0d"), "&species=",
390
    species
391
  )
392
393
  # ======================================================== #
394
  # Retrieving URL                                           #
395
  # ======================================================== #
396
  message("Retrieving URL. Please wait...")
397
  repos <- GET(url)
398
  failedGET <- status_code(repos) != successCode
399
  r <- 1
400
  while (failedGET & (r <= maxRetries)) {
401
    message("Failed retrieval. Retry ", r, " out of ", maxRetries, ".")
402
    repos <- GET(url)
403
    failedGET <- status_code(repos) != successCode
404
    r <- r + 1
405
  }
406
407
  # ======================================================== #
408
  # Final output                                             #
409
  # ======================================================== #
410
  if (failedGET) {
411
    stop(
412
      "Unable to retrieve URL. Please check the parameters ",
413
      "passed to the Networking() function, increase the ",
414
      "'maxRetries' parameter or try again later."
415
    )
416
  } else {
417
    message("Successful retrieval.")
418
    return(repos)
419
  }
420
}