# Diff of /R/getPINSPlus.R [000000] .. [494cbf]

# Switch to unified view

# a b/R/getPINSPlus.R
1
#' @name getPINSPlus
#' @title Get subtypes from PINSPlus
#' @description This function wraps the PINSPlus (Perturbation Clustering for data INtegration and disease Subtyping) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
#' @param data List of matrices (2 to 6 of them). The matrices are stacked with `rbind()` and the result is transposed before clustering, so every matrix must have the same columns; the column names become the sample IDs in `clust.res`.
#' @param N.clust Number of clusters. It is passed to PerturbationClustering as both `kMin` and `kMax`.
#' @param clusteringMethod The name of built-in clustering algorithm that PerturbationClustering will use. Currently supported algorithm are kmeans, pam and hclust. Default value is "kmeans".
#' @param iterMin The minimum number of iterations. Default value is 50
#' @param iterMax The maximum number of iterations. Default value is 500.
#' @param norMethod A single string indicating the normalization applied to each row of the stacked matrix before clustering: "median-centered", "mean-centered", "z-score" or "none" (default). `NULL` is treated like "none".
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson. This argument is accepted but not used by this function.
#' @return A list with the following components:
#'
#'         \code{fit}       an object returned by \link[PINSPlus]{PerturbationClustering}.
#'
#'         \code{clust.res} a data.frame storing sample ID and corresponding clusters.
#'
#'         \code{mo.method} a string value indicating the method used for multi-omics integrative clustering.
#' @export
#' @examples # There is no example and please refer to vignette.
#' @importFrom PINSPlus PerturbationClustering
#' @importFrom dplyr %>%
#' @references Nguyen H, Shrestha S, Draghici S, Nguyen T (2019). PINSPlus: a tool for tumor subtype discovery in integrated genomic data. Bioinformatics, 35(16):2843-2846.
getPINSPlus <- function(data             = NULL,
                        N.clust          = NULL,
                        type             = rep("gaussian", length(data)),
                        norMethod        = "none",
                        clusteringMethod = "kmeans",
                        iterMin          = 50,
                        iterMax          = 500){

  # check data
  n_dat <- length(data)
  if(n_dat > 6){
    stop('current verision of MOVICS can support up to 6 datasets.')
  }
  if(n_dat < 2){
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # validate the normalization method before touching the data, so an invalid
  # choice is reported ahead of any rbind() failure
  if(!is.null(norMethod)) {
    if(!is.element(norMethod, c("median-centered","mean-centered","z-score","none"))) {
      stop("the normalized method should be one of median-centered, mean-centered, z-score or none!")
    }
  }

  # stack all omics layers into one matrix (rows of every layer, shared columns)
  d <- do.call(rbind, data)

  # row-wise normalization of the stacked matrix; NULL leaves it untouched
  if(!is.null(norMethod)) {
    d <- switch(norMethod,
                "median-centered" = sweep(d, 1, apply(d, 1, median, na.rm = TRUE)),
                "mean-centered"   = sweep(d, 1, apply(d, 1, mean, na.rm = TRUE)),
                "z-score"         = t(scale(t(d))),
                "none"            = d)
  }

  if(!is.element(clusteringMethod, c("kmeans", "hclust", "pam"))) {
    stop("clusteringMethod should be one of kmeans, hclust, or pam!")
  }

  # transpose so that the original columns (samples) become rows
  dat <- t(d)

  # Original author's note: SubtypingOmicsData() is meant for multi-omics input
  # but "cannot determine cluster number", so the stacked matrix is clustered as
  # one "feature" set with PerturbationClustering(), pinning kMin = kMax = N.clust.
  fit <- PerturbationClustering(data             = dat,
                                kMin             = N.clust,
                                kMax             = N.clust,
                                clusteringMethod = clusteringMethod,
                                iterMin          = iterMin,
                                iterMax          = iterMax,
                                verbose          = TRUE)

  # one row per sample, keyed by sample ID, in the input column order
  clustres <- data.frame(samID = rownames(dat),
                         clust = fit$cluster,
                         row.names = rownames(dat),
                         stringsAsFactors = FALSE)

  return(list(fit = fit, clust.res = clustres, mo.method = "PINSPlus"))
}