--- a
+++ b/R/XenaHub-class.R
@@ -0,0 +1,376 @@
+# FUN: Set Class & operate XenaHub object directly
+
+##' Class XenaHub
+##' @description  a S4 class to represent UCSC Xena Data Hubs
+##' @slot hosts hosts of data hubs
+##' @slot cohorts cohorts of data hubs
+##' @slot datasets datasets of data hubs
+##' @importFrom methods new
+##' @export
+.XenaHub <- setClass(
+  "XenaHub",
+  representation = representation(
+    hosts = "character",
+    cohorts = "character",
+    datasets = "character"
+  )
+)
+
+setMethod("show", "XenaHub", function(object) {
+  showsome <- function(label, x) {
+    len <- length(x)
+    if (len > 6) {
+      x <- c(head(x, 3), "...", tail(x, 2))
+    }
+    cat(label,
+      "() (",
+      len,
+      " total):",
+      "\n  ",
+      paste0(x, collapse = "\n  "),
+      "\n",
+      sep = ""
+    )
+  }
+  cat("class:", class(object), "\n")
+  cat("hosts():",
+    "\n  ", paste0(hosts(object), collapse = "\n  "),
+    "\n",
+    sep = ""
+  )
+  showsome("cohorts", cohorts(object))
+  showsome("datasets", datasets(object))
+})
+
+
+##' @title UCSC Xena Default Hosts
+##' @description Return Xena default hosts
+##' @return A character vector include current defalut hosts
+##' @author Shixiang Wang <w_shixiang@163.com>
+##' @seealso [UCSCXenaTools::XenaHub()]
+##' @export
+xena_default_hosts <- function() {
+  c(
+    "https://ucscpublic.xenahubs.net",
+    "https://tcga.xenahubs.net",
+    "https://gdc.xenahubs.net",
+    "https://gdcV18.xenahubs.net",
+    "https://icgc.xenahubs.net",
+    "https://toil.xenahubs.net",
+    "https://pancanatlas.xenahubs.net",
+    "https://xena.treehouse.gi.ucsc.edu:443",
+    "https://pcawg.xenahubs.net",
+    "https://atacseq.xenahubs.net",
+    "https://singlecellnew.xenahubs.net",
+    "https://kidsfirst.xenahubs.net",
+    "https://tdi.xenahubs.net"
+  )
+}
+
+.xena_hosts <- c(
+  "publicHub",
+  "tcgaHub",
+  "gdcHub",
+  "gdcHubV18",
+  "icgcHub",
+  "toilHub",
+  "pancanAtlasHub",
+  "treehouseHub",
+  "pcawgHub",
+  "atacseqHub",
+  "singlecellHub",
+  "kidsfirstHub",
+  "tdiHub"
+)
+
+names(.xena_hosts) <- xena_default_hosts()
+
+# Add Hiplot mirror url
+# Still use UCSC Xena URL if it is not available
+.xena_hosts_hiplot <- .xena_hosts
+names(.xena_hosts_hiplot) <- c(
+  "https://xena-ucscpublic.hiplot.com.cn",
+  "https://xena-tcga.hiplot.com.cn",
+  "https://xena-gdc.hiplot.com.cn",
+  "https://xena-icgc.hiplot.com.cn",
+  "https://xena-toil.hiplot.com.cn",
+  "https://xena-pancanatlas.hiplot.com.cn",
+  "https://xena.treehouse.gi.ucsc.edu:443", #!
+  "https://xena-pcawg.hiplot.com.cn",
+  "https://xena-atacseq.hiplot.com.cn",
+  "https://singlecellnew.xenahubs.net", #!
+  "https://kidsfirst.xenahubs.net" #!
+  #"https://tdi.xenahubs.net" #!
+)
+# Map hiplot to ucsc
+.xena_mirror_map <- names(.xena_hosts)
+names(.xena_mirror_map) <- names(.xena_hosts_hiplot)
+# Map ucsc to hiplot
+.xena_mirror_map_rv <- names(.xena_hosts_hiplot)
+names(.xena_mirror_map_rv) <- names(.xena_hosts)
+
+##' Generate a XenaHub Object
+##'
+##' It is used to generate original
+##' `XenaHub` object according to hosts, cohorts, datasets or hostName.
+##' If these arguments not specified, all hosts and corresponding datasets
+##' will be returned as a `XenaHub` object. All datasets can be found
+##' at <https://xenabrowser.net/datapages/>.
+##'
+##'
+##' @param hosts a character vector specify UCSC Xena hosts, all available hosts can be
+##' found by `xena_default_hosts()` function. `hostName` is a more recommend option.
+##' @param cohorts default is empty character vector, all cohorts will be returned.
+##' @param datasets default is empty character vector, all datasets will be returned.
+##' @param hostName name of host, available options can be accessed by `.xena_hosts`
+##'  This is an easier option for user than `hosts` option. Note, this option
+##'  will overlap `hosts`.
+##' @return a [XenaHub] object
+##' @author Shixiang Wang <w_shixiang@163.com>
+##' @export
+##' @importFrom httr stop_for_status POST content
+##' @importFrom utils head tail
+##' @examples
+##' \dontrun{
+##' #1 query all hosts, cohorts and datasets
+##' xe = XenaHub()
+##' xe
+##' #2 query only TCGA hosts
+##' xe = XenaHub(hostName = "tcgaHub")
+##' xe
+##' hosts(xe)     # get hosts
+##' cohorts(xe)   # get cohorts
+##' datasets(xe)  # get datasets
+##' samples(xe)   # get samples
+##' }
+XenaHub <- function(hosts = xena_default_hosts(),
+                    cohorts = character(),
+                    datasets = character(),
+                    hostName = c(
+                      "publicHub",
+                      "tcgaHub",
+                      "gdcHub",
+                      "gdcHubV18",
+                      "icgcHub",
+                      "toilHub",
+                      "pancanAtlasHub",
+                      "treehouseHub",
+                      "pcawgHub",
+                      "atacseqHub",
+                      "singlecellHub",
+                      "kidsfirstHub",
+                      "tdiHub"
+                    )) {
+  stopifnot(
+    is.character(hosts),
+    is.character(cohorts),
+    is.character(datasets)
+  )
+
+  hostName <- unique(hostName)
+
+  if (length(hostName) != length(.xena_hosts) &
+    all(
+      hostName %in% .xena_hosts
+    )) {
+    .temp <- names(.xena_hosts)
+    names(.temp) <- .xena_hosts
+    hostNames <- .temp %>%
+      as.data.frame() %>%
+      t() %>%
+      as.data.frame()
+    rm(.temp)
+
+    hosts <- as.character(hostNames[, hostName])
+  } else if (!all(hostName %in% .xena_hosts)) {
+    stop("Bad hostName, please check")
+  }
+
+  if (is.null(names(hosts))) {
+    names(hosts) <- hosts
+  }
+
+  hosts0 <- hosts
+  hosts <- Filter(.host_is_alive, hosts)
+  if (length(hosts) == 0L) { # nocov start
+    stop(
+      "\n  no hosts responding:",
+      "\n    ",
+      paste0(hosts0, collapse = "\n  ")
+    )
+  } # nocov end
+
+  all_cohorts <- unlist(.host_cohorts(hosts), use.names = FALSE)
+  if (length(cohorts) == 0L) {
+    cohorts <- all_cohorts
+  } else {
+    hosts <- hosts[.cohort_datasets_count(hosts, cohorts) != 0L]
+  }
+
+  all_datasets <- unlist(.cohort_datasets(hosts, cohorts),
+    use.names = FALSE
+  )
+  if (length(datasets) == 0L) {
+    datasets <- all_datasets
+  } else {
+    if (!all(datasets %in% all_datasets)) { # nocov start
+      bad_dataset <- datasets[!datasets %in% all_datasets]
+      message("Following datasets are not in datasets of hosts, ignore them...")
+      message(bad_dataset)
+    } # nocov end
+    datasets <- all_datasets[all_datasets %in% datasets]
+  }
+
+
+  .XenaHub(
+    hosts = hosts,
+    cohorts = cohorts,
+    datasets = datasets
+  )
+}
+
+##' Get or Update Newest Data Information of UCSC Xena Data Hubs
+##' @param saveTolocal logical. Whether save to local R package data directory for permanent use
+##' or Not.
+##' @return a `data.frame` contains all datasets information of Xena.
+##' @author Shixiang Wang <w_shixiang@163.com>
+##' @export
+##' @examples
+##' \dontrun{
+##' XenaDataUpdate()
+##' XenaDataUpdate(saveTolocal = TRUE)
+##' }
+XenaDataUpdate <- function(saveTolocal = TRUE) { # nocov start
+  # .p_all_cohorts(list(unique(XenaData$XenaHosts)[10]), exclude = list(NULL))
+  # .p_dataset_list(list(XenaData$XenaHosts[1]), list(XenaData$XenaCohorts[1]))
+  message("Disable hiplot firstly.")
+  options(use_hiplot = FALSE)
+  try_query = function(h, max_try = 3L) {
+    Sys.sleep(0.1)
+    tryCatch(
+      {
+        message("==> Trying #", abs(max_try - 4L))
+        .p_all_cohorts(list(h), exclude = list(NULL))
+      },
+      error = function(e) {
+        if (max_try == 1) {
+          warning("Tried 3 times but failed, this hub may down or please check URL or your internet connection!", immediate. = TRUE)
+          return(NULL)
+        } else {
+          try_query(h, max_try - 1L)
+        }
+      }
+    )
+  }
+
+  query_host = function(h) {
+      message("==> Searching cohorts for host ", h, "...")
+
+      chs <- try_query(h, max_try = 3L)
+      if (is.null(chs)) {
+          return(NULL)
+      }
+      chs <- setdiff(chs, "(unassigned)")
+      message("===> #", length(chs), " cohorts found.")
+      message("===> Querying datasets info...")
+      zz <- lapply(chs, function(x, h) {
+          .p_dataset_list(list(h), list(x))
+      }, h = h) %>%
+          stats::setNames(chs) %>%
+          dplyr::bind_rows(.id = "XenaCohorts")
+      message("===> #", nrow(zz), " datasets found.")
+      message("==> Done for host ", h, "...")
+      zz
+  }
+
+  message("=> Obtaining info from UCSC Xena hubs...")
+  XenaInfo <- lapply(names(.xena_hosts), query_host) %>%
+    stats::setNames(names(.xena_hosts)) %>%
+    dplyr::bind_rows(.id = "XenaHosts")
+
+  message("=> Done for obtaining.")
+  message("=> Parsing datasets metadata...")
+
+  XenaInfo <- XenaInfo %>%
+    dplyr::rename(
+      XenaDatasets = .data$name,
+      SampleCount = .data$count,
+      DataSubtype = .data$datasubtype,
+      Type = .data$type,
+      LongTitle = .data$longtitle,
+      ProbeMap = .data$probemap
+    ) %>%
+    dplyr::mutate(XenaHostNames = .xena_hosts[.data$XenaHosts])
+
+  j_data <- lapply(XenaInfo$text, function(x) {
+    # decode metadata from json format
+    # note json data may have different elements for
+    #                different cohort datasets
+    # more work need to be done here
+    #
+    # tt$text contains metadata for dataset
+    # tt$pmtext contains metadata for probemap
+    json_df <- jsonlite::parse_json(x)
+    dplyr::tibble(
+      Citation = json_df[["citation"]] %||% NA,
+      Label = json_df[["label"]] %||% NA,
+      Tags = .collapse_list(json_df[["tags"]]) %||% NA,
+      AnatomicalOrigin = .collapse_list(json_df[["anatomical_origin"]]) %||% NA,
+      SampleType = .collapse_list(json_df[["sample_type"]]) %||% NA,
+      Version = json_df[["version"]] %||% NA,
+      PrimaryDisease = json_df[["primary_disease"]] %||% NA,
+      Platform = json_df[["platform"]] %||% NA,
+      Unit = json_df[["unit"]] %||% NA
+    )
+  })
+
+  message("=> Done for parsing. Tidying...")
+
+  tidy_data <- dplyr::bind_rows(j_data)
+  XenaData <- dplyr::bind_cols(XenaInfo, tidy_data)
+  XenaData <- dplyr::as_tibble(XenaData)
+
+  XenaData <- XenaData %>%
+    dplyr::select(
+      c(
+        "XenaHosts", "XenaHostNames", "XenaCohorts", "XenaDatasets", "SampleCount",
+        "DataSubtype", "Label", "Type", "AnatomicalOrigin", "SampleType",
+        "Tags", "ProbeMap", "LongTitle", "Citation", "Version",
+        "Unit", "Platform"
+      )
+    )
+
+  message("=> Tidying done.")
+
+  if (saveTolocal) {
+    message("=> Saving...")
+    data_dir <- base::system.file("data", package = "UCSCXenaTools")
+    if (dir.exists(data_dir)) {
+      save(XenaData, file = file.path(data_dir, "XenaData.rda"), compress = "xz")
+    } else {
+      message("There is no data directory ", data_dir)
+      message("Please check it.")
+    }
+  }
+  message("=> Done.")
+  XenaData
+} # nocov end
+
+.collapse_list <- function(x) {
+  sapply(x, function(x) x) %>% paste0(collapse = ",")
+}
+
+`%||%` <- function(x, y) {
+  # ifelse(is.null(x), y, x)
+  if (is.null(x)) {
+    y
+  } else {
+    x
+  }
+}
+
+utils::globalVariables(c(
+  ".p_dataset_metadata",
+  ".p_all_cohorts",
+  ".p_dataset_list"
+))