Diff of /R/CombatTumor.R [000000] .. [0f2269]

Switch to unified view

a b/R/CombatTumor.R
1
#' Process and Correct Batch Effects in Tumor Data
2
#'
3
#' This function takes a tumor data set, asks the user for specific tumor types to retain,
4
#' and then corrects for batch effects using the ComBat_seq function from the 'sva' package.
5
#'
6
#' @description
7
#' The function first extracts histological types from the provided tumor data set.
8
#' After displaying these types, the user is prompted to input specific types to retain.
9
#' The data is then filtered based on this input.
10
#'
11
#' Note: This example assumes that different tumor types represent different batches in a general sense.
12
#' Users need to adjust the batch and group vectors based on real-life scenarios.
13
#'
14
#' @param tumor_data_path The path to the tumor data stored in an RDS file.
15
#' @param CombatTumor_output_path A character string specifying the path where the output RDS file will be saved.
16
#' @param auto_mode Logical. If set to TRUE, the function will not prompt the user for input and
17
#'                  will instead use the values provided in default_input. Default is FALSE.
18
#' @param default_input Character string. When auto_mode is TRUE, this parameter specifies the default
19
#'                      tumor types to be retained. It should be provided as a comma-separated string (e.g., "01,06").
20
#'
21
#' @return A data.frame with corrected values after the ComBat_seq adjustment. Note that this function also saves the
22
#'         combat_count_df data as an RDS file at the specified output path.
23
#'
24
#' @details
25
#' The ComBat_seq function from the sva package is used to correct batch effects.
26
#' The function requires the 'sva' package to be installed; ComBat_seq is called through the 'sva' namespace, so the package does not need to be attached beforehand.
27
#'
28
#' @examples
29
#' tumor_file <- system.file("extdata",
30
#'                           "SKCM_Skin_TCGA_exp_tumor_test.rds",
31
#'                           package = "TransProR")
32
#' output_file <- file.path(tempdir(), "SKCM_combat_count.rds")
33
#'
34
#'   SKCM_combat_count <- combat_tumor(
35
#'   tumor_data_path = tumor_file,
36
#'   CombatTumor_output_path = output_file,
37
#'   auto_mode = TRUE,
38
#'   default_input = "01,06"
39
#' )
40
#'
41
#' head(SKCM_combat_count)[1:5, 1:5]
42
#'
43
#' @seealso \code{\link[sva]{ComBat_seq}}
44
#' @importFrom sva ComBat_seq
45
#' @export
46
combat_tumor <- function(tumor_data_path, CombatTumor_output_path, auto_mode = FALSE, default_input = "01,06") {
  # Purpose: read an expression table from an RDS file, keep the samples whose
  # two-character type code (characters 14-15 of each column name) was
  # requested, convert the values to integer counts via 2^x - 1, and correct
  # batch effects with sva::ComBat_seq using the type code as the batch.
  #
  # Arguments:
  #   tumor_data_path         path to the input RDS file (samples in columns).
  #   CombatTumor_output_path path the resulting data.frame is saved to (RDS).
  #   auto_mode               if TRUE, use default_input instead of prompting.
  #   default_input           comma-separated type codes, e.g. "01,06".
  #
  # Returns: a data.frame of integer counts (batch-corrected when at least two
  # of the requested types are present); the same object is written to
  # CombatTumor_output_path.
  #
  # Errors: stops when none of the samples matches the requested types.

  # Load the tumor data
  tumor_data <- readRDS(tumor_data_path)

  # Extract the type code of every sample from its column name
  sample_types <- substring(colnames(tumor_data), 14, 15)
  type_counts <- table(sample_types)

  # Display the available types and their sample counts to the user
  message(" ")
  message("TumorHistologicalTypes:")
  message(paste(names(type_counts), type_counts, sep = ": ", collapse = "\n"))
  # Add a space after the output for separation
  message(" ")

  # Ask the user for input or use default input in auto_mode
  if (auto_mode) {
    raw_input <- default_input
  } else {
    message("Please input the tumor types you wish to retain, separated by commas (e.g., 01,06): ")
    raw_input <- readline()
  }

  # Trim whitespace so that input such as "01, 06" selects both types, and
  # drop empty or repeated entries.
  selected_types <- trimws(strsplit(raw_input, ",")[[1]])
  selected_types <- unique(selected_types[nzchar(selected_types)])

  # Fail early with a clear message instead of an opaque downstream error
  keep <- sample_types %in% selected_types
  if (!any(keep)) {
    stop(
      "No samples match the selected tumor types: ",
      paste(selected_types, collapse = ","),
      call. = FALSE
    )
  }

  # drop = FALSE keeps the two-dimensional shape when one sample is retained
  tumor <- tumor_data[, keep, drop = FALSE]
  retained_types <- sample_types[keep]

  # Back-transform to counts. Setting the storage mode converts to integer
  # while keeping the matrix dimensions, even for a single row or column.
  # NOTE(review): the conversion truncates toward zero (as as.integer does);
  # if the input is log2(count + 1), floating-point error could make a value
  # land just below an integer -- confirm whether rounding is wanted.
  counts <- as.matrix(2^tumor - 1)
  storage.mode(counts) <- "integer"
  rownames(counts) <- rownames(tumor)

  # Batch correction needs at least two batches among the retained samples;
  # otherwise return the integer counts unchanged.
  if (length(unique(retained_types)) < 2L) {
    combat_count_df <- as.data.frame(counts)
  } else {
    # All samples belong to one biological group
    selected_group <- rep("all_group", sum(keep))

    # Each retained sample's batch is the position of its type in the selection
    selected_batch <- match(retained_types, selected_types)

    # Correct for batch effects using ComBat_seq
    combat_count <- sva::ComBat_seq(
      counts,
      batch = selected_batch,
      group = selected_group
    )

    # Convert matrix to data frame
    combat_count_df <- as.data.frame(combat_count)
  }

  saveRDS(combat_count_df, file = CombatTumor_output_path)

  combat_count_df
}