|
a |
|
b/R/CombatTumor.R |
|
|
1 |
#' Process and Correct Batch Effects in Tumor Data
#'
#' This function takes a tumor data set, asks the user for specific tumor types to retain,
#' and then corrects for batch effects using the ComBat_seq function from the 'sva' package.
#'
#' @description
#' The function first extracts histological types from the provided tumor data set
#' (characters 14-15 of every column name). After displaying these types, the user is
#' prompted to input specific types to retain (or \code{default_input} is used when
#' \code{auto_mode} is TRUE). The data is then filtered based on this input, transformed
#' with \code{2^x - 1}, rounded to integer counts and, when at least two of the selected
#' types are present in the data, passed to ComBat_seq with one batch per tumor type.
#'
#' Note: This example assumes that different tumor types represent different batches in a general sense.
#' Users need to adjust the batch and group vectors based on real-life scenarios.
#'
#' @param tumor_data_path The path to the tumor data stored in an RDS file. The stored object
#'   must be a matrix or data.frame with samples in columns and column names from which the
#'   type code can be read at character positions 14-15.
#' @param CombatTumor_output_path A character string specifying the path where the output RDS file will be saved.
#' @param auto_mode Logical. If set to TRUE, the function will not prompt the user for input and
#'   will instead use the values provided in default_input. Default is FALSE.
#' @param default_input Character string. When auto_mode is TRUE, this parameter specifies the default
#'   tumor types to be retained. It should be provided as a comma-separated string (e.g., "01,06").
#'   Whitespace around the individual codes is ignored.
#'
#' @return A data.frame with corrected values after the ComBat_seq adjustment. If fewer than two of
#'   the selected tumor types are present in the data, batch correction is skipped and the
#'   integer-converted counts are returned instead. Note that this function also saves the
#'   combat_count_df data as an RDS file at the specified output path.
#'
#' @details
#' The ComBat_seq function from the sva package is used to correct batch effects.
#' The function requires the 'sva' package to be installed; it is called as
#' \code{sva::ComBat_seq} and is only needed when batch correction is actually performed.
#' An error is raised when the selection is empty or matches no sample; a warning is issued
#' for selected types that do not occur in the data.
#'
#' @examples
#' tumor_file <- system.file("extdata",
#'                           "SKCM_Skin_TCGA_exp_tumor_test.rds",
#'                           package = "TransProR")
#' output_file <- file.path(tempdir(), "SKCM_combat_count.rds")
#'
#' SKCM_combat_count <- combat_tumor(
#'   tumor_data_path = tumor_file,
#'   CombatTumor_output_path = output_file,
#'   auto_mode = TRUE,
#'   default_input = "01,06"
#' )
#'
#' head(SKCM_combat_count)[1:5, 1:5]
#'
#' @seealso \code{\link[sva]{ComBat_seq}}
#' @importFrom sva ComBat_seq
#' @export
combat_tumor <- function(tumor_data_path, CombatTumor_output_path, auto_mode = FALSE, default_input = "01,06") {

  # Load the tumor data
  tumor_data <- readRDS(tumor_data_path)

  sample_ids <- colnames(tumor_data)
  if (is.null(sample_ids)) {
    stop("The tumor data must have column names to derive tumor types from.", call. = FALSE)
  }

  # Extract histological types (characters 14-15 of each column name)
  tumor_hist_types <- substring(sample_ids, 14, 15)
  tumor_hist_table <- table(tumor_hist_types)

  # Display the table to the user, padded with blank lines for separation
  message(" ")
  message("TumorHistologicalTypes:")
  message(paste(names(tumor_hist_table), tumor_hist_table, sep = ": ", collapse = "\n"))
  message(" ")

  # Ask the user for input or use default input in auto_mode
  if (auto_mode) {
    raw_selection <- default_input
  } else {
    message("Please input the tumor types you wish to retain, separated by commas (e.g., 01,06): ")
    raw_selection <- readline()
  }

  # Normalise the selection: "01, 06" must behave like "01,06", and blank or
  # duplicated entries must not count as additional types.
  selected_types <- trimws(unlist(strsplit(as.character(raw_selection), ",", fixed = TRUE)))
  selected_types <- unique(selected_types[!is.na(selected_types) & nzchar(selected_types)])
  if (length(selected_types) == 0L) {
    stop("No tumor types were selected.", call. = FALSE)
  }

  keep <- tumor_hist_types %in% selected_types
  if (!any(keep)) {
    stop("None of the selected tumor types (",
         paste(selected_types, collapse = ", "),
         ") are present in the data.", call. = FALSE)
  }
  absent_types <- setdiff(selected_types, tumor_hist_types)
  if (length(absent_types) > 0L) {
    warning("Selected tumor types not found in the data: ",
            paste(absent_types, collapse = ", "), call. = FALSE)
  }

  # Filter the tumor data based on user's input; drop = FALSE keeps the
  # two-dimensional shape even when a single sample is retained.
  tumor <- tumor_data[, keep, drop = FALSE]

  # Modify the tumor values: apply 2^x - 1 and convert to integer counts.
  # Round before converting so floating-point error (e.g. 2.9999999999999996)
  # is not truncated to the next lower count.
  tumor_counts <- round(2^as.matrix(tumor) - 1)
  storage.mode(tumor_counts) <- "integer"
  rownames(tumor_counts) <- rownames(tumor)

  kept_types <- tumor_hist_types[keep]

  if (length(unique(kept_types)) < 2L) {
    # Only one batch is actually present: nothing to correct, return the counts
    combat_count_df <- as.data.frame(tumor_counts)
  } else {
    # All samples share one biological group; tumor type defines the batch
    selected_group <- rep("all_group", length(kept_types))
    selected_batch <- match(kept_types, selected_types)

    # Correct for batch effects using ComBat_seq
    combat_count <- sva::ComBat_seq(tumor_counts,
                                    batch = selected_batch,
                                    group = selected_group)

    # Convert matrix to data frame
    combat_count_df <- as.data.frame(combat_count)
  }

  saveRDS(combat_count_df, file = CombatTumor_output_path)

  combat_count_df
}