Diff of /R/MergeGtexTcga.R [000000] .. [0f2269]

Switch to side-by-side view

--- a
+++ b/R/MergeGtexTcga.R
@@ -0,0 +1,58 @@
+#' Merge gene expression data from GTEx and TCGA datasets
+#'
+#' This function merges gene expression data obtained from the GTEx (Genotype-Tissue Expression) and TCGA (The Cancer Genome Atlas) datasets.
+#' It is assumed that both datasets are in '.rds' format and have genes as row names. The merged dataset is saved as an RDS file at the specified output path.
+#'
+#' @param gtex_data_path A string that specifies the file path to the GTEx data saved in RDS format.
+#' @param tcga_exp_path A string that specifies the file path to the TCGA expression data saved in RDS format.
+#'        This should be a data.frame with rows as genes and columns as samples.
+#' @param output_path A string that specifies the path where the merged dataset should be saved.
+#'        The file is saved in '.rds' format. The default path is "./merged_gtex_tcga_data.rds".
+#'
+#' @details It is assumed that both datasets are in '.rds' format and have genes as row names.
+#'
+#' @return A data frame where rows represent genes and columns represent samples.
+#'         The data frame contains expression values from both GTEx and TCGA datasets.
+#'         It saves the merged dataset to the path specified by 'output_path'.
+#'
+#' @examples
+#' tumor_file <- system.file("extdata",
+#'                           "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
+#'                           package = "TransProR")
+#' Normal_file <- system.file("extdata",
+#'                            "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
+#'                            package = "TransProR")
+#' ouput_file <- file.path(tempdir(), "all_data.rds")
+#'
+#' all_data <- merge_gtex_tcga(gtex_data_path = tumor_file,
+#'                             tcga_exp_path = Normal_file,
+#'                             output_path = ouput_file)
+#'
+#' @note CRITICAL: The 'output_path' parameter must end with '.rds' to be properly recognized by the function. It is also highly recommended
+#'       that the path includes specific identifiers related to the target samples. Please structure the 'output_path' following this pattern: './your_directory/merged.your_sample_type.gtex.tcga.data.rds'.
+#'
+#' @importFrom tibble column_to_rownames
+#' @export
+merge_gtex_tcga <- function(gtex_data_path,
+                            tcga_exp_path,
+                            output_path = "./merged_gtex_tcga_data.rds") {
+
+  # Load the GTEx data
+  gtex_data <- readRDS(gtex_data_path)
+  message("Number of GTEx samples:", ncol(gtex_data), "\n")
+
+  # Load the TCGA data
+  tcga.exp <- readRDS(tcga_exp_path)
+  message("Number of TCGA samples:", ncol(tcga.exp), "\n")
+
+  # Merge the datasets, ensuring both have genes as row names
+  all_data <- merge(gtex_data, tcga.exp, by = "row.names")
+  all_data <- tibble::column_to_rownames(all_data, var = "Row.names")  # Set the row names
+
+  message("Number of samples after merging:", ncol(all_data), "\n")
+
+  # Save the merged dataset
+  saveRDS(all_data, file = output_path)
+
+  return(all_data)
+}