|
a |
|
b/R/MergeGtexTcga.R |
|
|
1 |
#' Merge gene expression data from GTEx and TCGA datasets
#'
#' This function merges gene expression data obtained from the GTEx
#' (Genotype-Tissue Expression) and TCGA (The Cancer Genome Atlas) datasets.
#' It is assumed that both datasets are in '.rds' format and have genes as
#' row names. The merged dataset is saved as an RDS file at the specified
#' output path.
#'
#' @param gtex_data_path A string that specifies the file path to the GTEx
#'   data saved in RDS format.
#' @param tcga_exp_path A string that specifies the file path to the TCGA
#'   expression data saved in RDS format. This should be a data.frame with
#'   rows as genes and columns as samples.
#' @param output_path A string that specifies the path where the merged
#'   dataset should be saved. The file is saved in '.rds' format. The default
#'   path is "./merged_gtex_tcga_data.rds". If the parent directory of this
#'   path does not exist, it is created.
#'
#' @details It is assumed that both datasets are in '.rds' format and have
#'   genes as row names. The two tables are joined on their row names, so only
#'   genes present in both datasets are kept. A warning is raised when the two
#'   datasets share no gene identifiers at all.
#'
#' @return A data frame where rows represent genes and columns represent
#'   samples. The data frame contains expression values from both GTEx and
#'   TCGA datasets, restricted to the genes found in both. It saves the merged
#'   dataset to the path specified by 'output_path'.
#'
#' @examples
#' tumor_file <- system.file("extdata",
#'                           "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
#'                           package = "TransProR")
#' Normal_file <- system.file("extdata",
#'                            "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
#'                            package = "TransProR")
#' output_file <- file.path(tempdir(), "all_data.rds")
#'
#' all_data <- merge_gtex_tcga(gtex_data_path = tumor_file,
#'                             tcga_exp_path = Normal_file,
#'                             output_path = output_file)
#'
#' @note CRITICAL: The 'output_path' parameter should end with '.rds' so the
#'   saved file is recognisable as an RDS file; the function itself does not
#'   verify the extension. It is also highly recommended that the path
#'   includes specific identifiers related to the target samples. Please
#'   structure the 'output_path' following this pattern:
#'   './your_directory/merged.your_sample_type.gtex.tcga.data.rds'.
#'
#' @importFrom tibble column_to_rownames
#' @export
merge_gtex_tcga <- function(gtex_data_path,
                            tcga_exp_path,
                            output_path = "./merged_gtex_tcga_data.rds") {

  # Load the GTEx data
  gtex_data <- readRDS(gtex_data_path)
  message("Number of GTEx samples:", ncol(gtex_data), "\n")

  # Load the TCGA data
  tcga_exp <- readRDS(tcga_exp_path)
  message("Number of TCGA samples:", ncol(tcga_exp), "\n")

  # Inner join on the row names (gene identifiers): genes that are missing
  # from either dataset are dropped. merge() returns the join key as a
  # leading "Row.names" column.
  all_data <- merge(gtex_data, tcga_exp, by = "row.names")

  # Move the join key back into the row names and drop the helper column.
  rownames(all_data) <- all_data[["Row.names"]]
  all_data[["Row.names"]] <- NULL

  if (nrow(all_data) == 0L) {
    # An empty result almost always means mismatched gene identifiers
    # (e.g. symbols vs. Ensembl IDs); make that visible to the caller.
    warning(
      "GTEx and TCGA data share no row names (genes); ",
      "the merged dataset is empty.",
      call. = FALSE
    )
  }

  message("Number of samples after merging:", ncol(all_data), "\n")

  # Make sure the destination directory exists so that saveRDS() does not
  # fail after the merge has already been computed. Only applies to path
  # strings; anything else is handed to saveRDS() unchanged.
  if (is.character(output_path) && length(output_path) == 1L) {
    out_dir <- dirname(output_path)
    if (!dir.exists(out_dir)) {
      dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
    }
  }

  # Save the merged dataset
  saveRDS(all_data, file = output_path)

  return(all_data)
}