#' Compare and merge specific columns from two DEG data frames
#'
#' Inner joins two DEG data frames by a gene column and compares the two
#' suffixed copies of `compare_col` produced by the join. If every row agrees,
#' the two copies are collapsed into a single column named `compare_col`;
#' otherwise both suffixed columns are kept and a message is emitted.
#'
#' Two values are treated as agreeing when they are equal or when both are
#' `NA`. Factor columns are compared by their labels, so differing level sets
#' do not cause an error.
#'
#' @importFrom dplyr inner_join
#' @param df1 First data frame.
#' @param df2 Second data frame.
#' @param by_gene Column name by which to join the data frames, typically
#'   "Gene".
#' @param compare_col Column to compare for identity, which will also be the
#'   name of the merged column. It should be present in both data frames and
#'   must not be the join column; otherwise a warning is issued and the
#'   comparison is skipped.
#' @param suffixes Character vector of length 2 with the suffixes used for
#'   non-key columns that occur in both data frames.
#' @param df_name Name to assign to the resulting data frame for
#'   identification; it is stored in a column called `name`.
#' @return The joined data frame. When the compared columns agree, it contains
#'   a single `compare_col` column (appended after the other joined columns)
#'   instead of the two suffixed ones. A `name` column holding `df_name` is
#'   always added.
#' @examples
#' # Create simulated DESeq2 data
#' DEG_deseq2 <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "up", "down"),
#'   log2FoldChange = c(2.5, -3.2, 0.1, 1.8, -2.5),
#'   pvalue = c(0.01, 0.05, 0.9, 0.02, 0.03)
#' )
#'
#' # Display the first 5 rows of the DESeq2 data
#' head(DEG_deseq2, 5)
#'
#' # Create simulated edgeR data
#' DEG_edgeR <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "no_change", "up"),
#'   log2FoldChange = c(2.3, -3.1, 0.2, 0.1, 2.7),
#'   pvalue = c(0.02, 0.04, 0.8, 0.6, 0.01)
#' )
#'
#' # Display the first 5 rows of the edgeR data
#' head(DEG_edgeR, 5)
#'
#' # Merge the DESeq2 and edgeR data
#' deseq2_edgeR <- compare_merge(
#'   df1 = DEG_deseq2,
#'   df2 = DEG_edgeR,
#'   by_gene = "Gene",
#'   compare_col = "change",
#'   suffixes = c("_1", "_2"),
#'   df_name = "deseq2_edgeR"
#' )
#'
#' @export
compare_merge <- function(df1, df2, by_gene, compare_col, suffixes, df_name) {
  # Inner join on the gene column; shared non-key columns receive `suffixes`.
  merged_df <- dplyr::inner_join(df1, df2, by = by_gene, suffix = suffixes)

  # Names the join gives to the two copies of the compared column.
  col1 <- paste0(compare_col, suffixes[1])
  col2 <- paste0(compare_col, suffixes[2])

  if (!all(c(col1, col2) %in% names(merged_df))) {
    # The suffixed pair only exists when `compare_col` is a non-key column of
    # both inputs. Without this guard the comparison would run on NULLs,
    # count as "identical", and silently drop `compare_col` from the result.
    warning(
      "Column '", compare_col, "' was not found in both data frames ",
      "(or is the join column); comparison skipped.",
      call. = FALSE
    )
  } else {
    left <- merged_df[[col1]]
    right <- merged_df[[col2]]

    # `==` on factors with different level sets is an error; compare labels.
    cmp_left <- if (is.factor(left)) as.character(left) else left
    cmp_right <- if (is.factor(right)) as.character(right) else right

    # NA-safe row-wise agreement: equal values, or missing on both sides.
    both_missing <- is.na(cmp_left) & is.na(cmp_right)
    equal <- cmp_left == cmp_right
    columns_match <- all(both_missing | (!is.na(equal) & equal))

    if (columns_match) {
      # Drop the suffixed copies first so that an empty suffix (where `col1`
      # or `col2` equals `compare_col`) cannot delete the merged column.
      merged_df[[col1]] <- NULL
      merged_df[[col2]] <- NULL
      merged_df[[compare_col]] <- left
    } else {
      message("The columns ", col1, " and ", col2, " are not identical.")
    }
  }

  # Tag the result with the requested name for identification.
  merged_df$name <- df_name

  merged_df
}