Diff of /R/CompareMerge.R [000000] .. [0f2269]

Switch to unified view

a b/R/CompareMerge.R
1
#' Compare and merge specific columns from two DEG data frames
#'
#' This function takes two DEG data frames, inner joins them by a specified
#' gene column, and checks whether a specified column is identical across
#' both data frames. If it is, the two suffixed copies of that column are
#' collapsed into a single column named after the compared column. If it is
#' not, both suffixed columns are kept and a message is emitted.
#'
#' Rows in which both values are `NA` count as identical; rows in which only
#' one value is `NA` count as different. Factor columns are compared by their
#' labels, so factors with different level sets can be compared.
#'
#' @importFrom dplyr inner_join
#' @param df1 First data frame.
#' @param df2 Second data frame.
#' @param by_gene Column name by which to join the data frames, typically
#'   "Gene".
#' @param compare_col Column to compare for identity, which will also be the
#'   name of the merged column. It must be present in both data frames and
#'   must not be one of the join keys.
#' @param suffixes Character vector of length two with the suffixes appended
#'   to column names that occur in both data frames (other than the join
#'   key), e.g. `c("_1", "_2")`.
#' @param df_name Name to assign to the resulting data frame for
#'   identification. It is stored in a column called `name`, replacing any
#'   existing column of that name.
#' @return The inner-joined data frame with an added `name` column. When the
#'   compared columns are identical, the two suffixed copies are replaced by
#'   a single column named `compare_col`; otherwise both suffixed columns are
#'   retained. An error is raised if the suffixed comparison columns are not
#'   present after the join.
#' @examples
#' # Create simulated DESeq2 data
#' DEG_deseq2 <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "up", "down"),
#'   log2FoldChange = c(2.5, -3.2, 0.1, 1.8, -2.5),
#'   pvalue = c(0.01, 0.05, 0.9, 0.02, 0.03)
#' )
#'
#' # Display the first 5 rows of the DESeq2 data
#' head(DEG_deseq2, 5)
#'
#' # Create simulated edgeR data
#' DEG_edgeR <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "no_change", "up"),
#'   log2FoldChange = c(2.3, -3.1, 0.2, 0.1, 2.7),
#'   pvalue = c(0.02, 0.04, 0.8, 0.6, 0.01)
#' )
#'
#' # Display the first 5 rows of the edgeR data
#' head(DEG_edgeR, 5)
#'
#' # Merge the DESeq2 and edgeR data
#' deseq2_edgeR <- compare_merge(
#'   df1 = DEG_deseq2,
#'   df2 = DEG_edgeR,
#'   by_gene = "Gene",
#'   compare_col = "change",
#'   suffixes = c("_1", "_2"),
#'   df_name = "deseq2_edgeR"
#' )
#'
#' @export
compare_merge <- function(df1, df2, by_gene, compare_col, suffixes, df_name) {
  # Inner join on the gene column; columns shared by both inputs (other than
  # the key) receive the supplied suffixes.
  merged_df <- dplyr::inner_join(df1, df2, by = by_gene, suffix = suffixes)

  # Names the compared column carries after the join
  col1 <- paste0(compare_col, suffixes[1])
  col2 <- paste0(compare_col, suffixes[2])

  # Without this guard a missing column yields NULL == NULL -> logical(0),
  # all() returns TRUE and the "identical" branch would silently drop data.
  missing_cols <- setdiff(c(col1, col2), names(merged_df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) ", paste(missing_cols, collapse = ", "),
      " not found after joining; '", compare_col,
      "' must be present in both data frames and must not be a join key.",
      call. = FALSE
    )
  }

  values1 <- merged_df[[col1]]
  values2 <- merged_df[[col2]]

  # `==` errors on factors whose level sets differ, so compare labels instead
  cmp1 <- if (is.factor(values1)) as.character(values1) else values1
  cmp2 <- if (is.factor(values2)) as.character(values2) else values2

  # NA-safe equality: both NA counts as equal, one-sided NA as different.
  # A plain all(cmp1 == cmp2) would be NA here and make `if` fail.
  same <- cmp1 == cmp2
  both_na <- is.na(cmp1) & is.na(cmp2)
  columns_identical <- all(both_na | (!is.na(same) & same))

  if (columns_identical) {
    # Collapse the two copies into one column named after `compare_col`
    merged_df[[compare_col]] <- values1
    # Drop the suffixed originals; setdiff() protects the merged column when
    # one of the suffixes is the empty string.
    for (old_col in setdiff(c(col1, col2), compare_col)) {
      merged_df[[old_col]] <- NULL
    }
  } else {
    # Keep both suffixed columns and tell the caller why
    message("The columns ", col1, " and ", col2, " are not identical.")
  }

  # Assign the specified name for identification
  merged_df$name <- df_name

  merged_df
}