|
a |
|
b/R/CompareMerge.R |
|
|
1 |
#' Compare and merge specific columns from two DEG data frames
#'
#' Inner joins two DEG data frames by a gene column and compares the two
#' suffixed copies of `compare_col` produced by the join. If the copies agree
#' on every row, they are collapsed into a single column named `compare_col`;
#' otherwise both suffixed columns are kept and a message is emitted.
#'
#' Rows where both copies are `NA` are treated as agreeing; a row where only
#' one copy is `NA` counts as a difference.
#'
#' @importFrom dplyr inner_join
#' @param df1 First data frame.
#' @param df2 Second data frame.
#' @param by_gene Column name by which to join the data frames, typically
#'   "Gene".
#' @param compare_col Column to compare for identity, which will also be the
#'   name of the merged column. It must be present in both data frames for the
#'   comparison to take place.
#' @param suffixes Character vector of length 2 with the suffixes to use for
#'   non-identical column names in the joined data frame.
#' @param df_name Name to assign to the resulting data frame for
#'   identification; stored in a column called `name`.
#' @return The joined data frame. When the compared columns are identical they
#'   are replaced by a single `compare_col` column. A `name` column holding
#'   `df_name` is always added.
#' @examples
#' # Create simulated DESeq2 data
#' DEG_deseq2 <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "up", "down"),
#'   log2FoldChange = c(2.5, -3.2, 0.1, 1.8, -2.5),
#'   pvalue = c(0.01, 0.05, 0.9, 0.02, 0.03)
#' )
#'
#' # Display the first 5 rows of the DESeq2 data
#' head(DEG_deseq2, 5)
#'
#' # Create simulated edgeR data
#' DEG_edgeR <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "no_change", "up"),
#'   log2FoldChange = c(2.3, -3.1, 0.2, 0.1, 2.7),
#'   pvalue = c(0.02, 0.04, 0.8, 0.6, 0.01)
#' )
#'
#' # Display the first 5 rows of the edgeR data
#' head(DEG_edgeR, 5)
#'
#' # Merge the DESeq2 and edgeR data
#' deseq2_edgeR <- compare_merge(
#'   df1 = DEG_deseq2,
#'   df2 = DEG_edgeR,
#'   by_gene = "Gene",
#'   compare_col = "change",
#'   suffixes = c("_1", "_2"),
#'   df_name = "deseq2_edgeR"
#' )
#'
#' @export
compare_merge <- function(df1, df2, by_gene, compare_col, suffixes, df_name) {
  # Inner join on the gene column; columns shared by both inputs (other than
  # the key) receive the supplied suffixes.
  merged_df <- dplyr::inner_join(df1, df2, by = by_gene, suffix = suffixes)

  # Names the join gives to the two copies of the compared column
  col1 <- paste0(compare_col, suffixes[1])
  col2 <- paste0(compare_col, suffixes[2])

  missing_cols <- setdiff(c(col1, col2), names(merged_df))
  if (length(missing_cols) > 0) {
    # The suffixed columns only exist when `compare_col` is present in both
    # inputs. Without this guard the comparison would run on NULLs, succeed
    # vacuously, and delete an unsuffixed `compare_col` column.
    warning(
      "Column(s) not found after join: ",
      paste(missing_cols, collapse = ", "),
      "; skipping comparison of '", compare_col, "'.",
      call. = FALSE
    )
  } else {
    left <- merged_df[[col1]]
    right <- merged_df[[col2]]

    # NA-safe row-wise agreement: `==` yields NA for missing values, which
    # would make `if ()` fail, so NA pairs are handled explicitly.
    both_na <- is.na(left) & is.na(right)
    both_present <- !is.na(left) & !is.na(right)
    agrees <- both_na | (both_present & left == right)

    if (all(agrees)) {
      # Completely identical: collapse into one column and drop the copies
      merged_df[[compare_col]] <- left
      merged_df[[col1]] <- NULL
      merged_df[[col2]] <- NULL
    } else {
      # message() concatenates its arguments without a separator, so the
      # spaces must be part of the literals.
      message("The columns ", col1, " and ", col2, " are not identical.\n")
    }
  }

  # Assign the specified name for identification
  merged_df$name <- df_name

  merged_df
}