# R/GeneMapPathway.R
#' Create Pathway-Gene Mapping Data Frame
#'
#' This function takes multiple data frames and pathway IDs, merging them
#' into a new long-format data frame with one row per pathway-gene pair.
#' Each data frame represents a type of analysis (e.g., BP, KEGG, MF, etc.).
#'
#' Every supplied data frame must provide the columns `ID`, `geneID` (gene
#' names separated by `"/"`) and `Description`. An analysis type is skipped
#' when either its data frame or its IDs are `NULL`, and it contributes no
#' rows when none of its IDs are found in the data frame.
#'
#' @param BP_dataframe Data frame for Biological Process analysis, or `NULL`
#' @param BP_ids Selected pathway IDs for Biological Process analysis, or
#'   `NULL`
#' @param KEGG_dataframe Data frame for KEGG analysis, or `NULL`
#' @param KEGG_ids Selected pathway IDs for KEGG analysis, or `NULL`
#' @param MF_dataframe Data frame for Molecular Function analysis, or `NULL`
#' @param MF_ids Selected pathway IDs for Molecular Function analysis, or
#'   `NULL`
#' @param REACTOME_dataframe Data frame for REACTOME analysis, or `NULL`
#' @param REACTOME_ids Selected pathway IDs for REACTOME analysis, or `NULL`
#' @param CC_dataframe Data frame for Cellular Component analysis, or `NULL`
#' @param CC_ids Selected pathway IDs for Cellular Component analysis, or
#'   `NULL`
#' @param DO_dataframe Data frame for Disease Ontology analysis, or `NULL`
#' @param DO_ids Selected pathway IDs for Disease Ontology analysis, or `NULL`
#' @return A data frame with the columns `pathway` (taken from the
#'   `Description` column of the input), `gene`, `type` (one of `"BP"`,
#'   `"KEGG"`, `"MF"`, `"REACTOME"`, `"CC"`, `"DO"`) and `value` (always `1`).
#'   The same four columns are returned, with zero rows, when nothing is
#'   selected.
#' @export
#' @examples
#' # Simulating data for different analysis types
#'
#' # Simulate Biological Process (BP) data frame
#' BP_df <- data.frame(
#'   ID = c("GO:0002376", "GO:0019724"),
#'   geneID = c("GENE1/GENE2", "GENE3/GENE4"),
#'   Description = c("Immune response", "Glycosylation process")
#' )
#'
#' # Simulate KEGG data frame
#' KEGG_df <- data.frame(
#'   ID = c("12345", "67890"),
#'   geneID = c("GENE5/GENE6", "GENE7/GENE8"),
#'   Description = c("Pathway 1", "Pathway 2")
#' )
#'
#' # Simulate Molecular Function (MF) data frame
#' MF_df <- data.frame(
#'   ID = c("ABC123", "DEF456"),
#'   geneID = c("GENE9/GENE10", "GENE11/GENE12"),
#'   Description = c("Molecular function A", "Molecular function B")
#' )
#'
#' # Simulate REACTOME data frame
#' REACTOME_df <- data.frame(
#'   ID = c("R-HSA-12345", "R-HSA-67890"),
#'   geneID = c("GENE13/GENE14", "GENE15/GENE16"),
#'   Description = c("Pathway in Reactome 1", "Pathway in Reactome 2")
#' )
#'
#' # Simulate Cellular Component (CC) data frame
#' CC_df <- data.frame(
#'   ID = c("GO:0005575", "GO:0005634"),
#'   geneID = c("GENE17/GENE18", "GENE19/GENE20"),
#'   Description = c("Cellular component A", "Cellular component B")
#' )
#'
#' # Simulate Disease Ontology (DO) data frame
#' DO_df <- data.frame(
#'   ID = c("DOID:123", "DOID:456"),
#'   geneID = c("GENE21/GENE22", "GENE23/GENE24"),
#'   Description = c("Disease A", "Disease B")
#' )
#'
#' # Example pathway IDs for each analysis
#' BP_ids <- c("GO:0002376", "GO:0019724")
#' KEGG_ids <- c("12345", "67890")
#' MF_ids <- c("ABC123", "DEF456")
#' REACTOME_ids <- c("R-HSA-12345", "R-HSA-67890")
#' CC_ids <- c("GO:0005575", "GO:0005634")
#' DO_ids <- c("DOID:123", "DOID:456")
#'
#' # Generate the pathway-gene map using the gene_map_pathway function
#' pathway_gene_map <- gene_map_pathway(
#'   BP_dataframe = BP_df, BP_ids = BP_ids,
#'   KEGG_dataframe = KEGG_df, KEGG_ids = KEGG_ids,
#'   MF_dataframe = MF_df, MF_ids = MF_ids,
#'   REACTOME_dataframe = REACTOME_df, REACTOME_ids = REACTOME_ids,
#'   CC_dataframe = CC_df, CC_ids = CC_ids,
#'   DO_dataframe = DO_df, DO_ids = DO_ids
#' )
#'
#' # Display the resulting pathway-gene mapping data frame
#' print(pathway_gene_map)
#'
#' # Analysis types that are not needed can simply be omitted
#' gene_map_pathway(KEGG_dataframe = KEGG_df, KEGG_ids = "12345")
gene_map_pathway <- function(BP_dataframe = NULL, BP_ids = NULL,
                             KEGG_dataframe = NULL, KEGG_ids = NULL,
                             MF_dataframe = NULL, MF_ids = NULL,
                             REACTOME_dataframe = NULL, REACTOME_ids = NULL,
                             CC_dataframe = NULL, CC_ids = NULL,
                             DO_dataframe = NULL, DO_ids = NULL) {
  # Zero-row template with the same columns as a populated result, so callers
  # see one schema whether or not anything was selected.
  empty_map <- data.frame(
    pathway = character(),
    gene = character(),
    type = character(),
    value = numeric(),
    stringsAsFactors = FALSE
  )

  # Build the pathway-gene rows for one analysis type. Returns NULL when the
  # type is not supplied or none of the requested IDs are present.
  build_rows <- function(df, ids, type) {
    if (is.null(df) || is.null(ids)) {
      return(NULL)
    }
    if (is.null(df$ID) || is.null(df$geneID) || is.null(df$Description)) {
      stop(
        sprintf(
          "`%s_dataframe` must contain `ID`, `geneID` and `Description` columns",
          type
        ),
        call. = FALSE
      )
    }

    keep <- df$ID %in% ids
    if (!any(keep)) {
      return(NULL)
    }

    # One character vector of genes per selected pathway.
    genes <- strsplit(as.character(df$geneID[keep]), "/", fixed = TRUE)
    n_genes <- lengths(genes)
    n_rows <- sum(n_genes)

    data.frame(
      # The Description column, not the ID, labels the pathway.
      pathway = rep(as.character(df$Description[keep]), times = n_genes),
      # as.character() keeps the column when every gene list is empty and
      # unlist() would otherwise return NULL.
      gene = as.character(unlist(genes, use.names = FALSE)),
      type = rep(type, n_rows),
      value = rep(1, n_rows),
      stringsAsFactors = FALSE
    )
  }

  parts <- list(
    build_rows(BP_dataframe, BP_ids, "BP"),
    build_rows(KEGG_dataframe, KEGG_ids, "KEGG"),
    build_rows(MF_dataframe, MF_ids, "MF"),
    build_rows(REACTOME_dataframe, REACTOME_ids, "REACTOME"),
    build_rows(CC_dataframe, CC_ids, "CC"),
    build_rows(DO_dataframe, DO_ids, "DO")
  )
  parts <- Filter(Negate(is.null), parts)

  # rbind() drops zero-row data frames, so the template only survives when
  # every analysis type came back empty.
  pathway_gene_map <- do.call(rbind, c(list(empty_map), parts))
  rownames(pathway_gene_map) <- NULL
  pathway_gene_map
}