|
a |
|
b/R/GeneMapPathway.R |
|
|
1 |
#' Create Pathway-Gene Mapping Data Frame
#'
#' Takes up to six enrichment-result data frames together with the pathway IDs
#' selected from each of them, and merges them into one long data frame with
#' one row per (pathway, gene) pair. Each input data frame represents one type
#' of analysis (BP, KEGG, MF, REACTOME, CC, DO).
#'
#' Every supplied data frame must contain the columns \code{ID},
#' \code{geneID} (gene names separated by \code{"/"}) and \code{Description}.
#' An analysis type is skipped when its data frame or its ID vector is
#' \code{NULL}, and contributes no rows when none of its IDs match.
#'
#' @param BP_dataframe Data frame for Biological Process analysis, or NULL
#' @param BP_ids Selected pathway IDs for Biological Process analysis, or NULL
#' @param KEGG_dataframe Data frame for KEGG analysis, or NULL
#' @param KEGG_ids Selected pathway IDs for KEGG analysis, or NULL
#' @param MF_dataframe Data frame for Molecular Function analysis, or NULL
#' @param MF_ids Selected pathway IDs for Molecular Function analysis, or NULL
#' @param REACTOME_dataframe Data frame for REACTOME analysis, or NULL
#' @param REACTOME_ids Selected pathway IDs for REACTOME analysis, or NULL
#' @param CC_dataframe Data frame for Cellular Component analysis, or NULL
#' @param CC_ids Selected pathway IDs for Cellular Component analysis, or NULL
#' @param DO_dataframe Data frame for Disease Ontology analysis, or NULL
#' @param DO_ids Selected pathway IDs for Disease Ontology analysis, or NULL
#' @return A data frame with the columns \code{pathway} (taken from
#'   \code{Description}), \code{gene}, \code{type} (one of "BP", "KEGG", "MF",
#'   "REACTOME", "CC", "DO") and \code{value} (always 1). A zero-row data
#'   frame with the same columns is returned when nothing is selected.
#' @export
#' @examples
#' # Simulating data for different analysis types
#'
#' # Simulate Biological Process (BP) data frame
#' BP_df <- data.frame(
#'   ID = c("GO:0002376", "GO:0019724"),
#'   geneID = c("GENE1/GENE2", "GENE3/GENE4"),
#'   Description = c("Immune response", "Glycosylation process")
#' )
#'
#' # Simulate KEGG data frame
#' KEGG_df <- data.frame(
#'   ID = c("12345", "67890"),
#'   geneID = c("GENE5/GENE6", "GENE7/GENE8"),
#'   Description = c("Pathway 1", "Pathway 2")
#' )
#'
#' # Simulate Molecular Function (MF) data frame
#' MF_df <- data.frame(
#'   ID = c("ABC123", "DEF456"),
#'   geneID = c("GENE9/GENE10", "GENE11/GENE12"),
#'   Description = c("Molecular function A", "Molecular function B")
#' )
#'
#' # Simulate REACTOME data frame
#' REACTOME_df <- data.frame(
#'   ID = c("R-HSA-12345", "R-HSA-67890"),
#'   geneID = c("GENE13/GENE14", "GENE15/GENE16"),
#'   Description = c("Pathway in Reactome 1", "Pathway in Reactome 2")
#' )
#'
#' # Simulate Cellular Component (CC) data frame
#' CC_df <- data.frame(
#'   ID = c("GO:0005575", "GO:0005634"),
#'   geneID = c("GENE17/GENE18", "GENE19/GENE20"),
#'   Description = c("Cellular component A", "Cellular component B")
#' )
#'
#' # Simulate Disease Ontology (DO) data frame
#' DO_df <- data.frame(
#'   ID = c("DOID:123", "DOID:456"),
#'   geneID = c("GENE21/GENE22", "GENE23/GENE24"),
#'   Description = c("Disease A", "Disease B")
#' )
#'
#' # Example pathway IDs for each analysis
#' BP_ids <- c("GO:0002376", "GO:0019724")
#' KEGG_ids <- c("12345", "67890")
#' MF_ids <- c("ABC123", "DEF456")
#' REACTOME_ids <- c("R-HSA-12345", "R-HSA-67890")
#' CC_ids <- c("GO:0005575", "GO:0005634")
#' DO_ids <- c("DOID:123", "DOID:456")
#'
#' # Generate the pathway-gene map using the gene_map_pathway function
#' pathway_gene_map <- gene_map_pathway(
#'   BP_dataframe = BP_df, BP_ids = BP_ids,
#'   KEGG_dataframe = KEGG_df, KEGG_ids = KEGG_ids,
#'   MF_dataframe = MF_df, MF_ids = MF_ids,
#'   REACTOME_dataframe = REACTOME_df, REACTOME_ids = REACTOME_ids,
#'   CC_dataframe = CC_df, CC_ids = CC_ids,
#'   DO_dataframe = DO_df, DO_ids = DO_ids
#' )
#'
#' # Display the resulting pathway-gene mapping data frame
#' print(pathway_gene_map)
#'
#' # Analysis types can be omitted; only BP is mapped here
#' gene_map_pathway(BP_dataframe = BP_df, BP_ids = BP_ids[1])
gene_map_pathway <- function(BP_dataframe = NULL, BP_ids = NULL,
                             KEGG_dataframe = NULL, KEGG_ids = NULL,
                             MF_dataframe = NULL, MF_ids = NULL,
                             REACTOME_dataframe = NULL, REACTOME_ids = NULL,
                             CC_dataframe = NULL, CC_ids = NULL,
                             DO_dataframe = NULL, DO_ids = NULL) {
  # Zero-row template returned when nothing is selected. Its column names and
  # types match the rows produced below, so callers always see one schema.
  empty_map <- data.frame(
    pathway = character(),
    gene = character(),
    type = character(),
    value = numeric()
  )

  # Build the (pathway, gene) rows for one analysis type.
  # Returns NULL when the type is skipped or contributes no rows.
  build_map <- function(df, ids, type) {
    if (is.null(df) || is.null(ids)) {
      return(NULL)
    }

    # Fail early with a clear message instead of an obscure subscript error.
    missing_cols <- setdiff(c("ID", "geneID", "Description"), names(df))
    if (length(missing_cols) > 0) {
      stop(
        "The ", type, " data frame is missing required column(s): ",
        paste(missing_cols, collapse = ", "),
        call. = FALSE
      )
    }

    selected_rows <- df[df[["ID"]] %in% ids, , drop = FALSE]

    # One character vector of genes per selected pathway. This is vectorised
    # over rows, so a selection with zero rows simply yields an empty list.
    gene_lists <- strsplit(
      as.character(selected_rows[["geneID"]]), "/",
      fixed = TRUE
    )
    n_genes <- lengths(gene_lists)
    n_rows <- sum(n_genes)
    if (n_rows == 0) {
      return(NULL)
    }

    # The pathway label is the Description column, not the ID; repeat it once
    # per gene so that it lines up with the flattened gene vector.
    data.frame(
      pathway = rep(as.character(selected_rows[["Description"]]), n_genes),
      gene = as.character(unlist(gene_lists, use.names = FALSE)),
      type = rep(type, n_rows),
      value = rep(1, n_rows)
    )
  }

  # Process the analysis types in a fixed order: BP, KEGG, MF, REACTOME, CC, DO.
  parts <- list(
    build_map(BP_dataframe, BP_ids, "BP"),
    build_map(KEGG_dataframe, KEGG_ids, "KEGG"),
    build_map(MF_dataframe, MF_ids, "MF"),
    build_map(REACTOME_dataframe, REACTOME_ids, "REACTOME"),
    build_map(CC_dataframe, CC_ids, "CC"),
    build_map(DO_dataframe, DO_ids, "DO")
  )
  parts <- Filter(Negate(is.null), parts)

  if (length(parts) == 0) {
    return(empty_map)
  }

  # Bind once at the end rather than growing a data frame inside a loop.
  pathway_gene_map <- do.call(rbind, parts)
  rownames(pathway_gene_map) <- NULL
  pathway_gene_map
}