# File: R/PathwayCount.R
#' Count Genes Present in Pathways Above a Threshold
#'
#' Keeps the pathways whose `Count` is strictly greater than
#' `count_threshold` and, for every kept pathway, records whether each gene in
#' `GO` appears in that pathway's `/`-separated `geneID` string.
#'
#' @importFrom dplyr filter
#' @importFrom rlang .data
#' @param GO A character vector of gene symbols.
#' @param count_threshold A number; only pathways whose `Count` is strictly
#'   greater than this value are kept.
#' @param enrich_data A data frame containing pathway enrichment analysis
#'   results. It must provide the columns `Count`, `Description` and `geneID`
#'   (gene symbols separated by `/`).
#' @return A data frame with columns "Symble" (gene symbol), "Description"
#'   (pathway description), and "Exists" (1 if gene is present, 0 otherwise).
#'   There is one row per (pathway, gene) pair, ordered by pathway and then by
#'   the order of `GO`. A zero-row data frame with the same columns is returned
#'   when no pathway exceeds the threshold or `GO` is empty.
#' @export
#'
#' @examples
#' # Simulated gene list
#' GO <- c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5")
#' # Simulated enrichment analysis data
#' enrich_data <- data.frame(
#'   ID = c("GO:0001", "GO:0002", "GO:0003"),
#'   Description = c("Pathway A", "Pathway B", "Pathway C"),
#'   Count = c(10, 4, 6),
#'   geneID = c("Gene1/Gene2/Gene3", "Gene4/Gene5", "Gene2/Gene6/Gene7")
#' )
#'
#' # Example usage
#' count_threshold <- 5
#' result_df <- pathway_count(GO, count_threshold, enrich_data)
#'
pathway_count <- function(GO, count_threshold, enrich_data) {
  required_cols <- c("Count", "Description", "geneID")
  missing_cols <- setdiff(required_cols, names(enrich_data))
  if (length(missing_cols) > 0) {
    stop(
      "`enrich_data` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  genes <- as.character(GO)

  # Filter pathways meeting the count threshold. Base subsetting is used so
  # the function does not rely on `%>%`, which is not imported here; `which()`
  # drops rows whose Count is NA instead of producing all-NA rows.
  keep <- which(enrich_data$Count > count_threshold)
  selected_pathways <- enrich_data[keep, , drop = FALSE]
  n_pathways <- nrow(selected_pathways)

  # One character vector of member genes per selected pathway.
  gene_sets <- strsplit(
    as.character(selected_pathways$geneID), "/",
    fixed = TRUE
  )

  # Membership flags, pathway by pathway, in the order of `genes`. With zero
  # pathways or zero genes this collapses to integer(0), so the empty case
  # needs no special handling (unlike a `1:nrow()` loop).
  exists_flags <- as.integer(unlist(
    lapply(gene_sets, function(members) genes %in% members),
    use.names = FALSE
  ))

  # Build the whole result at once instead of growing it with rbind().
  data.frame(
    Symble = rep(genes, times = n_pathways),
    Description = rep(selected_pathways$Description, each = length(genes)),
    Exists = exists_flags,
    stringsAsFactors = FALSE
  )
}