|
a |
|
b/R/EnrichCircoBar.R |
|
|
1 |
#' Extract and Count Descriptions with Specified Color
#'
#' Filters a data frame down to the rows whose 'Description' value is one of
#' `descriptions`, keeps only the 'Description' and 'Count' columns, and adds
#' a 'color' column holding the supplied color.
#'
#' @param df A data frame containing at least 'Description' and 'Count'
#'   columns. An error is raised if either column is missing.
#' @param descriptions A vector of descriptions to filter in the data frame.
#' @param color A character string specifying the color to be added as a new
#'   column.
#' @return A data frame filtered by descriptions, containing 'Description',
#'   'Count', and a new 'color' column. When no row matches `descriptions`,
#'   a zero-row data frame with the same three columns is returned.
#' @export
#'
#' @examples
#' # Generate Sample Input Data for extract_descriptions_counts Function
#'
#' # Create a sample data frame with 'Description' and 'Count' columns
#' data <- data.frame(
#'   Description = c(
#'     "immunoglobulin production",
#'     "B cell mediated immunity",
#'     "T cell activation",
#'     "antigen processing and presentation",
#'     "cytokine signaling",
#'     "natural killer cell activity",
#'     "phagocytosis",
#'     "complement activation",
#'     "antibody-dependent cellular cytotoxicity",
#'     "regulatory T cell function"
#'   ),
#'   Count = c(
#'     150,  # immunoglobulin production
#'     200,  # B cell mediated immunity
#'     175,  # T cell activation
#'     125,  # antigen processing and presentation
#'     190,  # cytokine signaling
#'     160,  # natural killer cell activity
#'     140,  # phagocytosis
#'     180,  # complement activation
#'     130,  # antibody-dependent cellular cytotoxicity
#'     170   # regulatory T cell function
#'   ),
#'   stringsAsFactors = FALSE  # Ensure that strings are not converted to factors
#' )
#'
#' descriptions_to_filter <- c("immunoglobulin production", "B cell mediated immunity")
#' specified_color <- "red"  # You can specify any color you desire
#' filtered_data_with_color <- extract_descriptions_counts(
#'   data, descriptions_to_filter,
#'   specified_color)
#' print(filtered_data_with_color)
#'
extract_descriptions_counts <- function(df, descriptions, color) {
  # Fail early with a message that names the missing column(s) instead of the
  # generic "undefined columns selected" raised by `[.data.frame`.
  required_cols <- c("Description", "Count")
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0L) {
    stop(
      "`df` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # `[[` avoids the partial name matching that `$` performs on data frames.
  keep <- df[["Description"]] %in% descriptions

  # Filter rows and select the two columns in one step; `drop = FALSE` keeps
  # the result a data frame regardless of how many rows or columns remain.
  result_df <- df[keep, required_cols, drop = FALSE]

  # Assigning a length-1 value to a zero-row data frame is an error in base R,
  # so an empty vector of the same type is used when nothing matched.
  result_df$color <- if (nrow(result_df) == 0L) color[0L] else color

  result_df
}
|
|
65 |
|
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
|
71 |
|
|
|
72 |
|
|
|
73 |
#' Combine and Visualize Data with Circular Bar Chart
#'
#' This function combines multiple data frames, sorts the combined rows by
#' 'Count' in descending order, and visualizes them as a Circular Bar Chart
#' using the 'ggplot2' package.
#'
#' @importFrom dplyr bind_rows arrange desc row_number mutate
#' @importFrom ggplot2 ggplot geom_bar geom_text scale_fill_manual
#'   scale_y_continuous scale_x_continuous coord_polar labs theme_minimal
#'   theme element_rect element_blank
#' @importFrom rlang .data
#' @param data_list A list of data frames to be combined. After combining,
#'   the columns 'Description', 'Count' and 'color' must be present.
#' @param title Plot title. Defaults to the title used historically by this
#'   function.
#' @param subtitle Plot subtitle. Defaults to the subtitle used historically
#'   by this function; override it when plotting other categories.
#' @return A `ggplot` object representing the Circular Bar Chart.
#' @export
#'
#' @examples
#' # Create sample data frames for each enrichment category
#'
#' # Colors shared by every category below
#' palette <- c(
#'   "#1f77b4",  # blue
#'   "#ff7f0e",  # orange
#'   "#2ca02c",  # green
#'   "#d62728",  # red
#'   "#9467bd"   # purple
#' )
#'
#' # 1. Biological Process (BP)
#' filtered_data_BP <- data.frame(
#'   Description = c(
#'     "immune response",
#'     "cell proliferation",
#'     "signal transduction",
#'     "apoptotic process",
#'     "metabolic process"
#'   ),
#'   Count = c(120, 85, 150, 60, 95),
#'   color = palette,
#'   stringsAsFactors = FALSE
#' )
#'
#' # 2. Cellular Component (CC)
#' filtered_data_CC <- data.frame(
#'   Description = c(
#'     "nucleus",
#'     "cytoplasm",
#'     "membrane",
#'     "mitochondrion",
#'     "extracellular space"
#'   ),
#'   Count = c(90, 110, 75, 65, 80),
#'   color = palette,
#'   stringsAsFactors = FALSE
#' )
#'
#' # 3. Molecular Function (MF)
#' filtered_data_MF <- data.frame(
#'   Description = c(
#'     "protein binding",
#'     "DNA binding",
#'     "enzyme activity",
#'     "transporter activity",
#'     "receptor activity"
#'   ),
#'   Count = c(140, 130, 100, 70, 90),
#'   color = palette,
#'   stringsAsFactors = FALSE
#' )
#'
#' # 4. Disease Ontology (DO)
#' filtered_data_DO <- data.frame(
#'   Description = c(
#'     "cancer",
#'     "cardiovascular disease",
#'     "neurological disorder",
#'     "metabolic disease",
#'     "infectious disease"
#'   ),
#'   Count = c(200, 150, 120, 90, 160),
#'   color = palette,
#'   stringsAsFactors = FALSE
#' )
#'
#' # 5. Reactome Pathways
#' filtered_data_Reactome <- data.frame(
#'   Description = c(
#'     "Cell Cycle",
#'     "Apoptosis",
#'     "DNA Repair",
#'     "Signal Transduction",
#'     "Metabolism of Proteins"
#'   ),
#'   Count = c(110, 95, 80, 130, 85),
#'   color = palette,
#'   stringsAsFactors = FALSE
#' )
#'
#' # 6. KEGG Pathways
#' filtered_data_kegg <- data.frame(
#'   Description = c(
#'     "PI3K-Akt signaling pathway",
#'     "MAPK signaling pathway",
#'     "NF-kappa B signaling pathway",
#'     "JAK-STAT signaling pathway",
#'     "Toll-like receptor signaling pathway"
#'   ),
#'   Count = c(175, 160, 145, 130, 155),
#'   color = palette,
#'   stringsAsFactors = FALSE
#' )
#'
#' # Combine all filtered data frames into a list
#' data_list <- list(
#'   BP = filtered_data_BP,
#'   CC = filtered_data_CC,
#'   MF = filtered_data_MF,
#'   DO = filtered_data_DO,
#'   Reactome = filtered_data_Reactome,
#'   KEGG = filtered_data_kegg
#' )
#'
#' # Create the Circular Bar Chart
#' combined_and_visualized_data <- enrich_circo_bar(data_list)
#'
enrich_circo_bar <- function(data_list,
                             title = "Enrichment CircularBar Chart",
                             subtitle = "Including: BP/MF/CC/DO/KEGG/Reactome") {
  # Reject empty input up front; otherwise max() below would run on an empty
  # vector and the axis limits would be meaningless.
  if (length(data_list) == 0L) {
    stop("`data_list` must contain at least one data frame.", call. = FALSE)
  }

  # Combine data frames
  combined_data <- dplyr::bind_rows(data_list)

  required_cols <- c("Description", "Count", "color")
  missing_cols <- setdiff(required_cols, names(combined_data))
  if (length(missing_cols) > 0L) {
    stop(
      "The data frames in `data_list` are missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(combined_data) == 0L) {
    stop("`data_list` contains no rows to plot.", call. = FALSE)
  }

  # Sort by 'Count' in descending order, number the rows (the 'id' is the
  # radial position of each bar), and fix the factor level order of
  # 'Description' to the sorted order of first appearance.
  combined_data <- dplyr::arrange(combined_data, dplyr::desc(.data$Count))
  combined_data <- dplyr::mutate(
    combined_data,
    id = dplyr::row_number(),
    Description = factor(.data$Description, levels = unique(.data$Description))
  )

  # One fill color per 'Description' level (first occurrence wins), in level
  # order, as expected by scale_fill_manual().
  first_rows <- match(levels(combined_data$Description), combined_data$Description)
  fill_colors <- combined_data$color[first_rows]

  # One label color per row: a constant aesthetic passed to geom_text() must
  # have length 1 or one value per row, which differs from the number of
  # levels whenever a 'Description' occurs more than once.
  label_colors <- combined_data$color

  # Calculate the expanded max values for Count and id so the outer bars and
  # the longest arc do not touch the plot boundary.
  peak_count <- max(combined_data$Count)
  max_count <- peak_count + (peak_count / 5)
  max_id <- max(combined_data$id) + 1.5

  # Create the plot: bars drawn against 'id', then wrapped around the y axis.
  ggplot2::ggplot(
    combined_data,
    ggplot2::aes(x = .data$id, y = .data$Count, fill = .data$Description)
  ) +
    ggplot2::geom_bar(stat = "identity", width = 0.7) +
    ggplot2::geom_text(
      ggplot2::aes(x = .data$id, y = 0, label = .data$Description),
      hjust = 1.03,
      size = 3.5,
      color = label_colors
    ) +
    ggplot2::scale_fill_manual(values = fill_colors, guide = "none") +
    ggplot2::scale_y_continuous(
      expand = c(0, 0),
      limits = c(0, max_count),
      position = "right"
    ) +
    # The lower limit of -1 leaves an empty hole in the middle of the chart.
    ggplot2::scale_x_continuous(expand = c(0, 0), limits = c(-1, max_id)) +
    ggplot2::coord_polar(theta = "y") +
    ggplot2::labs(title = title, subtitle = subtitle) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      plot.background = ggplot2::element_rect(fill = "white", color = "white"),
      axis.title = ggplot2::element_blank(),
      axis.text = ggplot2::element_blank()
    )
}