Diff of /R/EnrichCircoBar.R [000000] .. [0f2269]

Switch to unified view

a b/R/EnrichCircoBar.R
1
#' Extract and Count Descriptions with Specified Color
2
#'
3
#' This function filters a data frame for specified descriptions, selects the 'Description' and 'Count' columns,
4
#' and adds a new column with a specified color.
5
#'
6
#' @param df A data frame containing at least 'Description' and 'Count' columns.
7
#' @param descriptions A vector of descriptions to filter in the data frame.
8
#' @param color A character string specifying the color to be added as a new column.
9
#' @return A data frame filtered by descriptions, containing 'Description', 'Count', and a new 'color' column.
10
#' @export
11
#'
12
#' @examples
13
#' # Generate Sample Input Data for extract_descriptions_counts Function
14
#'
15
#' # Create a sample data frame with 'Description' and 'Count' columns
16
#' data <- data.frame(
17
#'   Description = c(
18
#'     "immunoglobulin production",
19
#'     "B cell mediated immunity",
20
#'     "T cell activation",
21
#'     "antigen processing and presentation",
22
#'     "cytokine signaling",
23
#'     "natural killer cell activity",
24
#'     "phagocytosis",
25
#'     "complement activation",
26
#'     "antibody-dependent cellular cytotoxicity",
27
#'     "regulatory T cell function"
28
#'   ),
29
#'   Count = c(
30
#'     150,  # immunoglobulin production
31
#'     200,  # B cell mediated immunity
32
#'     175,  # T cell activation
33
#'     125,  # antigen processing and presentation
34
#'     190,  # cytokine signaling
35
#'     160,  # natural killer cell activity
36
#'     140,  # phagocytosis
37
#'     180,  # complement activation
38
#'     130,  # antibody-dependent cellular cytotoxicity
39
#'     170   # regulatory T cell function
40
#'   ),
41
#'   stringsAsFactors = FALSE  # Ensure that strings are not converted to factors
42
#' )
43
#'
44
#'
45
#'
46
#' descriptions_to_filter <- c("immunoglobulin production", "B cell mediated immunity")
47
#' specified_color <- "red"  # You can specify any color you desire
48
#' filtered_data_with_color <- extract_descriptions_counts(
49
#'   data, descriptions_to_filter,
50
#'   specified_color)
51
#' print(filtered_data_with_color)
52
#'
53
extract_descriptions_counts <- function(df, descriptions, color) {
  # Purpose: keep the rows of `df` whose 'Description' value is listed in
  # `descriptions`, reduce them to the 'Description' and 'Count' columns, and
  # store `color` in a new 'color' column.
  #
  # Arguments:
  #   df           Data frame with at least 'Description' and 'Count' columns.
  #   descriptions Vector of description values to keep.
  #   color        Value written to the new 'color' column.
  #
  # Returns: a data frame with the columns 'Description', 'Count' and 'color'.
  #   It has zero rows when nothing in `descriptions` matches.
  #
  # Raises: an error when `df` lacks 'Description' or 'Count'.

  # Fail early with a clear message when a required column is absent.
  required_cols <- c("Description", "Count")
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "`df` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # `[[` avoids the partial name matching that `$` performs on data frames.
  keep <- df[["Description"]] %in% descriptions

  # Filter rows and select the two columns in one step; `drop = FALSE` keeps
  # the result a data frame regardless of how many columns are selected.
  result_df <- df[keep, required_cols, drop = FALSE]

  if (nrow(result_df) == 0L) {
    # Assigning a length-one value to a zero-row data.frame raises
    # "replacement has 1 row, data has 0". Attach a zero-length vector of the
    # same type so an empty match yields an empty result instead of an error.
    result_df$color <- color[0L]
  } else {
    result_df$color <- color
  }

  result_df
}
65
66
67
68
69
70
71
72
73
#' Combine and Visualize Data with Circular Bar Chart
74
#'
75
#' This function combines multiple data frames, arranges them, and visualizes the combined data
76
#' in a Circular Bar Chart using the 'dplyr' and 'ggplot2' packages.
77
#'
78
#' @importFrom dplyr bind_rows arrange desc row_number mutate
79
#' @importFrom ggplot2 ggplot geom_bar geom_text scale_fill_manual scale_y_continuous scale_x_continuous coord_polar labs theme_minimal theme element_rect element_blank
80
#' @importFrom rlang .data
81
#' @param data_list A list of data frames to be combined. Each data frame must contain 'Description', 'Count', and 'color' columns.
82
#' @return A `ggplot` object representing the Circular Bar Chart.
83
#' @export
84
#'
85
#' @examples
86
#' # Create sample data frames for each enrichment category
87
#'
88
#' # 1. Biological Process (BP)
89
#' filtered_data_BP <- data.frame(
90
#'   Description = c(
91
#'     "immune response",
92
#'     "cell proliferation",
93
#'     "signal transduction",
94
#'     "apoptotic process",
95
#'     "metabolic process"
96
#'   ),
97
#'   Count = c(120, 85, 150, 60, 95),
98
#'   color = c(
99
#'     "#1f77b4",  # blue
100
#'     "#ff7f0e",  # orange
101
#'     "#2ca02c",  # green
102
#'     "#d62728",  # red
103
#'     "#9467bd"   # purple
104
#'   ),
105
#'   stringsAsFactors = FALSE
106
#' )
107
#'
108
#' # 2. Cellular Component (CC)
109
#' filtered_data_CC <- data.frame(
110
#'   Description = c(
111
#'     "nucleus",
112
#'     "cytoplasm",
113
#'     "membrane",
114
#'     "mitochondrion",
115
#'     "extracellular space"
116
#'   ),
117
#'   Count = c(90, 110, 75, 65, 80),
118
#'   color = c(
119
#'     "#1f77b4",
120
#'     "#ff7f0e",
121
#'     "#2ca02c",
122
#'     "#d62728",
123
#'     "#9467bd"
124
#'   ),
125
#'   stringsAsFactors = FALSE
126
#' )
127
#'
128
#' # 3. Molecular Function (MF)
129
#' filtered_data_MF <- data.frame(
130
#'   Description = c(
131
#'     "protein binding",
132
#'     "DNA binding",
133
#'     "enzyme activity",
134
#'     "transporter activity",
135
#'     "receptor activity"
136
#'   ),
137
#'   Count = c(140, 130, 100, 70, 90),
138
#'   color = c(
139
#'     "#1f77b4",
140
#'     "#ff7f0e",
141
#'     "#2ca02c",
142
#'     "#d62728",
143
#'     "#9467bd"
144
#'   ),
145
#'   stringsAsFactors = FALSE
146
#' )
147
#'
148
#' # 4. Disease Ontology (DO)
149
#' filtered_data_DO <- data.frame(
150
#'   Description = c(
151
#'     "cancer",
152
#'     "cardiovascular disease",
153
#'     "neurological disorder",
154
#'     "metabolic disease",
155
#'     "infectious disease"
156
#'   ),
157
#'   Count = c(200, 150, 120, 90, 160),
158
#'   color = c(
159
#'     "#1f77b4",
160
#'     "#ff7f0e",
161
#'     "#2ca02c",
162
#'     "#d62728",
163
#'     "#9467bd"
164
#'   ),
165
#'   stringsAsFactors = FALSE
166
#' )
167
#'
168
#' # 5. Reactome Pathways
169
#' filtered_data_Reactome <- data.frame(
170
#'   Description = c(
171
#'     "Cell Cycle",
172
#'     "Apoptosis",
173
#'     "DNA Repair",
174
#'     "Signal Transduction",
175
#'     "Metabolism of Proteins"
176
#'   ),
177
#'   Count = c(110, 95, 80, 130, 85),
178
#'   color = c(
179
#'     "#1f77b4",
180
#'     "#ff7f0e",
181
#'     "#2ca02c",
182
#'     "#d62728",
183
#'     "#9467bd"
184
#'   ),
185
#'   stringsAsFactors = FALSE
186
#' )
187
#'
188
#' # 6. KEGG Pathways
189
#' filtered_data_kegg <- data.frame(
190
#'   Description = c(
191
#'     "PI3K-Akt signaling pathway",
192
#'     "MAPK signaling pathway",
193
#'     "NF-kappa B signaling pathway",
194
#'     "JAK-STAT signaling pathway",
195
#'     "Toll-like receptor signaling pathway"
196
#'   ),
197
#'   Count = c(175, 160, 145, 130, 155),
198
#'   color = c(
199
#'     "#1f77b4",
200
#'     "#ff7f0e",
201
#'     "#2ca02c",
202
#'     "#d62728",
203
#'     "#9467bd"
204
#'   ),
205
#'   stringsAsFactors = FALSE
206
#' )
207
#'
208
#' # Combine all filtered data frames into a list
209
#' data_list <- list(
210
#'   BP = filtered_data_BP,
211
#'   CC = filtered_data_CC,
212
#'   MF = filtered_data_MF,
213
#'   DO = filtered_data_DO,
214
#'   Reactome = filtered_data_Reactome,
215
#'   KEGG = filtered_data_kegg
216
#' )
217
#'
218
#' # Create the Circular Bar Chart
219
#' combined_and_visualized_data <- enrich_circo_bar(data_list)
220
#'
221
enrich_circo_bar <- function(data_list) {
  # Purpose: stack the data frames in `data_list`, order the rows by 'Count',
  # and draw one bar per row in polar coordinates (a circular bar chart), with
  # each bar and its label drawn in the colour given by the 'color' column.
  #
  # Arguments:
  #   data_list List of data frames; the combined data must provide the
  #             columns 'Description', 'Count' and 'color'.
  #
  # Returns: a ggplot object.
  #
  # Raises: an error when a required column is missing or there are no rows.

  # Combine data frames
  combined_data <- dplyr::bind_rows(data_list)

  # Validate up front: otherwise max() below returns -Inf (with a warning) on
  # empty input and the plot limits become meaningless.
  required_cols <- c("Description", "Count", "color")
  missing_cols <- setdiff(required_cols, names(combined_data))
  if (length(missing_cols) > 0) {
    stop(
      "`data_list` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(combined_data) == 0L) {
    stop("`data_list` contains no rows to plot.", call. = FALSE)
  }

  # Sort by 'Count' in ascending order and number the rows. `id` is mapped to
  # x, which becomes the radius under coord_polar(theta = "y"), so the largest
  # counts end up on the outermost rings.
  combined_data <- dplyr::arrange(combined_data, .data$Count)
  combined_data <- dplyr::mutate(combined_data, id = dplyr::row_number())

  # Make 'Description' a factor whose levels follow the sorted row order
  combined_data <- dplyr::mutate(
    combined_data,
    Description = factor(.data$Description, levels = unique(.data$Description))
  )

  # One fill colour per factor level (first occurrence of each description)
  description_levels <- levels(combined_data$Description)
  fill_colors <- combined_data$color[
    match(description_levels, combined_data$Description)
  ]

  # One label colour per row. A constant aesthetic must have length 1 or one
  # value per row, so expand the per-level colours through the factor codes;
  # this keeps every label the same colour as its bar even when a description
  # occurs more than once.
  label_colors <- fill_colors[as.integer(combined_data$Description)]

  # Expanded axis limits: 20% head-room on Count, 1.5 units beyond the last id
  peak_count <- max(combined_data$Count)
  max_count <- peak_count + (peak_count / 5)
  max_id <- max(combined_data$id) + 1.5

  # Create the plot
  p <- ggplot2::ggplot(
    combined_data,
    ggplot2::aes(x = .data$id, y = .data$Count, fill = .data$Description)
  ) +
    ggplot2::geom_bar(stat = "identity", width = 0.7) +
    ggplot2::geom_text(
      ggplot2::aes(x = .data$id, y = 0, label = .data$Description),
      hjust = 1.03,
      size = 3.5,
      color = label_colors
    ) +
    ggplot2::scale_fill_manual(values = fill_colors, guide = "none") +
    ggplot2::scale_y_continuous(
      expand = c(0, 0),
      limits = c(0, max_count),
      position = "right"
    ) +
    ggplot2::scale_x_continuous(expand = c(0, 0), limits = c(-1, max_id)) +
    ggplot2::coord_polar(theta = "y") +
    ggplot2::labs(
      title = "Enrichment CircularBar Chart",
      subtitle = "Including: BP/MF/CC/DO/KEGG/Reactome"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      plot.background = ggplot2::element_rect(fill = "white", color = "white"),
      axis.title = ggplot2::element_blank(),
      axis.text = ggplot2::element_blank()
    )

  p
}