|
a |
|
b/R/EnrichmentSpiralize.R |
|
|
1 |
#' Extract and Store Top Pathways for Each Sample |
|
|
2 |
#' |
|
|
3 |
#' This function processes a dataframe containing SSGSEA KEGG results. It allows specifying the number |
|
|
4 |
#' of top pathways to extract for each sample based on their scores, and stores these in a new dataframe |
|
|
5 |
#' with sample names and pathway scores. |
|
|
6 |
#' |
|
|
7 |
#' @param ssgsea_kegg Dataframe containing SSGSEA KEGG results with samples as columns and pathways as rows. |
|
|
8 |
#' @param nTop Integer, number of top pathways to select for each sample. |
|
|
9 |
#' @return A dataframe with columns 'Pathway', 'Sample', and 'Value' representing the top pathways for each sample. |
|
|
10 |
#' @importFrom utils head |
|
|
11 |
#' @export |
|
|
12 |
#' @examples |
|
|
13 |
#' # Example: Generating input data for the extract_ntop_pathways function |
|
|
14 |
#' |
|
|
15 |
#' # Define example pathways |
|
|
16 |
#' pathways <- c("Pathway_A", "Pathway_B", "Pathway_C", "Pathway_D", "Pathway_E", |
|
|
17 |
#' "Pathway_F", "Pathway_G", "Pathway_H", "Pathway_I", "Pathway_J") |
|
|
18 |
#' |
|
|
19 |
#' # Define example samples |
|
|
20 |
#' samples <- c("Sample_1", "Sample_2", "Sample_3") |
|
|
21 |
#' |
|
|
22 |
#' # Generate random SSGSEA KEGG scores between 0 and 1 |
|
|
23 |
#' set.seed(123) # For reproducibility |
|
|
24 |
#' ssgsea_scores <- matrix(runif(length(pathways) * length(samples), min = 0, max = 1), |
|
|
25 |
#' nrow = length(pathways), ncol = length(samples), |
|
|
26 |
#' dimnames = list(pathways, samples)) |
|
|
27 |
#' |
|
|
28 |
#' # Convert to a data frame |
|
|
29 |
#' ssgsea_kegg <- as.data.frame(ssgsea_scores) |
|
|
30 |
#' |
|
|
31 |
#' # Extract the top 3 pathways for each sample |
|
|
32 |
#' top_pathways <- extract_ntop_pathways(ssgsea_kegg, nTop = 3) |
|
|
33 |
#' |
|
|
34 |
extract_ntop_pathways <- function(ssgsea_kegg, nTop = 5) { |
|
|
35 |
# Initialize an empty data frame to store the results |
|
|
36 |
results <- data.frame(Pathway = character(), Sample = character(), Value = numeric(), stringsAsFactors = FALSE) |
|
|
37 |
|
|
|
38 |
# Iterate through each sample, starting from the first column |
|
|
39 |
for (i in 1:ncol(ssgsea_kegg)) { |
|
|
40 |
sample_name <- colnames(ssgsea_kegg)[i] |
|
|
41 |
# To avoid factor type errors, ensure the data is numeric |
|
|
42 |
column_data <- as.numeric(ssgsea_kegg[[i]]) |
|
|
43 |
# Create a new data frame with numeric data for sorting and extracting |
|
|
44 |
pathway_data <- data.frame(Pathway = rownames(ssgsea_kegg), Value = column_data, stringsAsFactors = FALSE) |
|
|
45 |
# Sort by value in descending order and take the top nTop entries |
|
|
46 |
top_paths <- utils::head(pathway_data[order(-pathway_data$Value),], nTop) |
|
|
47 |
# Bind to the results data frame |
|
|
48 |
results <- rbind(results, data.frame(Pathway = top_paths$Pathway, Sample = sample_name, Value = top_paths$Value)) |
|
|
49 |
} |
|
|
50 |
|
|
|
51 |
return(results) |
|
|
52 |
} |
|
|
53 |
|
|
|
54 |
|
|
|
55 |
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
#' Extract Positive Pathways from SSGSEA Results and Select Random Samples |
|
|
62 |
#' |
|
|
63 |
#' This function processes the results of SSGSEA, specifically focusing on KEGG pathways. |
|
|
64 |
#' It extracts pathways with positive values from each sample and randomly selects a subset of them. |
|
|
65 |
#' |
|
|
66 |
#' @param ssgsea_kegg A matrix or data frame with pathways as rows and samples as columns. |
|
|
67 |
#' @param max_paths_per_sample Integer, maximum number of pathways to select per sample. |
|
|
68 |
#' @return A data frame with selected pathways, samples, and their corresponding values. |
|
|
69 |
#' @export |
|
|
70 |
#' @examples |
|
|
71 |
#' # Example: Generating input data for the extract_positive_pathways function |
|
|
72 |
#' |
|
|
73 |
#' # Define example pathways |
|
|
74 |
#' pathways <- c("Pathway_1", "Pathway_2", "Pathway_3", "Pathway_4", "Pathway_5", |
|
|
75 |
#' "Pathway_6", "Pathway_7", "Pathway_8", "Pathway_9", "Pathway_10") |
|
|
76 |
#' |
|
|
77 |
#' # Define example samples |
|
|
78 |
#' samples <- c("Sample_A", "Sample_B", "Sample_C") |
|
|
79 |
#' |
|
|
80 |
#' # Generate random SSGSEA KEGG scores including both positive and negative values |
|
|
81 |
#' set.seed(456) # For reproducibility |
|
|
82 |
#' ssgsea_scores <- matrix(rnorm(length(pathways) * length(samples), mean = 0, sd = 1), |
|
|
83 |
#' nrow = length(pathways), ncol = length(samples), |
|
|
84 |
#' dimnames = list(pathways, samples)) |
|
|
85 |
#' |
|
|
86 |
#' # Convert to a data frame |
|
|
87 |
#' ssgsea_kegg <- as.data.frame(ssgsea_scores) |
|
|
88 |
#' |
|
|
89 |
#' # Use the extract_positive_pathways function to extract up to 3 positive pathways per sample |
|
|
90 |
#' selected_positive_pathways <- extract_positive_pathways(ssgsea_kegg, max_paths_per_sample = 3) |
|
|
91 |
#' |
|
|
92 |
extract_positive_pathways <- function(ssgsea_kegg, max_paths_per_sample = 5) { |
|
|
93 |
# Initialize an empty data frame to store the results |
|
|
94 |
results <- data.frame(Pathway = character(), Sample = character(), Value = numeric(), stringsAsFactors = FALSE) |
|
|
95 |
|
|
|
96 |
# Iterate over each sample |
|
|
97 |
for (i in 1:ncol(ssgsea_kegg)) { |
|
|
98 |
sample_name <- colnames(ssgsea_kegg)[i] |
|
|
99 |
# Ensure the data is numeric |
|
|
100 |
column_data <- as.numeric(ssgsea_kegg[[i]]) |
|
|
101 |
# Create a new data frame with pathway names and values |
|
|
102 |
pathway_data <- data.frame(Pathway = rownames(ssgsea_kegg), Value = column_data, stringsAsFactors = FALSE) |
|
|
103 |
# Filter for positive values |
|
|
104 |
positive_paths <- pathway_data[pathway_data$Value > 0,] |
|
|
105 |
# If there are positive values, randomly select a few pathways |
|
|
106 |
if (nrow(positive_paths) > 0) { |
|
|
107 |
selected_paths <- positive_paths[sample(nrow(positive_paths), min(max_paths_per_sample, nrow(positive_paths))),] |
|
|
108 |
# Bind to the results data frame |
|
|
109 |
results <- rbind(results, data.frame(Pathway = selected_paths$Pathway, Sample = sample_name, Value = selected_paths$Value)) |
|
|
110 |
} |
|
|
111 |
} |
|
|
112 |
return(results) |
|
|
113 |
} |
|
|
114 |
|
|
|
115 |
|
|
|
116 |
|
|
|
117 |
|
|
|
118 |
|
|
|
119 |
|
|
|
120 |
|
|
|
121 |
|
|
|
122 |
|
|
|
123 |
#' Adjust Color Tone by Modifying Saturation and Luminance |
|
|
124 |
#' |
|
|
125 |
#' This function adjusts the saturation and luminance of a given color. It works by converting |
|
|
126 |
#' the color from RGB to Luv color space, applying the scaling factors to the saturation and luminance, |
|
|
127 |
#' and then converting it back to RGB. |
|
|
128 |
#' |
|
|
129 |
#' @param color A color in hexadecimal format (e.g., "#FF0000") or a valid R color name. |
|
|
130 |
#' @param saturation_scale Numeric, the scaling factor for saturation (values < 1 decrease saturation, values > 1 increase saturation). |
|
|
131 |
#' @param luminance_scale Numeric, the scaling factor for luminance (values < 1 darken the color, values > 1 lighten the color). |
|
|
132 |
#' @return Returns a color in hexadecimal format adjusted according to the provided scales. |
|
|
133 |
#' @importFrom grDevices convertColor col2rgb rgb |
|
|
134 |
#' @export |
|
|
135 |
#' @examples |
|
|
136 |
#' adjusted_color <- adjust_color_tone("#FF0000", saturation_scale = 0.8, luminance_scale = 1.2) |
|
|
137 |
#' print(adjusted_color) |
|
|
138 |
#' |
|
|
139 |
adjust_color_tone <- function(color, saturation_scale, luminance_scale) { |
|
|
140 |
# Convert the input color to RGB, then to Luv color space |
|
|
141 |
rgb <- t(grDevices::col2rgb(color) / 255) |
|
|
142 |
luv <- grDevices::convertColor(rgb, from = "sRGB", to = "Luv") |
|
|
143 |
|
|
|
144 |
# Apply scaling factors to saturation and luminance |
|
|
145 |
luv[, 2:3] <- luv[, 2:3] * saturation_scale # Adjust saturation |
|
|
146 |
luv[, 1] <- luv[, 1] * luminance_scale # Adjust luminance |
|
|
147 |
|
|
|
148 |
# Convert back to RGB and correct color values to stay within the valid range |
|
|
149 |
rgb_new <- grDevices::convertColor(luv, from = "Luv", to = "sRGB") |
|
|
150 |
rgb_new <- rgb_new * 255 |
|
|
151 |
rgb_new[rgb_new > 255] <- 255 # Prevent color values from exceeding the maximum |
|
|
152 |
|
|
|
153 |
# Convert adjusted RGB values back to hexadecimal format |
|
|
154 |
apply(rgb_new, 1, function(x) grDevices::rgb(x[1], x[2], x[3], maxColorValue = 255)) |
|
|
155 |
} |
|
|
156 |
|
|
|
157 |
|
|
|
158 |
|
|
|
159 |
|
|
|
160 |
|
|
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
|
165 |
#' Render a Spiral Plot Using Run-Length Encoding |
|
|
166 |
#' |
|
|
167 |
#' This function creates a spiral plot for visualizing sequential data in a compact and visually appealing way. |
|
|
168 |
#' It uses run-length encoding to represent the lengths and colors of sequences in the spiral. |
|
|
169 |
#' |
|
|
170 |
#' @param x A vector representing categories or segments. |
|
|
171 |
#' @param samples A vector indicating the sample each segment belongs to. |
|
|
172 |
#' @param values Numeric vector indicating the lengths of each segment. |
|
|
173 |
#' @param colors Character vector specifying the colors for each segment. |
|
|
174 |
#' @param labels Logical, whether to add labels to each segment. |
|
|
175 |
#' @importFrom grid gpar unit |
|
|
176 |
#' @importFrom spiralize spiral_rect spiral_text spiral_initialize spiral_track |
|
|
177 |
#' @export |
|
|
178 |
#' @return No return value, called for side effects. This function generates a spiral plot and optionally adds labels. |
|
|
179 |
#' @examples |
|
|
180 |
#' # Example: Creating a spiral plot using the spiral_newrle function |
|
|
181 |
#' |
|
|
182 |
#' # Define example data |
|
|
183 |
#' x <- c("A", "A", "B", "C") |
|
|
184 |
#' samples <- c("Sample1", "Sample1", "Sample2", "Sample2") |
|
|
185 |
#' values <- c(20, 30, 15, 35) |
|
|
186 |
#' colors <- c("red", "blue", "green", "purple") |
|
|
187 |
#' labels <- TRUE |
|
|
188 |
#' |
|
|
189 |
#' # Initialize the spiral plot, setting the x-axis range and scaling |
|
|
190 |
#' spiralize::spiral_initialize(xlim = c(0, sum(values)), scale_by = "curve_length", |
|
|
191 |
#' vp_param = list(x = grid::unit(0, "npc"), just = "left")) |
|
|
192 |
#' |
|
|
193 |
#' # Create a track for the spiral plot |
|
|
194 |
#' spiralize::spiral_track(height = 0.5) |
|
|
195 |
#' |
|
|
196 |
#' # Add segments to the spiral plot using run-length encoding |
|
|
197 |
#' spiral_newrle(x, samples, values, colors, labels) |
|
|
198 |
#' |
|
|
199 |
spiral_newrle <- function(x, samples, values, colors, labels = FALSE) { |
|
|
200 |
x <- as.vector(x) # Ensure x is a vector |
|
|
201 |
samples <- as.vector(samples) # Ensure samples is a vector |
|
|
202 |
values <- as.numeric(values) # Ensure values are numeric |
|
|
203 |
position_start <- 0 # Initialize starting position |
|
|
204 |
current_sample <- samples[1] # Start with the first sample |
|
|
205 |
cumulative_start <- position_start # Initialize cumulative start for labels |
|
|
206 |
|
|
|
207 |
# Loop through each value |
|
|
208 |
for (i in seq_along(values)) { |
|
|
209 |
position_end <- position_start + values[i] # Calculate end position |
|
|
210 |
|
|
|
211 |
# Use the specified color, defaulting to red if missing |
|
|
212 |
color <- if (!is.na(colors[i])) colors[i] else "red" |
|
|
213 |
|
|
|
214 |
# Draw the segment in the spiral |
|
|
215 |
spiralize::spiral_rect(position_start, 0, position_end, 1, gp = grid::gpar(fill = color, col = NA)) |
|
|
216 |
|
|
|
217 |
# Check for sample change or last element |
|
|
218 |
if (i == length(values) || samples[i + 1] != current_sample) { |
|
|
219 |
if (labels) { |
|
|
220 |
label_position <- (cumulative_start + position_end) / 2 |
|
|
221 |
spiralize::spiral_text(label_position, 0.5, current_sample, facing = "curved_inside", nice_facing = TRUE) |
|
|
222 |
} |
|
|
223 |
cumulative_start <- position_end # Reset for next sample |
|
|
224 |
if (i < length(values)) { |
|
|
225 |
current_sample <- samples[i + 1] |
|
|
226 |
} |
|
|
227 |
} |
|
|
228 |
|
|
|
229 |
position_start <- position_end # Move to next start position |
|
|
230 |
} |
|
|
231 |
} |
|
|
232 |
|
|
|
233 |
|
|
|
234 |
|
|
|
235 |
|
|
|
236 |
|
|
|
237 |
|
|
|
238 |
#' Create Spiral Plots with Legends Using 'spiralize' and 'ComplexHeatmap' |
|
|
239 |
#' |
|
|
240 |
#' This function initializes a spiral plot, adds tracks for pathways and samples, |
|
|
241 |
#' and generates legends based on the sample and pathway information in the provided data frame. |
|
|
242 |
#' It uses 'spiralize' for the spiral plot and 'ComplexHeatmap' for handling legends. |
|
|
243 |
#' |
|
|
244 |
#' @param results A data frame containing 'Pathway', 'Sample', 'Value', 'PathwayColor', and 'SampleColor' columns. |
|
|
245 |
#' @importFrom grid gpar |
|
|
246 |
#' @importFrom spiralize spiral_initialize spiral_track |
|
|
247 |
#' @importFrom ComplexHeatmap packLegend Legend draw |
|
|
248 |
#' @importFrom ggplot2 unit |
|
|
249 |
#' @export |
|
|
250 |
#' @return No return value, called for side effects. This function generates spiral plots and adds legends based on sample and pathway information. |
|
|
251 |
#' @examples |
|
|
252 |
#' # Example: Creating enrichment spiral plots with legends |
|
|
253 |
#' |
|
|
254 |
#' # Define the results data frame |
|
|
255 |
#' results <- data.frame( |
|
|
256 |
#' Pathway = c("Pathway1", "Pathway1", "Pathway2", "Pathway2", "Pathway3"), |
|
|
257 |
#' Sample = c("Sample1", "Sample1", "Sample2", "Sample2", "Sample3"), |
|
|
258 |
#' Value = c(20, 30, 15, 35, 25), |
|
|
259 |
#' PathwayColor = c("red", "red", "blue", "blue", "orange"), |
|
|
260 |
#' SampleColor = c("green", "green", "purple", "purple", "cyan"), |
|
|
261 |
#' stringsAsFactors = FALSE |
|
|
262 |
#' ) |
|
|
263 |
#' |
|
|
264 |
#' # Create the enrichment spiral plots with legends |
|
|
265 |
#' enrichment_spiral_plots(results) |
|
|
266 |
#' |
|
|
267 |
enrichment_spiral_plots <- function(results) { |
|
|
268 |
|
|
|
269 |
if (!requireNamespace("systemfonts", quietly = TRUE)) { |
|
|
270 |
stop("ggplot2 is required to use the function. Please install it.", call. = FALSE) |
|
|
271 |
} |
|
|
272 |
|
|
|
273 |
# Calculate the total value for setting the x-axis range |
|
|
274 |
n <- sum(results$Value) |
|
|
275 |
|
|
|
276 |
# Initialize the spiral plot |
|
|
277 |
spiralize::spiral_initialize(xlim = c(0, n), scale_by = "curve_length", |
|
|
278 |
vp_param = list(x = ggplot2::unit(0, "npc"), just = "left")) |
|
|
279 |
|
|
|
280 |
# Add a track for pathways |
|
|
281 |
spiralize::spiral_track(height = 0.4) |
|
|
282 |
spiral_newrle(results$Pathway, results$Sample, results$Value, results$PathwayColor, labels = FALSE) |
|
|
283 |
|
|
|
284 |
# Add a track for samples |
|
|
285 |
spiralize::spiral_track(height = 0.4) |
|
|
286 |
spiral_newrle(results$Sample, results$Sample, results$Value, results$SampleColor, labels = TRUE) |
|
|
287 |
|
|
|
288 |
# Generate legends based on sample, using unique pathway and color information |
|
|
289 |
lgd_list <- tapply(1:nrow(results), results$Sample, function(ind) { |
|
|
290 |
ComplexHeatmap::Legend(title = results$Sample[ind][1], at = unique(results$Pathway[ind]), |
|
|
291 |
legend_gp = grid::gpar(fill = unique(results$PathwayColor[ind]))) |
|
|
292 |
}) |
|
|
293 |
|
|
|
294 |
# Set the maximum height for the legends and draw them |
|
|
295 |
lgd <- ComplexHeatmap::packLegend(list = lgd_list, max_height = ggplot2::unit(7, "inch")) |
|
|
296 |
ComplexHeatmap::draw(lgd, x = ggplot2::unit(1, "npc") + ggplot2::unit(1, "mm"), just = "left") |
|
|
297 |
} |
|
|
298 |
|
|
|
299 |
|
|
|
300 |
|
|
|
301 |
|