|
a |
|
b/R/ProcessHeatdata.R |
|
|
1 |
#' Process Heatmap Data with Various Selection Options |
|
|
2 |
#' |
|
|
3 |
#' This function processes heatmap data (`heatdata`) based on a given selection option. |
|
|
4 |
#' It allows customization of column names, selection of specific columns per group, |
|
|
5 |
#' or averaging columns based on a common prefix. |
|
|
6 |
#' |
|
|
7 |
#' @param heatdata A data frame containing the heatmap data. |
|
|
8 |
#' @param selection An integer specifying the processing method: |
|
|
9 |
#' - 1: Use custom names for columns. |
|
|
10 |
#' - 2: Select a given number of columns per group based on a prefix. |
|
|
11 |
#' - 3: Calculate the average of columns per group based on a prefix. |
|
|
12 |
#' @param custom_names A character vector of custom names for columns (used when `selection = 1`). |
|
|
13 |
#' The length of this vector must match the number of columns in `heatdata`. |
|
|
14 |
#' @param num_names_per_group An integer specifying the number of columns to select per group (used when `selection = 2`). |
|
|
15 |
#' @param prefix_length An integer specifying the length of the prefix for grouping columns (used when `selection = 2` or `selection = 3`). |
|
|
16 |
#' Default is 4. |
|
|
17 |
#' @return A processed data frame based on the specified selection option. |
|
|
18 |
#' @export |
|
|
19 |
#' |
|
|
20 |
#' @examples |
|
|
21 |
#' # Example heatmap data frame |
|
|
22 |
#' heatdata <- data.frame( |
|
|
23 |
#' groupA_1 = c(1, 2, 3), |
|
|
24 |
#' groupA_2 = c(4, 5, 6), |
|
|
25 |
#' groupB_1 = c(7, 8, 9), |
|
|
26 |
#' groupB_2 = c(10, 11, 12) |
|
|
27 |
#' ) |
|
|
28 |
#' |
|
|
29 |
#' # Selection 1: Use custom names for columns |
|
|
30 |
#' custom_names <- c("Sample1", "Sample2", "Sample3", "Sample4") |
|
|
31 |
#' processed_data1 <- process_heatdata(heatdata, selection = 1, custom_names = custom_names) |
|
|
32 |
#' |
|
|
33 |
#' # Selection 2: Select a given number of columns per group based on a prefix |
|
|
34 |
#' processed_data2 <- process_heatdata(heatdata, selection = 2, num_names_per_group = 1) |
|
|
35 |
#' |
|
|
36 |
#' # Selection 3: Calculate the average of columns per group based on a prefix |
|
|
37 |
#' processed_data3 <- process_heatdata(heatdata, selection = 3, prefix_length = 6) |
|
|
38 |
|
|
|
39 |
process_heatdata <- function(heatdata, |
|
|
40 |
selection = 1, |
|
|
41 |
custom_names = NULL, |
|
|
42 |
num_names_per_group = NULL, |
|
|
43 |
prefix_length = 4) { |
|
|
44 |
|
|
|
45 |
if (selection == 1) { |
|
|
46 |
# Option 1: Use custom names for columns |
|
|
47 |
if (length(custom_names) != ncol(heatdata)) { |
|
|
48 |
stop("Length of custom_names must match number of columns in heatdata") |
|
|
49 |
} |
|
|
50 |
names(heatdata) <- custom_names |
|
|
51 |
} else if (selection == 2) { |
|
|
52 |
# Option 2: Select a given number of columns per group based on a prefix |
|
|
53 |
group_names <- unique(substr(names(heatdata), 1, prefix_length)) # Get unique group names based on prefix |
|
|
54 |
selected_columns <- integer(0) |
|
|
55 |
new_names <- character(0) |
|
|
56 |
|
|
|
57 |
for (group in group_names) { |
|
|
58 |
group_cols <- grep(group, names(heatdata)) # Find columns for each group |
|
|
59 |
num_selected <- min(length(group_cols), num_names_per_group) |
|
|
60 |
selected_columns <- c(selected_columns, sample(group_cols, num_selected)) |
|
|
61 |
|
|
|
62 |
# Generate new names for selected columns |
|
|
63 |
new_group_names <- paste(group, seq_len(num_selected), sep = "_") |
|
|
64 |
new_names <- c(new_names, new_group_names) |
|
|
65 |
} |
|
|
66 |
|
|
|
67 |
heatdata <- heatdata[, selected_columns, drop = FALSE] # Keep only selected columns |
|
|
68 |
names(heatdata) <- new_names |
|
|
69 |
} else if (selection == 3) { |
|
|
70 |
# Option 3: Calculate the average of columns per group based on a prefix |
|
|
71 |
group_names <- unique(substr(names(heatdata), 1, prefix_length)) |
|
|
72 |
|
|
|
73 |
# Create a list to collect mean values for each group |
|
|
74 |
mean_list <- list() |
|
|
75 |
|
|
|
76 |
# Calculate mean values for each group and store them in the list |
|
|
77 |
for (group in group_names) { |
|
|
78 |
group_cols <- grep(group, names(heatdata), value = TRUE) |
|
|
79 |
mean_list[[paste(group, "mean", sep = "_")]] <- rowMeans(heatdata[, group_cols], na.rm = TRUE) |
|
|
80 |
} |
|
|
81 |
|
|
|
82 |
# Convert list to data frame and set row names |
|
|
83 |
heatdata <- as.data.frame(do.call(cbind, mean_list)) |
|
|
84 |
rownames(heatdata) <- rownames(heatdata) |
|
|
85 |
} else { |
|
|
86 |
stop("Invalid selection parameter") |
|
|
87 |
} |
|
|
88 |
|
|
|
89 |
return(heatdata) |
|
|
90 |
} |