Diff of /R/ProcessHeatdata.R [000000] .. [0f2269]

Switch to unified view

a b/R/ProcessHeatdata.R
1
#' Process Heatmap Data with Various Selection Options
2
#'
3
#' This function processes heatmap data (`heatdata`) based on a given selection option.
4
#' It allows customization of column names, selection of specific columns per group,
5
#' or averaging columns based on a common prefix.
6
#'
7
#' @param heatdata A data frame containing the heatmap data.
8
#' @param selection An integer specifying the processing method:
9
#' - 1: Use custom names for columns.
10
#' - 2: Select a given number of columns per group based on a prefix.
11
#' - 3: Calculate the average of columns per group based on a prefix.
12
#' @param custom_names A character vector of custom names for columns (used when `selection = 1`).
13
#' The length of this vector must match the number of columns in `heatdata`.
14
#' @param num_names_per_group An integer specifying the number of columns to select per group (used when `selection = 2`).
15
#' @param prefix_length An integer specifying the length of the prefix for grouping columns (used when `selection = 2` or `selection = 3`).
16
#' Default is 4.
17
#' @return A processed data frame based on the specified selection option.
18
#' @export
19
#'
20
#' @examples
21
#' # Example heatmap data frame
22
#' heatdata <- data.frame(
23
#'   groupA_1 = c(1, 2, 3),
24
#'   groupA_2 = c(4, 5, 6),
25
#'   groupB_1 = c(7, 8, 9),
26
#'   groupB_2 = c(10, 11, 12)
27
#' )
28
#'
29
#' # Selection 1: Use custom names for columns
30
#' custom_names <- c("Sample1", "Sample2", "Sample3", "Sample4")
31
#' processed_data1 <- process_heatdata(heatdata, selection = 1, custom_names = custom_names)
32
#'
33
#' # Selection 2: Select a given number of columns per group based on a prefix
34
#' processed_data2 <- process_heatdata(heatdata, selection = 2, num_names_per_group = 1)
35
#'
36
#' # Selection 3: Calculate the average of columns per group based on a prefix
37
#' processed_data3 <- process_heatdata(heatdata, selection = 3, prefix_length = 6)
38
39
process_heatdata <- function(heatdata,
40
                             selection = 1,
41
                             custom_names = NULL,
42
                             num_names_per_group = NULL,
43
                             prefix_length = 4) {
44
45
  if (selection == 1) {
46
    # Option 1: Use custom names for columns
47
    if (length(custom_names) != ncol(heatdata)) {
48
      stop("Length of custom_names must match number of columns in heatdata")
49
    }
50
    names(heatdata) <- custom_names
51
  } else if (selection == 2) {
52
    # Option 2: Select a given number of columns per group based on a prefix
53
    group_names <- unique(substr(names(heatdata), 1, prefix_length))  # Get unique group names based on prefix
54
    selected_columns <- integer(0)
55
    new_names <- character(0)
56
57
    for (group in group_names) {
58
      group_cols <- grep(group, names(heatdata))  # Find columns for each group
59
      num_selected <- min(length(group_cols), num_names_per_group)
60
      selected_columns <- c(selected_columns, sample(group_cols, num_selected))
61
62
      # Generate new names for selected columns
63
      new_group_names <- paste(group, seq_len(num_selected), sep = "_")
64
      new_names <- c(new_names, new_group_names)
65
    }
66
67
    heatdata <- heatdata[, selected_columns, drop = FALSE]  # Keep only selected columns
68
    names(heatdata) <- new_names
69
  } else if (selection == 3) {
70
    # Option 3: Calculate the average of columns per group based on a prefix
71
    group_names <- unique(substr(names(heatdata), 1, prefix_length))
72
73
    # Create a list to collect mean values for each group
74
    mean_list <- list()
75
76
    # Calculate mean values for each group and store them in the list
77
    for (group in group_names) {
78
      group_cols <- grep(group, names(heatdata), value = TRUE)
79
      mean_list[[paste(group, "mean", sep = "_")]] <- rowMeans(heatdata[, group_cols], na.rm = TRUE)
80
    }
81
82
    # Convert list to data frame and set row names
83
    heatdata <- as.data.frame(do.call(cbind, mean_list))
84
    rownames(heatdata) <- rownames(heatdata)
85
  } else {
86
    stop("Invalid selection parameter")
87
  }
88
89
  return(heatdata)
90
}