Diff of /R/LimmaAnalyze.R [000000] .. [0f2269]

Switch to unified view

a b/R/LimmaAnalyze.R
1
#' Differential Gene Expression Analysis using limma and voom
2
#'
3
#' This function performs differential gene expression analysis using the 'limma' package with voom normalization.
4
#' It reads tumor and normal expression data, merges them, filters low-expressed genes,
5
#' normalizes the data, performs limma analysis, and outputs the results along with information
6
#' on gene expression changes.
7
#'
8
#' @importFrom limma lmFit contrasts.fit eBayes topTable
9
#' @importFrom edgeR DGEList filterByExpr calcNormFactors
10
#' @importFrom dplyr mutate
11
#' @importFrom stats na.omit
12
#' @param tumor_file Path to the tumor data file (RDS format).
13
#' @param normal_file Path to the normal data file (RDS format).
14
#' @param output_file Path to save the output DEG data (RDS format).
15
#' @param logFC_threshold Threshold for log fold change for marking up/down-regulated genes.
16
#' @param p_value_threshold Threshold for p-value for filtering significant genes.
17
#' @return A data frame of differential expression results.
18
#' @references
19
#' limma:Linear Models for Microarray and RNA-Seq Data User’s Guide.
20
#' For more information, visit the page:
21
#' https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf
22
#' @export
23
#'
24
#' @examples
25
#' # Define file paths for tumor and normal data from the data folder
26
#' tumor_file <- system.file("extdata",
27
#'                           "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
28
#'                           package = "TransProR")
29
#' normal_file <- system.file("extdata",
30
#'                            "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
31
#'                            package = "TransProR")
32
#' output_file <- file.path(tempdir(), "DEG_limma_voom.rds")
33
#'
34
#' DEG_limma_voom <- limma_analyze(
35
#'   tumor_file = tumor_file,
36
#'   normal_file = normal_file,
37
#'   output_file = output_file,
38
#'   logFC_threshold = 2.5,
39
#'   p_value_threshold = 0.01
40
#' )
41
#'
42
#' # View the top 5 rows of the result
43
#' head(DEG_limma_voom, 5)
44
limma_analyze <- function(tumor_file, normal_file, output_file, logFC_threshold = 2.5, p_value_threshold = 0.01) {
45
  tumor <- readRDS(tumor_file)
46
  normal <- readRDS(normal_file)
47
48
  # Merge the datasets, ensuring both have genes as row names
49
  all_count_exp <- merge(tumor, normal, by = "row.names")
50
  all_count_exp <- tibble::column_to_rownames(all_count_exp, var = "Row.names")
51
52
  # Define groups for tumor and normal samples
53
  group <- c(rep('tumor', ncol(tumor)), rep('normal', ncol(normal)))
54
  group <- factor(group, levels = c("normal", "tumor"))
55
  group_table <- table(group)
56
57
  message("Group Table:")
58
  message(paste(names(group_table), group_table, sep = ": ", collapse = "\n"))
59
  # Add a space after the output for separation
60
  message(" ")
61
62
  # Create matrix
63
  design <- model.matrix(~0 + factor(group))
64
  colnames(design) <- levels(factor(group))
65
  rownames(design) <- colnames(all_count_exp)
66
67
  # Create DGEList object for gene expression data and group information
68
  dge <- edgeR::DGEList(counts = all_count_exp, group = group)
69
70
  # Filter lowly expressed genes
71
  keep <- edgeR::filterByExpr(dge)
72
  dge <- dge[keep, , keep.lib.sizes = FALSE]
73
74
  # The first step (TMM) scales the raw counts to adjust for library size differences, while the second step (quantile normalization in voom) ensures that the overall distribution of gene expression values is consistent across samples.
75
  # Normalize the data using the TMM method
76
  dge <- edgeR::calcNormFactors(dge)
77
  # Use voom method for normalization:Quantile Normalization
78
  v <- limma::voom(dge, design, plot = FALSE, normalize = "quantile")
79
80
  # Fit the linear model
81
  fit <- limma::lmFit(v, design)
82
83
  # Specify contrast
84
  con <- paste(rev(levels(group)), collapse = "-")
85
86
  # Create contrast matrix
87
  cont.matrix <- limma::makeContrasts(contrasts = c(con), levels = design)
88
  fit2 <- limma::contrasts.fit(fit, cont.matrix)
89
  fit2 <- limma::eBayes(fit2)
90
91
  # Get differential expression results
92
  tempOutput <- limma::topTable(fit2, coef = con, n = Inf)
93
  DEG_limma_voom <- stats::na.omit(tempOutput)
94
95
  # Add 'change' column to mark up/down-regulated genes
96
  k1 <- (DEG_limma_voom$P.Value < p_value_threshold) & (DEG_limma_voom$logFC < -logFC_threshold)
97
  k2 <- (DEG_limma_voom$P.Value < p_value_threshold) & (DEG_limma_voom$logFC > logFC_threshold)
98
  DEG_limma_voom <- dplyr::mutate(DEG_limma_voom, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))
99
100
  change_table <- table(DEG_limma_voom$change)
101
102
  message("Change Table:")
103
  message(paste(names(change_table), change_table, sep = ": ", collapse = "\n"))
104
  # Add a space after the output for separation
105
  message(" ")
106
107
108
  # Save results to the specified output file
109
  #save(DEG_limma_voom, file = output_file)
110
  saveRDS(DEG_limma_voom, file = output_file)
111
112
  return(DEG_limma_voom)
113
}
114
115