a b/deseq_analysis.R
1
if (!requireNamespace("BiocManager", quietly = TRUE))
2
  install.packages("BiocManager")
3
BiocManager::install("DESeq2")
4
5
#load the DESeq2 package
6
library(DESeq2)
7
8
#load RNA-seq count data
9
counts <- read.delim("GSE103584_R01_NSCLC_RNAseq.txt", header=TRUE, row.names=1)
10
11
#replace NAs with zeros
12
counts[is.na(counts)] <- 0
13
#apply the integer conversion to all columns except the first
14
counts[, -1] <- lapply(counts[, -1], as.integer)
15
counts$`R01.023` <- as.integer(counts$`R01.023`)
16
counts$`R01.024` <- as.integer(counts$`R01.024`)
17
18
19
#read in metadata (sample information)
20
colData <- read.delim("sample_metadata.txt", header=TRUE, row.names=1)
21
rownames(colData) <- gsub("-", ".", rownames(colData))
22
23
#then we use these row names as the column names for counts
24
colnames(counts) <- rownames(colData)
25
#Convert the treatment column in colData to a factor
26
colData$treatment <- as.factor(colData$treatment)
27
28
#check for NA values in the counts matrix
29
if (any(is.na(counts))) {
30
  cat("NA values found in the counts matrix.\n")
31
  # Replace NA values with zeros
32
  counts[is.na(counts)] <- 0
33
}
34
35
#confirm that there are no more NA values
36
if (any(is.na(counts))) {
37
  cat("NA values are still present after replacement.\n")
38
} else {
39
  cat("No NA values are present. Proceeding with DESeq2.\n")
40
}
41
42
#proceed with DESeq2 if no NAs are present
43
dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData, design = ~ treatment)
44
45
#run the DESeq pipeline
46
dds <- DESeq(dds)
47
48
#running the differential analysis
49
res <- results(dds, contrast=c("treatment", "male", "female"))
50
51
#ordering results by significance
52
resOrdered <- res[order(res$pvalue),]
53
54
#exporting results to csv
55
write.csv(as.data.frame(resOrdered), file="DESeq2_results.csv", row.names=TRUE)