[28fc72]: / deseq_analysis.R

Download this file

56 lines (42 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
#load the DESeq2 package
library(DESeq2)
#load RNA-seq count data
counts <- read.delim("GSE103584_R01_NSCLC_RNAseq.txt", header=TRUE, row.names=1)
#replace NAs with zeros
counts[is.na(counts)] <- 0
#apply the integer conversion to all columns except the first
counts[, -1] <- lapply(counts[, -1], as.integer)
counts$`R01.023` <- as.integer(counts$`R01.023`)
counts$`R01.024` <- as.integer(counts$`R01.024`)
#read in metadata (sample information)
colData <- read.delim("sample_metadata.txt", header=TRUE, row.names=1)
rownames(colData) <- gsub("-", ".", rownames(colData))
#then we use these row names as the column names for counts
colnames(counts) <- rownames(colData)
#Convert the treatment column in colData to a factor
colData$treatment <- as.factor(colData$treatment)
#check for NA values in the counts matrix
if (any(is.na(counts))) {
cat("NA values found in the counts matrix.\n")
# Replace NA values with zeros
counts[is.na(counts)] <- 0
}
#confirm that there are no more NA values
if (any(is.na(counts))) {
cat("NA values are still present after replacement.\n")
} else {
cat("No NA values are present. Proceeding with DESeq2.\n")
}
#proceed with DESeq2 if no NAs are present
dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData, design = ~ treatment)
#run the DESeq pipeline
dds <- DESeq(dds)
#running the differential analysis
res <- results(dds, contrast=c("treatment", "male", "female"))
#ordering results by significance
resOrdered <- res[order(res$pvalue),]
#exporting results to csv
write.csv(as.data.frame(resOrdered), file="DESeq2_results.csv", row.names=TRUE)