|
a |
|
b/deseq_analysis.R |
|
|
1 |
if (!requireNamespace("BiocManager", quietly = TRUE)) |
|
|
2 |
install.packages("BiocManager") |
|
|
3 |
BiocManager::install("DESeq2") |
|
|
4 |
|
|
|
5 |
#load the DESeq2 package |
|
|
6 |
library(DESeq2) |
|
|
7 |
|
|
|
8 |
#load RNA-seq count data |
|
|
9 |
counts <- read.delim("GSE103584_R01_NSCLC_RNAseq.txt", header=TRUE, row.names=1) |
|
|
10 |
|
|
|
11 |
#replace NAs with zeros |
|
|
12 |
counts[is.na(counts)] <- 0 |
|
|
13 |
#apply the integer conversion to all columns except the first |
|
|
14 |
counts[, -1] <- lapply(counts[, -1], as.integer) |
|
|
15 |
counts$`R01.023` <- as.integer(counts$`R01.023`) |
|
|
16 |
counts$`R01.024` <- as.integer(counts$`R01.024`) |
|
|
17 |
|
|
|
18 |
|
|
|
19 |
#read in metadata (sample information) |
|
|
20 |
colData <- read.delim("sample_metadata.txt", header=TRUE, row.names=1) |
|
|
21 |
rownames(colData) <- gsub("-", ".", rownames(colData)) |
|
|
22 |
|
|
|
23 |
#then we use these row names as the column names for counts |
|
|
24 |
colnames(counts) <- rownames(colData) |
|
|
25 |
#Convert the treatment column in colData to a factor |
|
|
26 |
colData$treatment <- as.factor(colData$treatment) |
|
|
27 |
|
|
|
28 |
#check for NA values in the counts matrix |
|
|
29 |
if (any(is.na(counts))) { |
|
|
30 |
cat("NA values found in the counts matrix.\n") |
|
|
31 |
# Replace NA values with zeros |
|
|
32 |
counts[is.na(counts)] <- 0 |
|
|
33 |
} |
|
|
34 |
|
|
|
35 |
#confirm that there are no more NA values |
|
|
36 |
if (any(is.na(counts))) { |
|
|
37 |
cat("NA values are still present after replacement.\n") |
|
|
38 |
} else { |
|
|
39 |
cat("No NA values are present. Proceeding with DESeq2.\n") |
|
|
40 |
} |
|
|
41 |
|
|
|
42 |
#proceed with DESeq2 if no NAs are present |
|
|
43 |
dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData, design = ~ treatment) |
|
|
44 |
|
|
|
45 |
#run the DESeq pipeline |
|
|
46 |
dds <- DESeq(dds) |
|
|
47 |
|
|
|
48 |
#running the differential analysis |
|
|
49 |
res <- results(dds, contrast=c("treatment", "male", "female")) |
|
|
50 |
|
|
|
51 |
#ordering results by significance |
|
|
52 |
resOrdered <- res[order(res$pvalue),] |
|
|
53 |
|
|
|
54 |
#exporting results to csv |
|
|
55 |
write.csv(as.data.frame(resOrdered), file="DESeq2_results.csv", row.names=TRUE) |