Diff of /deseq_analysis.R [000000] .. [28fc72]

Switch to side-by-side view

--- a
+++ b/deseq_analysis.R
@@ -0,0 +1,87 @@
+# Differential expression analysis of RNA-seq counts with DESeq2.
+# Reads a count table and a sample metadata table, fits the model
+# ~ treatment, and writes the results ordered by p-value to
+# DESeq2_results.csv.
+
+# install dependencies only when missing, so that re-running the script does
+# not reinstall DESeq2 every time
+if (!requireNamespace("BiocManager", quietly = TRUE)) {
+  install.packages("BiocManager")
+}
+if (!requireNamespace("DESeq2", quietly = TRUE)) {
+  BiocManager::install("DESeq2")
+}
+
+# load the DESeq2 package
+library(DESeq2)
+
+# load RNA-seq count data; the first file column becomes the row names, so
+# every remaining column is a sample
+counts <- read.delim("GSE103584_R01_NSCLC_RNAseq.txt", header = TRUE,
+                     row.names = 1)
+
+# replace NAs with zeros
+counts[is.na(counts)] <- 0
+
+# convert every sample column to integer (as.integer() truncates fractional
+# values); assigning into counts[] keeps the data frame's row and column names
+counts[] <- lapply(counts, as.integer)
+
+# read in metadata (sample information)
+colData <- read.delim("sample_metadata.txt", header = TRUE, row.names = 1)
+# read.delim() (check.names = TRUE) replaces "-" in column headers with ".",
+# so apply the same substitution to the metadata sample IDs
+rownames(colData) <- gsub("-", ".", rownames(colData))
+
+# match samples by name rather than relabelling the count columns by position:
+# overwriting colnames(counts) would silently pair metadata with the wrong
+# sample whenever the two files list samples in a different order
+missing_samples <- setdiff(rownames(colData), colnames(counts))
+if (length(missing_samples) > 0) {
+  stop("Samples in sample_metadata.txt missing from the count table: ",
+       paste(missing_samples, collapse = ", "), call. = FALSE)
+}
+counts <- counts[, rownames(colData), drop = FALSE]
+
+# convert the treatment column in colData to a factor
+colData$treatment <- as.factor(colData$treatment)
+
+# fail early, before the model fit, if a contrast level is absent
+# NOTE(review): the "treatment" column is contrasted on "male" vs "female" --
+# confirm this is the intended grouping variable
+contrast_levels <- c("male", "female")
+missing_levels <- setdiff(contrast_levels, levels(colData$treatment))
+if (length(missing_levels) > 0) {
+  stop("Levels missing from colData$treatment: ",
+       paste(missing_levels, collapse = ", "), call. = FALSE)
+}
+
+# as.integer() returns NA for values it cannot convert, so check again
+if (any(is.na(counts))) {
+  cat("NA values found in the counts matrix.\n")
+  # replace NA values with zeros
+  counts[is.na(counts)] <- 0
+}
+
+# stop instead of only printing, so DESeq2 never runs on a matrix with NAs
+if (any(is.na(counts))) {
+  stop("NA values are still present after replacement.", call. = FALSE)
+}
+cat("No NA values are present. Proceeding with DESeq2.\n")
+
+# build the DESeq2 data set
+dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData,
+                              design = ~ treatment)
+
+# run the DESeq pipeline
+dds <- DESeq(dds)
+
+# running the differential analysis
+res <- results(dds, contrast = c("treatment", contrast_levels))
+
+# ordering results by significance
+resOrdered <- res[order(res$pvalue), ]
+
+# exporting results to csv
+write.csv(as.data.frame(resOrdered), file = "DESeq2_results.csv",
+          row.names = TRUE)