--- a +++ b/deseq_analysis.R @@ -0,0 +1,55 @@ +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +BiocManager::install("DESeq2") + +#load the DESeq2 package +library(DESeq2) + +#load RNA-seq count data +counts <- read.delim("GSE103584_R01_NSCLC_RNAseq.txt", header=TRUE, row.names=1) + +#replace NAs with zeros +counts[is.na(counts)] <- 0 +#apply the integer conversion to all columns except the first +counts[, -1] <- lapply(counts[, -1], as.integer) +counts$`R01.023` <- as.integer(counts$`R01.023`) +counts$`R01.024` <- as.integer(counts$`R01.024`) + + +#read in metadata (sample information) +colData <- read.delim("sample_metadata.txt", header=TRUE, row.names=1) +rownames(colData) <- gsub("-", ".", rownames(colData)) + +#then we use these row names as the column names for counts +colnames(counts) <- rownames(colData) +#Convert the treatment column in colData to a factor +colData$treatment <- as.factor(colData$treatment) + +#check for NA values in the counts matrix +if (any(is.na(counts))) { + cat("NA values found in the counts matrix.\n") + # Replace NA values with zeros + counts[is.na(counts)] <- 0 +} + +#confirm that there are no more NA values +if (any(is.na(counts))) { + cat("NA values are still present after replacement.\n") +} else { + cat("No NA values are present. Proceeding with DESeq2.\n") +} + +#proceed with DESeq2 if no NAs are present +dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData, design = ~ treatment) + +#run the DESeq pipeline +dds <- DESeq(dds) + +#running the differential analysis +res <- results(dds, contrast=c("treatment", "male", "female")) + +#ordering results by significance +resOrdered <- res[order(res$pvalue),] + +#exporting results to csv +write.csv(as.data.frame(resOrdered), file="DESeq2_results.csv", row.names=TRUE)