|
a |
|
b/bin/csv_to_seurat.R |
|
|
1 |
# Seurat based analysis of GM12878 paired data scRNA component |
|
|
2 |
# Preparation for integration with ArchR |
|
|
3 |
# References |
|
|
4 |
# https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html |
|
|
5 |
# https://satijalab.org/seurat/v3.1/merge_vignette.html |
|
|
6 |
# https://learn.gencore.bio.nyu.edu/single-cell-rnaseq/loading-your-own-data-in-seurat-reanalyze-a-different-dataset/ |
|
|
7 |
rm(list = ls(all.names = TRUE)) # Clear environment |
|
|
8 |
|
|
|
9 |
library(dplyr) |
|
|
10 |
library(Seurat) |
|
|
11 |
|
|
|
12 |
option_list = list( |
|
|
13 |
make_option(c("-i", "--input"), type="character", default=NULL, |
|
|
14 |
help="Input csv file", metavar="character"), |
|
|
15 |
make_option(c("-p", "--project"), type="character", default="proj", |
|
|
16 |
help="Project name", metavar="character"), |
|
|
17 |
make_option(c("-o", "--output"), type="character", default=NULL, |
|
|
18 |
help="output file name", metavar = "character") |
|
|
19 |
); |
|
|
20 |
opt_parser = OptionParser(option_list=option_list); |
|
|
21 |
opt = parse_args(opt_parser); |
|
|
22 |
|
|
|
23 |
raw_counts <- read.table(file=opt$input, sep=",") |
|
|
24 |
sobj <- CreateSeuratObject(raw.data = raw_counts, min.cells = 3, min.genes = 200, project = opt$project) |
|
|
25 |
|
|
|
26 |
sobj[["percent.mt"]] <- PercentageFeatureSet(sobj, pattern = "^MT-") |
|
|
27 |
VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) |
|
|
28 |
|
|
|
29 |
# Remove some unwanted cells |
|
|
30 |
sobj <- subset(sobj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 25) |
|
|
31 |
|
|
|
32 |
# Normalize |
|
|
33 |
sobj <- NormalizeData(sobj, normalization.method = "LogNormalize", scale.factor = 10000) |
|
|
34 |
|
|
|
35 |
# Find variable features |
|
|
36 |
sobj <- FindVariableFeatures(sobj, selection.method = "vst", nfeatures = 2000) |
|
|
37 |
|
|
|
38 |
# Scale |
|
|
39 |
sobj <- ScaleData(sobj, features = rownames(sobj)) |
|
|
40 |
|
|
|
41 |
# PCA |
|
|
42 |
sobj <- RunPCA(sobj, features = VariableFeatures(object = sobj)) |
|
|
43 |
DimPlot(sobj, reduction="pca") |
|
|
44 |
ElbowPlot(sobj) |
|
|
45 |
|
|
|
46 |
# Cluster the cells |
|
|
47 |
sobj <- FindNeighbors(sobj, dims=1:10) # Appears to be the same for 20 or 10 dimensions |
|
|
48 |
sobj <- FindClusters(sobj, resolution=1) |
|
|
49 |
|
|
|
50 |
# Plot |
|
|
51 |
sobj <- RunUMAP(sobj, dims=1:20) |
|
|
52 |
DimPlot(sobj, reduction="umap") |
|
|
53 |
|
|
|
54 |
# Save |
|
|
55 |
saveRDS(sobj, opt$output) |