|
a |
|
b/preprocessing/Preprocessing_scRNA_PB_Citeseq.R |
|
|
1 |
library(Matrix) |
|
|
2 |
library(Seurat) |
|
|
3 |
source("/research/users/ppolonen/git_home/common_scripts/scRNA/functions.scRNA.analysis.R") |
|
|
4 |
|
|
|
5 |
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/scRNA") |
|
|
6 |
|
|
|
7 |
name="CD8_Citeseq" |
|
|
8 |
|
|
|
9 |
options(future.globals.maxSize= 4991289600) |
|
|
10 |
|
|
|
11 |
# files=list.files(".", "matrix.mtx", full.names = T, recursive = T) |
|
|
12 |
files=list.files("/research/groups/sysgen/PROJECTS/students/citeseq/", "matrix.mtx", full.names = T, recursive = T) |
|
|
13 |
files=gsub("matrix.mtx.gz", "", files) |
|
|
14 |
ids=c("cd8_donor1", "cd8_donor2", "cd8_donor3", "cd8_donor4") |
|
|
15 |
names(files)=ids |
|
|
16 |
|
|
|
17 |
# Each patient: |
|
|
18 |
# for(i in seq(ids)[c(1,2,3,4)]){ |
|
|
19 |
# datas=Read10X(data.dir = files[i]) |
|
|
20 |
# rownames(datas[[2]])=gsub("_TotalSeqC","", rownames(datas[[2]])) |
|
|
21 |
# names(datas)=c("RNA", "PROT") |
|
|
22 |
# test3=sc.data.analysis(scmat = datas, name = ids[i], nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 6) |
|
|
23 |
# } |
|
|
24 |
|
|
|
25 |
data=Read10X(data.dir = files[c(1,2,3,4)]) |
|
|
26 |
rownames(data[[2]])=gsub("_TotalSeqC","", rownames(data[[2]])) |
|
|
27 |
names(data)=c("RNA", "PROT") |
|
|
28 |
|
|
|
29 |
# take data with enough both protein counts: |
|
|
30 |
filt=colSums(data[[2]])>2647&colSums(data[[2]])<6664 |
|
|
31 |
filt.genes=rowSums(data[[1]]>0)>20 |
|
|
32 |
|
|
|
33 |
data[[1]]=data[[1]][filt.genes,filt] |
|
|
34 |
|
|
|
35 |
data[[2]]=data[[2]][,filt] |
|
|
36 |
|
|
|
37 |
batch=substr(colnames(data[[1]]), 1,10) |
|
|
38 |
|
|
|
39 |
test1=sc.data.analysis(scmat = data, regress.cell.label = batch, batch.correction.method = "MNNcorrect", name="CD8_Citeseq", nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 2) |
|
|
40 |
|
|
|
41 |
|
|
|
42 |
|
|
|
43 |
# sbatch |
|
|
44 |
library(Matrix) |
|
|
45 |
library(Seurat) |
|
|
46 |
source("functions.scRNA.analysis.R") |
|
|
47 |
|
|
|
48 |
future::plan("multiprocess", workers = 10) |
|
|
49 |
options(future.globals.maxSize= 8991289600) |
|
|
50 |
|
|
|
51 |
name="CD8_Citeseq" |
|
|
52 |
|
|
|
53 |
options(future.globals.maxSize= 4991289600) |
|
|
54 |
|
|
|
55 |
# files=list.files(".", "matrix.mtx", full.names = T, recursive = T) |
|
|
56 |
files=list.files(".", "matrix.mtx", full.names = T, recursive = T) |
|
|
57 |
files=gsub("matrix.mtx.gz", "", files) |
|
|
58 |
ids=c("cd8_donor1", "cd8_donor2", "cd8_donor3", "cd8_donor4") |
|
|
59 |
names(files)=ids |
|
|
60 |
|
|
|
61 |
# Each patient: |
|
|
62 |
# for(i in seq(ids)[c(1,2,3,4)]){ |
|
|
63 |
# datas=Read10X(data.dir = files[i]) |
|
|
64 |
# rownames(datas[[2]])=gsub("_TotalSeqC","", rownames(datas[[2]])) |
|
|
65 |
# names(datas)=c("RNA", "PROT") |
|
|
66 |
# test3=sc.data.analysis(scmat = datas, name = ids[i], nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 6) |
|
|
67 |
# } |
|
|
68 |
|
|
|
69 |
data=Read10X(data.dir = files[c(1,2,3,4)]) |
|
|
70 |
rownames(data[[2]])=gsub("_TotalSeqC","", rownames(data[[2]])) |
|
|
71 |
names(data)=c("RNA", "PROT") |
|
|
72 |
|
|
|
73 |
# take data with enough protein counts: |
|
|
74 |
filt=colSums(data[[2]])>2647&colSums(data[[2]])<6664 |
|
|
75 |
filt.genes=rowSums(data[[1]]>0)>20 |
|
|
76 |
|
|
|
77 |
data[[1]]=data[[1]][filt.genes,filt] |
|
|
78 |
|
|
|
79 |
data[[2]]=data[[2]][,filt] |
|
|
80 |
|
|
|
81 |
batch=substr(colnames(data[[1]]), 1,10) |
|
|
82 |
|
|
|
83 |
test1=sc.data.analysis(scmat = data, regress.cell.label = batch, batch.correction.method = "MNNcorrect", name="CD8_Citeseq", nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 2) |