Switch to unified view

a b/preprocessing/Preprocessing_scRNA_PB_Citeseq.R
1
library(Matrix)
2
library(Seurat)
3
source("/research/users/ppolonen/git_home/common_scripts/scRNA/functions.scRNA.analysis.R")
4
5
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/scRNA")
6
7
name="CD8_Citeseq"
8
9
options(future.globals.maxSize= 4991289600)
10
11
# files=list.files(".", "matrix.mtx", full.names = T, recursive = T)
12
files=list.files("/research/groups/sysgen/PROJECTS/students/citeseq/", "matrix.mtx", full.names = T, recursive = T)
13
files=gsub("matrix.mtx.gz", "", files)
14
ids=c("cd8_donor1", "cd8_donor2", "cd8_donor3", "cd8_donor4")
15
names(files)=ids
16
17
# Each patient:
18
# for(i in seq(ids)[c(1,2,3,4)]){
19
#   datas=Read10X(data.dir = files[i])
20
#   rownames(datas[[2]])=gsub("_TotalSeqC","", rownames(datas[[2]]))
21
#   names(datas)=c("RNA", "PROT")
22
#   test3=sc.data.analysis(scmat = datas, name = ids[i], nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 6)
23
# }
24
25
data=Read10X(data.dir = files[c(1,2,3,4)])
26
rownames(data[[2]])=gsub("_TotalSeqC","", rownames(data[[2]]))
27
names(data)=c("RNA", "PROT")
28
29
# take data with enough both protein counts:
30
filt=colSums(data[[2]])>2647&colSums(data[[2]])<6664
31
filt.genes=rowSums(data[[1]]>0)>20
32
33
data[[1]]=data[[1]][filt.genes,filt]
34
35
data[[2]]=data[[2]][,filt]
36
37
batch=substr(colnames(data[[1]]), 1,10)
38
39
test1=sc.data.analysis(scmat = data, regress.cell.label = batch, batch.correction.method = "MNNcorrect", name="CD8_Citeseq", nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 2)
40
41
42
43
# sbatch
44
library(Matrix)
45
library(Seurat)
46
source("functions.scRNA.analysis.R")
47
48
future::plan("multiprocess", workers = 10)
49
options(future.globals.maxSize= 8991289600)
50
51
name="CD8_Citeseq"
52
53
options(future.globals.maxSize= 4991289600)
54
55
# files=list.files(".", "matrix.mtx", full.names = T, recursive = T)
56
files=list.files(".", "matrix.mtx", full.names = T, recursive = T)
57
files=gsub("matrix.mtx.gz", "", files)
58
ids=c("cd8_donor1", "cd8_donor2", "cd8_donor3", "cd8_donor4")
59
names(files)=ids
60
61
# Each patient:
62
# for(i in seq(ids)[c(1,2,3,4)]){
63
#   datas=Read10X(data.dir = files[i])
64
#   rownames(datas[[2]])=gsub("_TotalSeqC","", rownames(datas[[2]]))
65
#   names(datas)=c("RNA", "PROT")
66
#   test3=sc.data.analysis(scmat = datas, name = ids[i], nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 6)
67
# }
68
69
data=Read10X(data.dir = files[c(1,2,3,4)])
70
rownames(data[[2]])=gsub("_TotalSeqC","", rownames(data[[2]]))
71
names(data)=c("RNA", "PROT")
72
73
# take data with enough protein counts:
74
filt=colSums(data[[2]])>2647&colSums(data[[2]])<6664
75
filt.genes=rowSums(data[[1]]>0)>20
76
77
data[[1]]=data[[1]][filt.genes,filt]
78
79
data[[2]]=data[[2]][,filt]
80
81
batch=substr(colnames(data[[1]]), 1,10)
82
83
test1=sc.data.analysis(scmat = data, regress.cell.label = batch, batch.correction.method = "MNNcorrect", name="CD8_Citeseq", nr.pcs = 50, check.pcs=F, plot.umap = T, nFeature.min = 200, nFeature.max = 4000, percent.mitoDNA = 10, cores = 2)