|
a |
|
b/preprocessing/Preprocessing_scRNA_Galen_AML.R |
|
|
1 |
library(Matrix) |
|
|
2 |
library(Seurat) |
|
|
3 |
source("/research/users/ppolonen/git_home/common_scripts/scRNA/functions.scRNA.analysis.R") |
|
|
4 |
|
|
|
5 |
# Work from the project's scRNA directory.
# NOTE(review): presumably sc.data.analysis() writes its outputs relative to
# the working directory -- confirm before replacing setwd() with explicit paths.
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/scRNA")

# Label passed as `name` to the combined (all-sample) analysis below.
name <- "AML_Galen"

# ---- Read data ----

# Directory holding the GSE116256 downloads. The string is kept verbatim
# (trailing slash included) so the full paths returned by list.files() are
# exactly the same as before.
geo_dir <- "/research/groups/biowhat_share/public_data/scRNAseq/GSE116256_Galen_AML/"

# Expression matrix files (name matches "dem.txt"), restricted to those whose
# path contains "D0".
files <- list.files(geo_dir, "dem.txt", full.names = TRUE)
files <- files[grepl("D0", files)]
if (length(files) == 0) {
  stop("No 'D0' dem.txt files found in ", geo_dir, call. = FALSE)
}

# Read every matrix in parallel; the first column of each file becomes the
# row names. mclapply() does not raise errors from its workers -- a failed
# read comes back as a "try-error" element -- so check explicitly instead of
# letting a broken element flow into cbind().
per_sample <- parallel::mclapply(files, read.delim, row.names = 1, mc.cores = 8)
read_failed <- vapply(per_sample, inherits, logical(1), what = "try-error")
if (any(read_failed)) {
  stop(
    "Failed to read: ", paste(files[read_failed], collapse = ", "),
    call. = FALSE
  )
}

# Column-bind the per-file tables into one matrix-like object.
# NOTE(review): cbind() pairs rows by position, so this assumes every file
# lists the same rows in the same order -- TODO confirm.
data <- do.call(cbind, per_sample)
rm(per_sample, read_failed) # do not keep a second copy of the data around

# Matching annotation tables for the same "D0" subset, one data.frame per file.
files_anno <- list.files(geo_dir, "_AML.*.anno.txt", full.names = TRUE)
anno.list <- lapply(
  files_anno[grepl("D0", files_anno)],
  read.delim,
  stringsAsFactors = FALSE
)

# Batch label per column: the column name with everything from the first
# underscore onwards removed.
batch <- sub("_.+$", "", colnames(data))

# Combined analysis over all loaded files, using `batch` as the label for
# regression / MNNcorrect batch correction (helper comes from the sourced
# functions.scRNA.analysis.R).
test1 <- sc.data.analysis(
  scmat = data,
  regress.cell.label = batch,
  batch.correction.method = "MNNcorrect",
  name = name,
  nr.pcs = 30,
  check.pcs = FALSE,
  plot.umap = TRUE,
  nFeature.min = 100,
  nFeature.max = 3000,
  percent.mitoDNA = 10
)
|
|
25 |
|
|
|
26 |
# ---- Each patient ----

# Sample id for every input file: the file name with the directory, the GEO
# accession prefix ("GSM<digits>_") and the ".dem.txt" part removed.
# basename() is used so the result does not depend on how the directory was
# spelled (trailing slash, "//"), and the accession is matched generically
# rather than only for accessions of the form GSM35879xx.
ids <- sub(
  ".dem.txt", "",
  sub("^GSM[0-9]+_", "", basename(files)),
  fixed = TRUE
)

for (i in seq_along(files)) {
  # One file at a time; the first column of the file becomes the row names.
  datas <- read.delim(files[i], row.names = 1)

  # Only files with more than 1000 columns are analysed on their own
  # (presumably one column per cell -- confirm against the input format).
  if (ncol(datas) > 1000) {
    test3 <- sc.data.analysis(
      scmat = datas,
      name = ids[i],
      nr.pcs = 15,
      check.pcs = FALSE,
      plot.umap = TRUE,
      nFeature.min = 100,
      nFeature.max = 3000,
      percent.mitoDNA = 10
    )
  }
}