|
a |
|
b/common_scripts/scRNA/functions.scRNA.analysis.R |
|
|
1 |
#' |
|
|
2 |
#' |
|
|
3 |
plot.scRNA.features.complexHeatmap=function(s.anno, features, name,width=300, cores){ |
|
|
4 |
log=mclapply(seq(dim(s.anno)[1]), plot.scRNA.features, s.anno,features, name, mc.cores = cores, width=width) |
|
|
5 |
} |
|
|
6 |
|
|
|
7 |
plot.scRNA.features=function(i, s.anno, features, name, add.annotation=NULL, width=300){ |
|
|
8 |
# eccite PBMC dataset: |
|
|
9 |
load(s.anno[i,2]) |
|
|
10 |
|
|
|
11 |
#HSC, myeloid, B-solu, T-solu, erythroi |
|
|
12 |
# STEP1 find the cluster order, and set them based on lineage: |
|
|
13 |
clusters=Idents(scmat) |
|
|
14 |
if(s.anno[i,3]!=""){ |
|
|
15 |
order.clu=as.numeric(unlist(strsplit(s.anno[i,3], ",|, | ,"))) # MODIFY |
|
|
16 |
|
|
|
17 |
ord.names=names(clusters[order(match(clusters,order.clu))]) |
|
|
18 |
|
|
|
19 |
}else{ |
|
|
20 |
|
|
|
21 |
order.clu=levels(clusters) |
|
|
22 |
|
|
|
23 |
# order samples within cluster by umap component with more spread |
|
|
24 |
# this way there is still information within cluster, for heatmaps |
|
|
25 |
coords=Embeddings(scmat[["umap"]]) |
|
|
26 |
fit.cluster=levels(Idents(scmat)) |
|
|
27 |
|
|
|
28 |
ord.names=unlist(lapply(fit.cluster, function(clu){ |
|
|
29 |
d=coords[Idents(scmat)%in%clu,] |
|
|
30 |
ind=which.max(c(max(d[,1])-min(d[,1]), max(d[,2])-min(d[,2]))) # take the axis with most spread |
|
|
31 |
names(sort(d[,ind])) |
|
|
32 |
})) |
|
|
33 |
|
|
|
34 |
} |
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
features=data.frame(features, stringsAsFactors = F) |
|
|
39 |
|
|
|
40 |
if(!grepl("^B:|^N:", features[1,1])){ |
|
|
41 |
features[,1]=paste0("N:GEXP:", features[,1]) |
|
|
42 |
allfeats=rownames(fm)[!rowSums(fm>0)==0] |
|
|
43 |
|
|
|
44 |
}else{ |
|
|
45 |
allfeats=gsub("N:GEXP:|N:RPPA:", "", rownames(fm)[!rowSums(fm>0)==0]) |
|
|
46 |
} |
|
|
47 |
|
|
|
48 |
# text annot created if more columns than 1. |
|
|
49 |
if(dim(features)[2]>1){ |
|
|
50 |
features=features[features[,1]%in%allfeats,] |
|
|
51 |
feats=features[,1] |
|
|
52 |
t.annot=apply(features[,2:dim(features)[2], drop=F], 1, paste, collapse=" ") |
|
|
53 |
names(t.annot)=feats |
|
|
54 |
}else{ |
|
|
55 |
t.annot=NULL |
|
|
56 |
feats=features[features[,1]%in%allfeats,1] |
|
|
57 |
} |
|
|
58 |
|
|
|
59 |
# make a data frame for annotations on top of the heatmap |
|
|
60 |
annotdf=data.frame("cluster"=clusters,"HLAIScore"=as.numeric(fm["N:SAMP:HLAIScore",]), "HLAIIScore"=as.numeric(fm["N:SAMP:HLAIIScore",]), "CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",])) |
|
|
61 |
|
|
|
62 |
if(!is.null(add.annotation))annotdf$annotation=add.annotation |
|
|
63 |
|
|
|
64 |
name=gsub("__", "_", gsub("[[:punct:]]", "_", paste(name, s.anno[i,1], sep="_"))) |
|
|
65 |
|
|
|
66 |
# plot using a complexheatmap package using a featurematrix format and custom plotting function |
|
|
67 |
r1=plot.complexHM.fm(feats = feats, text_annot = t.annot, fm.f = fm, annotdf = annotdf, feats.barplot = c("HLAIScore", "HLAIIScore", "CytolyticScore"), order_columns=ord.names, order_rows=F, split.columns = T, plotting.param="/research/groups/sysgen/PROJECTS/students/HEMAP_IMMUNOLOGY/minna_work/scRNA_genelist_plotting/plotting_param_fm.txt", NAME=name, WIDTH = width) |
|
|
68 |
} |
|
|
69 |
|
|
|
70 |
plot.gene.seurat=function(s.anno, feature, name,cols=c("grey75", "red"), max=1, min=0, cores=1){ |
|
|
71 |
plot.feature=function(i, feature){ |
|
|
72 |
# plot proportions as a barplot next to 2D points |
|
|
73 |
load(s.anno[i,2]) |
|
|
74 |
|
|
|
75 |
if(!feature%in%rownames(scmat))return(NULL) |
|
|
76 |
|
|
|
77 |
p <- FeaturePlot(scmat, features = feature, cols = cols, max.cutoff = max, min.cutoff = min, order = T, combine = FALSE) |
|
|
78 |
p=lapply(p, `+`, labs(title = s.anno[i,1])) |
|
|
79 |
|
|
|
80 |
if(i>1){ |
|
|
81 |
for(i in 1:length(p)) { |
|
|
82 |
p[[i]] <- p[[i]] + NoLegend() + NoAxes() |
|
|
83 |
} |
|
|
84 |
}else{ |
|
|
85 |
p[[1]] <- p[[1]] + NoAxes() |
|
|
86 |
} |
|
|
87 |
|
|
|
88 |
cowplot::plot_grid(plotlist = p) |
|
|
89 |
} |
|
|
90 |
|
|
|
91 |
# plot here lineage proportion per patient |
|
|
92 |
plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores) |
|
|
93 |
plot.list=plot.list[!sapply(plot.list, is.null)] |
|
|
94 |
|
|
|
95 |
if(!length(plot.list))return(NULL) |
|
|
96 |
|
|
|
97 |
# 74mm * 99mm per panelwidth = 77, height = 74 |
|
|
98 |
figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
99 |
|
|
|
100 |
for(i in seq(plot.list)){ |
|
|
101 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
102 |
} |
|
|
103 |
|
|
|
104 |
multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) |
|
|
105 |
} |
|
|
106 |
|
|
|
107 |
plot.fm.feature=function(s.anno, feature, name, max=3, min=0, cores=1){ |
|
|
108 |
plot.feature=function(i, feature){ |
|
|
109 |
# plot proportions as a barplot next to 2D points |
|
|
110 |
load(s.anno[i,2]) |
|
|
111 |
|
|
|
112 |
scmat[[feature]]=fm[rownames(fm)%in%feature,] |
|
|
113 |
|
|
|
114 |
p <- FeaturePlot(scmat, features = feature, cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE) |
|
|
115 |
p=lapply(p, `+`, labs(title = s.anno[i,1])) |
|
|
116 |
|
|
|
117 |
if(i>1){ |
|
|
118 |
for(i in 1:length(p)) { |
|
|
119 |
p[[i]] <- p[[i]] + NoLegend() + NoAxes() |
|
|
120 |
} |
|
|
121 |
}else{ |
|
|
122 |
p[[1]] <- p[[1]] + NoAxes() |
|
|
123 |
} |
|
|
124 |
|
|
|
125 |
return(p) |
|
|
126 |
} |
|
|
127 |
|
|
|
128 |
# plot here lineage proportion per patient |
|
|
129 |
plot.list=unlist(mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores), recursive = F) |
|
|
130 |
|
|
|
131 |
# 74mm * 99mm per panelwidth = 77, height = 74 |
|
|
132 |
figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
133 |
|
|
|
134 |
for(i in seq(plot.list)){ |
|
|
135 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
136 |
} |
|
|
137 |
|
|
|
138 |
# 74mm * 99mm per panelwidth = 77, height = 74 |
|
|
139 |
figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
140 |
|
|
|
141 |
for(i in seq(plot.list)){ |
|
|
142 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
143 |
} |
|
|
144 |
|
|
|
145 |
multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) |
|
|
146 |
} |
|
|
147 |
|
|
|
148 |
plot.several.fm.feature=function(s.anno.object, features, name, min=0, max=5, cores=5){ |
|
|
149 |
load(s.anno.object) |
|
|
150 |
|
|
|
151 |
features=features[features%in%rownames(fm)] |
|
|
152 |
|
|
|
153 |
plot.feature=function(i, features){ |
|
|
154 |
# plot proportions as a barplot next to 2D points |
|
|
155 |
|
|
|
156 |
scmat[[features[i]]]=fm[rownames(fm)%in%features[i],] |
|
|
157 |
|
|
|
158 |
p <- FeaturePlot(scmat, features = features[i], cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE) |
|
|
159 |
p=lapply(p, `+`, labs(title = features[i])) |
|
|
160 |
|
|
|
161 |
if(i>1){ |
|
|
162 |
for(i in 1:length(p)) { |
|
|
163 |
p[[i]] <- p[[i]] + NoLegend() + NoAxes() |
|
|
164 |
} |
|
|
165 |
}else{ |
|
|
166 |
p[[1]] <- p[[1]] + NoAxes() |
|
|
167 |
} |
|
|
168 |
return(p) |
|
|
169 |
} |
|
|
170 |
|
|
|
171 |
# plot here lineage proportion per patient |
|
|
172 |
plot.list=unlist(mclapply(seq(features), plot.feature, features, mc.cores=cores), recursive = F) |
|
|
173 |
|
|
|
174 |
# 74mm * 99mm per panelwidth = 77, height = 74 |
|
|
175 |
figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
176 |
|
|
|
177 |
for(i in seq(plot.list)){ |
|
|
178 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
179 |
} |
|
|
180 |
|
|
|
181 |
multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) |
|
|
182 |
} |
|
|
183 |
|
|
|
184 |
plot.seurat.ident=function(s.anno, colorv, name, cores=1){ |
|
|
185 |
plot.feature=function(i){ |
|
|
186 |
# plot proportions as a barplot next to 2D points |
|
|
187 |
load(s.anno[i,2]) |
|
|
188 |
|
|
|
189 |
cells=gsub(" \\d+$", "", levels(Idents(scmat))) |
|
|
190 |
|
|
|
191 |
p=DimPlot(scmat, group.by = c("ident"), cols = colorv[match(cells, colorv[,1]),2], ncol = 1, label = T, repel = T) + NoLegend() + NoAxes() |
|
|
192 |
} |
|
|
193 |
|
|
|
194 |
# plot here lineage proportion per patient |
|
|
195 |
plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, mc.cores=cores) |
|
|
196 |
|
|
|
197 |
# 74mm * 99mm per panelwidth = 77, height = 74 |
|
|
198 |
figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
199 |
|
|
|
200 |
for(i in seq(plot.list)){ |
|
|
201 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
202 |
} |
|
|
203 |
|
|
|
204 |
multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) |
|
|
205 |
} |
|
|
206 |
|
|
|
207 |
|
|
|
208 |
#' |
|
|
209 |
plot.multi.scatter.scRNA=function(s.anno, scmat=NULL, colors.group, identityVector.samples.list=NULL, seurat.feature=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height = 74, text.size=10){ |
|
|
210 |
# tools: |
|
|
211 |
library(Matrix) |
|
|
212 |
library(Seurat) |
|
|
213 |
source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R") |
|
|
214 |
library(data.table) |
|
|
215 |
library(ComplexHeatmap) |
|
|
216 |
library(circlize) |
|
|
217 |
library(parallel) |
|
|
218 |
library(ggplot2) |
|
|
219 |
|
|
|
220 |
|
|
|
221 |
# plot here lineage proportion per patient |
|
|
222 |
plot.list=mclapply(seq(dim(s.anno)[1]), function(i){ |
|
|
223 |
# plot proportions as a barplot next to 2D points |
|
|
224 |
load(s.anno[i,2]) |
|
|
225 |
coords=Embeddings(scmat[["umap"]]) |
|
|
226 |
identityVector=unlist(strsplit(s.anno$Type[i], ", |,")) |
|
|
227 |
clusters=unlist(strsplit(s.anno$Clusters[i], ", |,")) |
|
|
228 |
clusters.samples=as.character(Idents(scmat)) |
|
|
229 |
|
|
|
230 |
if(is.null(identityVector.samples.list)){ |
|
|
231 |
identityVector.samples=clusters.samples |
|
|
232 |
|
|
|
233 |
for(j in seq(clusters)){ |
|
|
234 |
identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j] |
|
|
235 |
} |
|
|
236 |
}else{ |
|
|
237 |
identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list |
|
|
238 |
} |
|
|
239 |
|
|
|
240 |
if(!is.null(seurat.feature)){ |
|
|
241 |
identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list |
|
|
242 |
} |
|
|
243 |
|
|
|
244 |
name.sample=s.anno$Sample[i] |
|
|
245 |
|
|
|
246 |
# take the colors: |
|
|
247 |
colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2] |
|
|
248 |
|
|
|
249 |
a=plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, namev=name, main = name, add.proportions = T, SIZE = SIZE, add.legend=F, rasterize=rasterize, width=width, height = height, text.size=text.size) |
|
|
250 |
return(a) |
|
|
251 |
}, mc.cores=cores) |
|
|
252 |
|
|
|
253 |
# add legend last |
|
|
254 |
add.legend=get.only.legend(group = colors.group[,1], colorv = colors.group[,2]) |
|
|
255 |
|
|
|
256 |
nrows=ceiling(length(plot.list)/2)+1 |
|
|
257 |
# 74mm * 99mm per panelwidth = 77, height = 74 |
|
|
258 |
figure <-multipanelfigure:: multi_panel_figure(width = 2*width+width*0.2, height = ceiling(length(plot.list)/2)*height+120, rows = nrows, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
259 |
|
|
|
260 |
rowi=1 |
|
|
261 |
for(i in seq(plot.list)){ |
|
|
262 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
263 |
} |
|
|
264 |
|
|
|
265 |
figure <- multipanelfigure::fill_panel(figure,row = nrows,column = 1:2, add.legend) |
|
|
266 |
multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) |
|
|
267 |
} |
|
|
268 |
|
|
|
269 |
plot.scatter.seurat=function(scmat, colors.group,clusters="", identityVector="", identityVector.samples.list=NULL, seurat.feature=NULL, lv=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height=74, text.size=10, add.proportions=T, add.density=F, add.labels=F){ |
|
|
270 |
# tools: |
|
|
271 |
library(Matrix) |
|
|
272 |
library(Seurat) |
|
|
273 |
source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R") |
|
|
274 |
library(data.table) |
|
|
275 |
library(ComplexHeatmap) |
|
|
276 |
library(circlize) |
|
|
277 |
library(parallel) |
|
|
278 |
library(ggplot2) |
|
|
279 |
|
|
|
280 |
coords=Embeddings(scmat[["umap"]]) |
|
|
281 |
clusters.samples=as.character(Idents(scmat)) |
|
|
282 |
|
|
|
283 |
if(is.null(identityVector.samples.list)){ |
|
|
284 |
identityVector.samples=clusters.samples |
|
|
285 |
|
|
|
286 |
for(j in seq(clusters)){ |
|
|
287 |
identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j] |
|
|
288 |
} |
|
|
289 |
}else{ |
|
|
290 |
identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list |
|
|
291 |
} |
|
|
292 |
|
|
|
293 |
if(!is.null(seurat.feature)){ |
|
|
294 |
identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list |
|
|
295 |
|
|
|
296 |
ind=order(match(identityVector.samples, colors.group[,1])) |
|
|
297 |
|
|
|
298 |
# order everything: |
|
|
299 |
identityVector.samples=identityVector.samples[ind] |
|
|
300 |
coords=coords[ind,] |
|
|
301 |
lv=lv[ind] |
|
|
302 |
} |
|
|
303 |
|
|
|
304 |
# take the colors: |
|
|
305 |
colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2] |
|
|
306 |
|
|
|
307 |
plot.list=list(plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, lv = lv, namev=name, main = name, add.proportions = add.proportions, add.density = add.density, add.labels = add.labels, SIZE = SIZE, add.legend=T, rasterize=rasterize, width=width, height = height, text.size=text.size)) |
|
|
308 |
|
|
|
309 |
figure <-multipanelfigure:: multi_panel_figure(width = width+50, height = height, rows = 1, columns = 1, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) |
|
|
310 |
|
|
|
311 |
for(i in seq(plot.list)){ |
|
|
312 |
figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) |
|
|
313 |
} |
|
|
314 |
|
|
|
315 |
multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) |
|
|
316 |
} |
|
|
317 |
|
|
|
318 |
|
|
|
319 |
#' |
|
|
320 |
#' |
|
|
321 |
compute.FDR.scRNA=function(i, s.anno, nperm=1000, geneset, nCores=6){ |
|
|
322 |
library(AUCell) |
|
|
323 |
library(Matrix) |
|
|
324 |
library(Seurat) |
|
|
325 |
|
|
|
326 |
load(s.anno[i,2]) |
|
|
327 |
|
|
|
328 |
# take only genes that are in the matrix |
|
|
329 |
geneset=geneset[geneset%in%rownames(scmat)] |
|
|
330 |
|
|
|
331 |
# Random |
|
|
332 |
geneset.random=lapply(seq(nperm), function(i, genes){ |
|
|
333 |
sample(rownames(scmat), length(geneset)) |
|
|
334 |
}) |
|
|
335 |
|
|
|
336 |
names(geneset.random)=paste0("Random",seq(nperm)) |
|
|
337 |
|
|
|
338 |
geneset.observed=list("MDS_signature"=geneset) |
|
|
339 |
|
|
|
340 |
data_n <- data.matrix(GetAssayData(scmat, assay = "RNA")) |
|
|
341 |
|
|
|
342 |
cells_rankings <- AUCell_buildRankings(data_n, nCores=nCores, plotStats=TRUE) |
|
|
343 |
AUC.random <- AUCell_calcAUC(geneset.random, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05) |
|
|
344 |
AUC.observed <- AUCell_calcAUC(geneset.observed, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05) |
|
|
345 |
|
|
|
346 |
# Alternative: assign cells according to the 'automatic' threshold |
|
|
347 |
cells_assignment <- AUCell_exploreThresholds(AUC.observed, plotHist=T, assignCells=TRUE) |
|
|
348 |
|
|
|
349 |
# how many times random higher than observed, divide by the number of permutations == eFDR |
|
|
350 |
y=as.numeric(getAUC(AUC.observed)) |
|
|
351 |
x=getAUC(AUC.random) |
|
|
352 |
|
|
|
353 |
FDR=sapply(seq(y), function(i){ |
|
|
354 |
sum(x[,i]>y[i])/nperm |
|
|
355 |
}) |
|
|
356 |
|
|
|
357 |
empirical.FDR=data.frame(t(FDR), check.names = F) |
|
|
358 |
rownames(empirical.FDR)=paste0(names(geneset.observed), "_eFDR_", nperm, "_", length(geneset)) |
|
|
359 |
colnames(empirical.FDR)=colnames(scmat) |
|
|
360 |
|
|
|
361 |
a=scmat[["SingleR.label"]] |
|
|
362 |
table(a[rownames(a)%in%cells_assignment$MDS_signature$assignment,]) |
|
|
363 |
table(a[empirical.FDR<0.05,]) |
|
|
364 |
|
|
|
365 |
scmat[["empirical.FDR"]]=as.numeric(empirical.FDR) |
|
|
366 |
DimPlot(scmat, group.by = "empirical.FDR") |
|
|
367 |
|
|
|
368 |
scmat[["y"]]=as.numeric(y)>0.04 |
|
|
369 |
DimPlot(scmat, group.by = "y") |
|
|
370 |
|
|
|
371 |
return(list(empirical.FDR, y)) |
|
|
372 |
} |
|
|
373 |
|
|
|
374 |
get.only.legend=function(colorv, group, position="right", direction="vertical"){ |
|
|
375 |
library(ggplot2) |
|
|
376 |
|
|
|
377 |
dat=data.frame(group, colorv) |
|
|
378 |
levels(dat$group)=group |
|
|
379 |
myColors <- colorv |
|
|
380 |
names(myColors) <- levels(group) |
|
|
381 |
colScale <- scale_colour_manual(name = "group",values = myColors) |
|
|
382 |
dat$x=dim(dat)[1] |
|
|
383 |
dat$y=dim(dat)[1] |
|
|
384 |
|
|
|
385 |
legend <- cowplot::get_legend(ggplot(dat, aes(x, y, colour = group)) + theme_bw() + |
|
|
386 |
geom_point(size=0.6) + colScale +theme(legend.position = position, legend.direction = direction, legend.title = element_blank(), |
|
|
387 |
legend.key=element_blank(), legend.box.background=element_blank(), |
|
|
388 |
legend.text=element_text(size = 10, face = "bold", family="Helvetica"))+ |
|
|
389 |
theme(plot.margin=unit(c(1,1,1,1), "mm"))+ |
|
|
390 |
guides(colour = guide_legend(override.aes = list(size=3)))) |
|
|
391 |
return(legend) |
|
|
392 |
} |
|
|
393 |
|
|
|
394 |
|
|
|
395 |
statistical.comparisons.scRNA=function(i, s.anno, genelist.statistical.analysis=NULL, test.use="wilcox"){ |
|
|
396 |
# plot proportions as a barplot next to 2D points |
|
|
397 |
load(s.anno[i,2]) |
|
|
398 |
coords=Embeddings(scmat[["umap"]]) |
|
|
399 |
identityVector=gsub(" ", "", unlist(strsplit(s.anno$Type[i], ", |,"))) |
|
|
400 |
clusters=gsub(" ", "", unlist(strsplit(s.anno$Clusters[i], ", |,"))) |
|
|
401 |
clusters.samples=as.character(Idents(scmat)) |
|
|
402 |
identityVector.samples=clusters.samples |
|
|
403 |
|
|
|
404 |
for(j in seq(clusters)){ |
|
|
405 |
identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j] |
|
|
406 |
} |
|
|
407 |
|
|
|
408 |
name.sample=s.anno$Sample[i] |
|
|
409 |
|
|
|
410 |
if(is.null(genelist.statistical.analysis))genelist.statistical.analysis=rownames(scmat) |
|
|
411 |
|
|
|
412 |
# Find the DE genes per factor and within factor: |
|
|
413 |
statistical.analysis=do.call(rbind, mclapply(unique(identityVector.samples), function(id){ |
|
|
414 |
clusters2test=unique(clusters.samples[identityVector.samples%in%id]) |
|
|
415 |
|
|
|
416 |
# clusters2test vs Rest: |
|
|
417 |
markers=FindMarkers(scmat, ident.1 = clusters2test, group.by = "seurat_clusters", test.use = test.use, logfc.threshold =0.01, only.pos = T, features = genelist.statistical.analysis) |
|
|
418 |
|
|
|
419 |
#DE in clusters within category |
|
|
420 |
cluster.markers=FindAllMarkers(scmat[,clusters.samples%in%clusters2test], test.use = test.use, logfc.threshold = 0.01, only.pos = T, features = genelist.statistical.analysis) |
|
|
421 |
|
|
|
422 |
# make genelists with annotation: |
|
|
423 |
markers$test=paste0(id, " vs. Rest") |
|
|
424 |
markers$gene=rownames(markers) |
|
|
425 |
cluster.markers$test=paste0(id, " ", cluster.markers$cluster, " vs. Rest") |
|
|
426 |
cluster.markers=cluster.markers[,!colnames(cluster.markers)%in%"cluster"] |
|
|
427 |
df.statistics=plyr::rbind.fill(list(markers, cluster.markers)) |
|
|
428 |
df.statistics$test.used=test.use |
|
|
429 |
return(df.statistics) |
|
|
430 |
})) |
|
|
431 |
|
|
|
432 |
return(statistical.analysis) |
|
|
433 |
} |
|
|
434 |
|
|
|
435 |
|
|
|
436 |
#' @param seurat.object Seurat class object or data matrix that can be converted to Seurat object. Can also be a list of data matrices (RNA+citeseq etc.), Must be named by assay type with four uppercase letters (PROT, etc) (also found from seurat object with this name). |
|
|
437 |
#' @param regress.cell.label Character vector with as many columns as seurat.object Contains batch information for each cell. Uses fastMNN/SCTtransform to batch correct the data (usually different samples or experiments). |
|
|
438 |
#' @param check.pcs Plot jackstraw and elbowplot to optimize the number of PCA componens |
|
|
439 |
#' @param plot.umap Umap plot for clusters and for sample ID, if batch corrected also batch clustering |
|
|
440 |
#' @param resolution Resolution parameter for clustering |
|
|
441 |
#' @param nFeature.min Filter cells with < nFeature.min |
|
|
442 |
#' @param nFeature.max Filter cells with > nFeature.max |
|
|
443 |
#' @param percent.mitoDNA Filter cells with mitochondrial genes > percent.mitoDNA |
|
|
444 |
#' @param singleR Annotate each cell using singleR built in annotations. Mainly uses Encode+blueprint annotations. |
|
|
445 |
#' @param cores Number of cores to use for Seurat/singleR/MNNcorrect |
|
|
446 |
#' @param add.gm.score Adds geneset score (using geometric mean) to fm, must be a named list of genes. Geneset name is added to fm and seurat object. |
|
|
447 |
#' @param ... Pass parameters to Seurat tools |
|
|
448 |
sc.data.analysis=function(scmat, name="scRNA_object", nr.pcs=30, regress.cell.label=NULL, batch.correction.method=c("SCTtransform","MNNcorrect"), auto.order=F, normalize.input=T, check.pcs=T, plot.umap=T, resolution=0.5, nFeature.min=0, nFeature.max=10000, percent.mitoDNA=25, singleR=T, singleR.reference=c("blueprint_encode", "HPCA"), cores=6, add.gm.score=NULL, ...){ |
|
|
449 |
|
|
|
450 |
# determine computational resources: |
|
|
451 |
future::plan("multiprocess", workers = cores) |
|
|
452 |
options(future.globals.maxSize= 5e+09) |
|
|
453 |
|
|
|
454 |
# set method for batch correction |
|
|
455 |
batch.correction.method=match.arg(batch.correction.method) |
|
|
456 |
|
|
|
457 |
# list of data matrix, can be citeseq data: |
|
|
458 |
if(class(scmat)=="list"){ |
|
|
459 |
cat("Input is list, splitting to data matrix by type and adding to Seurat", sep="\n\n") |
|
|
460 |
|
|
|
461 |
list.additional=lapply(2:length(scmat), function(i)CreateAssayObject(scmat[[i]])) |
|
|
462 |
names(list.additional)=names(scmat)[2:length(scmat)] |
|
|
463 |
scmat <- CreateSeuratObject(scmat[[1]]) |
|
|
464 |
}else{ |
|
|
465 |
list.additional=NULL |
|
|
466 |
# if class is not seurat object |
|
|
467 |
if(!class(scmat)=="Seurat"){ |
|
|
468 |
cat("Input is data matrix, converting to Seurat object", sep="\n\n") |
|
|
469 |
scmat <- CreateSeuratObject(scmat) |
|
|
470 |
}else{ |
|
|
471 |
cat("Input is Seurat object", sep="\n\n") |
|
|
472 |
} |
|
|
473 |
} |
|
|
474 |
DefaultAssay(object = scmat) <- "RNA" |
|
|
475 |
|
|
|
476 |
# is it a vector of pcs or single value? |
|
|
477 |
if(length(nr.pcs)==1)nr.pcs=seq(nr.pcs) |
|
|
478 |
|
|
|
479 |
# QC |
|
|
480 |
scmat=PercentageFeatureSet(scmat, pattern = "^MT-", col.name = "percent.mt") |
|
|
481 |
r1=VlnPlot(scmat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) |
|
|
482 |
ggplot2::ggsave(r1, filename = paste0(name, "_QC.pdf"), width = 12) |
|
|
483 |
|
|
|
484 |
# Filtering |
|
|
485 |
nFeature_RNA <- FetchData(object = scmat, vars = "nFeature_RNA") |
|
|
486 |
pct.mt <- FetchData(object = scmat, vars = "percent.mt") |
|
|
487 |
filt=which(x = nFeature_RNA > nFeature.min & nFeature_RNA < nFeature.max & pct.mt < percent.mitoDNA) |
|
|
488 |
cat(paste("Dimension before/after filtering:", paste(paste(dim(scmat), collapse="x"), paste(dim(scmat[, filt]), collapse="x"), collapse = "/")), sep="\n\n") |
|
|
489 |
scmat=scmat[, filt] |
|
|
490 |
|
|
|
491 |
if(normalize.input){ |
|
|
492 |
assay="SCT" |
|
|
493 |
scmat=SCTransform(scmat, variable.features.n = 3000) #,...) |
|
|
494 |
|
|
|
495 |
# seurat standard processing |
|
|
496 |
scmat=RunPCA(scmat, npcs = 100) |
|
|
497 |
reduction="pca" |
|
|
498 |
}else{ |
|
|
499 |
assay="RNA" |
|
|
500 |
|
|
|
501 |
# assume that the data is normalized |
|
|
502 |
scmat <- FindVariableFeatures(object = scmat) |
|
|
503 |
scmat <- ScaleData(object = scmat) |
|
|
504 |
scmat=RunPCA(scmat, npcs = 100) |
|
|
505 |
reduction="pca" |
|
|
506 |
} |
|
|
507 |
|
|
|
508 |
# set default assay to use: |
|
|
509 |
DefaultAssay(object = scmat) <- assay |
|
|
510 |
|
|
|
511 |
# use SCTransform to remove batch effect, used in umap and clustering |
|
|
512 |
# else use SCT to normalize data |
|
|
513 |
if(!is.null(regress.cell.label)&batch.correction.method=="SCTtransform"){ |
|
|
514 |
batch.df=data.frame("batch"=regress.cell.label[filt]) |
|
|
515 |
rownames(batch.df)=colnames(scmat) |
|
|
516 |
scmat[["batch"]] = batch.df |
|
|
517 |
scmat=SCTransform(scmat,vars.to.regress="batch", variable.features.n = 3000) |
|
|
518 |
|
|
|
519 |
# seurat standard processing |
|
|
520 |
scmat=RunPCA(scmat, npcs = 100) |
|
|
521 |
|
|
|
522 |
name=paste0(name, "_", batch.correction.method) |
|
|
523 |
reduction="pca" |
|
|
524 |
} |
|
|
525 |
|
|
|
526 |
# optimize pcs number: |
|
|
527 |
if(check.pcs){ |
|
|
528 |
|
|
|
529 |
# setting here original RNA to define PCAs, SCT not working yet, wait for a fix |
|
|
530 |
# also the issue here how to deal with the mnnCorrect? |
|
|
531 |
# DefaultAssay(object = scmat) <- "RNA" |
|
|
532 |
# scmat <- NormalizeData(scmat,display.progress = FALSE) |
|
|
533 |
# scmat <- FindVariableFeatures(scmat,do.plot = F,display.progress = FALSE) |
|
|
534 |
# scmat <- ScaleData(scmat) |
|
|
535 |
# scmat=RunPCA(scmat, npcs = 100) |
|
|
536 |
|
|
|
537 |
# determine how many pcs should be used: |
|
|
538 |
r3=ElbowPlot(scmat, ndims = 100, reduction = "pca") |
|
|
539 |
ggplot2::ggsave(r3, filename = paste0(name, "_elbow.pdf")) |
|
|
540 |
|
|
|
541 |
# find optimal number of PCs to use: |
|
|
542 |
scmat <- JackStraw(scmat, num.replicate = 100, dims = 100, reduction = "pca") |
|
|
543 |
scmat <- ScoreJackStraw(scmat, dims = 1:100, reduction = "pca") |
|
|
544 |
r4=JackStrawPlot(scmat, dims = 1:100) |
|
|
545 |
ggplot2::ggsave(r4, filename = paste0(name, "_Jackstraw.pdf"), width = 12) |
|
|
546 |
|
|
|
547 |
cat("PCs checking done", sep="\n\n") |
|
|
548 |
} |
|
|
549 |
|
|
|
550 |
# use MNN to remove batch effect, used in umap and clustering: |
|
|
551 |
if(!is.null(regress.cell.label)&batch.correction.method=="MNNcorrect"){ |
|
|
552 |
library(BiocParallel) |
|
|
553 |
library(batchelor) |
|
|
554 |
|
|
|
555 |
cat("FastMNN batch correction...", sep="\n\n") |
|
|
556 |
|
|
|
557 |
if(is.factor(regress.cell.label)&!auto.order){ |
|
|
558 |
order=levels(regress.cell.label) |
|
|
559 |
cat("FastMNN batch correction order:", sep="\n\n") |
|
|
560 |
cat(order, sep="-->") |
|
|
561 |
cat("", sep="\n\n") |
|
|
562 |
} |
|
|
563 |
|
|
|
564 |
if(!is.factor(regress.cell.label)&!auto.order){ |
|
|
565 |
order=unique(regress.cell.label) |
|
|
566 |
cat("FastMNN batch correction order:", sep="\n\n") |
|
|
567 |
cat(order, sep="-->") |
|
|
568 |
cat("", sep="\n\n") |
|
|
569 |
} |
|
|
570 |
|
|
|
571 |
batch.df=data.frame("batch"=regress.cell.label[filt]) |
|
|
572 |
rownames(batch.df)=colnames(scmat) |
|
|
573 |
scmat[["batch"]] = batch.df |
|
|
574 |
|
|
|
575 |
# http://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/fast_mnn.html |
|
|
576 |
scmat <- NormalizeData(scmat) |
|
|
577 |
scmat <- FindVariableFeatures(scmat) |
|
|
578 |
|
|
|
579 |
object.list = SplitObject(scmat, split.by = "batch")[order(order)] |
|
|
580 |
|
|
|
581 |
scmat <- SeuratWrappers::RunFastMNN(object.list, reduction.name = "mnnCorrect", auto.order=auto.order, BPPARAM=MulticoreParam(cores)) |
|
|
582 |
|
|
|
583 |
# set defaults |
|
|
584 |
reduction="mnnCorrect" |
|
|
585 |
cat("FastMNN batch correction... done", sep="\n\n") |
|
|
586 |
|
|
|
587 |
} |
|
|
588 |
|
|
|
589 |
# Basic seurat processing with umap: |
|
|
590 |
scmat=RunUMAP(scmat, dims = nr.pcs, reduction = reduction) |
|
|
591 |
scmat=FindNeighbors(scmat, dims = nr.pcs, reduction = reduction) |
|
|
592 |
scmat=FindClusters(scmat, resolution = resolution) |
|
|
593 |
|
|
|
594 |
# add celltype automatically |
|
|
595 |
if(singleR){ |
|
|
596 |
cat("Annotating with singleR", sep="\n\n") |
|
|
597 |
|
|
|
598 |
if(!file.exists(paste0(name, "_singler_object.Rdata"))){ |
|
|
599 |
library("SingleR") |
|
|
600 |
|
|
|
601 |
counts <- GetAssayData(scmat, assay = assay, slot = "counts") |
|
|
602 |
|
|
|
603 |
if(singleR.reference=="blueprint_encode"){ |
|
|
604 |
if(dim(counts)[2]<100000){ |
|
|
605 |
singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(blueprint_encode), project.name=name, fine.tune = T,do.signatures = T, numCores=cores) |
|
|
606 |
}else{ |
|
|
607 |
cat("SingleR in batches", sep="\n\n") |
|
|
608 |
singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat), N=30000, ref.list = list(blueprint_encode), xy = Embeddings(scmat[["umap"]]), project.name=name, fine.tune = T, do.signatures = T, numCores=cores) |
|
|
609 |
} |
|
|
610 |
} |
|
|
611 |
|
|
|
612 |
if(singleR.reference=="HPCA"){ |
|
|
613 |
if(dim(counts)[2]<100000){ |
|
|
614 |
singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores) |
|
|
615 |
}else{ |
|
|
616 |
cat("SingleR in batches", sep="\n\n") |
|
|
617 |
singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat), N=30000, ref.list = list(hpca), project.name=name, xy = Embeddings(scmat[["umap"]]), fine.tune = T,do.signatures = T, numCores=cores) |
|
|
618 |
} |
|
|
619 |
} |
|
|
620 |
|
|
|
621 |
# add original identifiers |
|
|
622 |
singler$meta.data$orig.ident = scmat@meta.data$orig.ident # the original identities, if not supplied in 'annot' |
|
|
623 |
|
|
|
624 |
## if using Seurat v3.0 and over use: |
|
|
625 |
singler$meta.data$xy = scmat@reductions$umap@cell.embeddings # the tSNE coordinates |
|
|
626 |
singler$meta.data$clusters = scmat@active.ident # the Seurat clusters (if 'clusters' not provided) |
|
|
627 |
|
|
|
628 |
# add annot from hpca |
|
|
629 |
# singler2 = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores) |
|
|
630 |
# singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells" |
|
|
631 |
# singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells" |
|
|
632 |
# singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells" |
|
|
633 |
# singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells" |
|
|
634 |
|
|
|
635 |
# these names are not intuitive, replace them, mentioned that these are actually naive http://comphealth.ucsf.edu/SingleR/SupplementaryInformation2.html: |
|
|
636 |
singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD4+ T-cells"]="CD4+ Tn" |
|
|
637 |
singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD8+ T-cells"]="CD8+ Tn" |
|
|
638 |
singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD4+ T-cells"]="CD4+ Tn" |
|
|
639 |
singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD8+ T-cells"]="CD8+ Tn" |
|
|
640 |
|
|
|
641 |
save(singler, file=paste0(name, "_singler_object.Rdata")) |
|
|
642 |
|
|
|
643 |
}else{ |
|
|
644 |
library("SingleR") |
|
|
645 |
load(paste0(name, "_singler_object.Rdata")) |
|
|
646 |
cat(paste0("Loaded existing SingleR object (", paste0(name, "_singler_object.Rdata"), "), remove/rename it if you want to re-compute."), sep="\n\n") |
|
|
647 |
|
|
|
648 |
} |
|
|
649 |
# can be added as cluster id |
|
|
650 |
cluster=singler$singler[[1]]$SingleR.clusters$labels |
|
|
651 |
cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels |
|
|
652 |
|
|
|
653 |
# order based on cell type and add celltype to cluster: |
|
|
654 |
lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes") |
|
|
655 |
lineage=unique(c(lineage,blueprint_encode$types, hpca$types)) |
|
|
656 |
lineage=lineage[lineage%in%singler$singler[[1]]$SingleR.single$labels] |
|
|
657 |
|
|
|
658 |
scmat[["SingleR.label.main"]]=singler$singler[[1]]$SingleR.single.main$labels |
|
|
659 |
scmat[["SingleR.label.cluster"]]=paste(singler$singler[[1]]$SingleR.single$labels, Idents(scmat)) |
|
|
660 |
scmat[["SingleR.label"]]=factor(singler$singler[[1]]$SingleR.single$labels, levels=lineage[lineage%in%unique(singler$singler[[1]]$SingleR.single$labels)]) |
|
|
661 |
|
|
|
662 |
identityVector.samples=as.character(Idents(scmat)) |
|
|
663 |
clusters.samples=Idents(scmat) |
|
|
664 |
|
|
|
665 |
for(j in seq(cluster)){ |
|
|
666 |
identityVector.samples[clusters.samples%in%rownames(cluster)[j]]=paste0(cluster[j], ":", rownames(cluster)[j]) |
|
|
667 |
} |
|
|
668 |
|
|
|
669 |
# order and cluster identity; |
|
|
670 |
cluster=cluster[order(match(cluster[,1],lineage)),,drop=F] |
|
|
671 |
cat(paste(levels(Idents(scmat)), collapse=","), paste(cluster, collapse=","), sep="\t\t") |
|
|
672 |
|
|
|
673 |
scmat[["SingleR.cluster"]]=gsub(":.*.", "", Idents(scmat)) # just the "cluster label" per cell |
|
|
674 |
|
|
|
675 |
# order seurat clusters too: |
|
|
676 |
Idents(scmat)=factor(identityVector.samples, levels=paste0(cluster, ":", rownames(cluster))) |
|
|
677 |
|
|
|
678 |
r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE, pt.size = 0.5, repel = T) + NoLegend() + NoAxes() |
|
|
679 |
ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6) |
|
|
680 |
|
|
|
681 |
cat("\nSingleR done", sep="\n\n") |
|
|
682 |
|
|
|
683 |
|
|
|
684 |
# new singleR: |
|
|
685 |
# counts <- GetAssayData(scmat, assay = "RNA", slot = "counts") |
|
|
686 |
# |
|
|
687 |
# library(SingleR) |
|
|
688 |
# library(scater) |
|
|
689 |
# |
|
|
690 |
# ref=NovershternHematopoieticData() |
|
|
691 |
# |
|
|
692 |
# common <- intersect(rownames(counts), rownames(ref)) |
|
|
693 |
# ref <- ref[common,] |
|
|
694 |
# counts <- counts[common,] |
|
|
695 |
# counts.log <- logNormCounts(counts) |
|
|
696 |
# |
|
|
697 |
# singler <- SingleR(test = counts.log, ref = ref, labels = ref$label.main, numCores=cores) |
|
|
698 |
# |
|
|
699 |
# # order based on cell type and add celltype to cluster: |
|
|
700 |
# lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes") |
|
|
701 |
# lineage=unique(c(lineage,ref$label.main)) |
|
|
702 |
# |
|
|
703 |
# scmat[["SingleR.label.main"]]=singler$first.labels |
|
|
704 |
# scmat[["SingleR.label"]]=factor(singler$labels, levels=lineage[lineage%in%unique(singler$labels)]) |
|
|
705 |
# |
|
|
706 |
# r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE, pt.size = 0.5, repel = T) + NoLegend() + NoAxes() |
|
|
707 |
# ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6) |
|
|
708 |
# |
|
|
709 |
# cat("\nSingleR done", sep="\n\n") |
|
|
710 |
|
|
|
711 |
} |
|
|
712 |
|
|
|
713 |
# plot clusters and samples |
|
|
714 |
if(plot.umap){ |
|
|
715 |
if(!is.null(regress.cell.label)){ |
|
|
716 |
r6=DimPlot(scmat, group.by = c("batch"), ncol = 2, label = T, repel = T) + NoLegend() + NoAxes() |
|
|
717 |
ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_batch.pdf"), width = 6, height = 6) |
|
|
718 |
r6=DimPlot(scmat, group.by = c("ident"), ncol = 2, label = T, repel = T) + NoLegend() + NoAxes() |
|
|
719 |
ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6) |
|
|
720 |
}else{ |
|
|
721 |
r6=DimPlot(scmat, group.by = c("ident"), ncol = 1, label = T, repel = T) + NoLegend() + NoAxes() |
|
|
722 |
ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6) |
|
|
723 |
} |
|
|
724 |
} |
|
|
725 |
|
|
|
726 |
# add immunoscores to FM format data matrix |
|
|
727 |
fm.f <- GetAssayData(scmat) |
|
|
728 |
|
|
|
729 |
# add gm based score: |
|
|
730 |
add.scores=list(HLAIScore=c("B2M", "HLA-A", "HLA-B","HLA-C"), HLAIIScore=c("HLA-DMA","HLA-DMB","HLA-DPA1","HLA-DPB1","HLA-DRA","HLA-DRB1"), CytolyticScore=c("GZMA", "PRF1", "GNLY", "GZMH", "GZMM")) |
|
|
731 |
if(!is.null(add.gm.score))add.scores=append(add.scores, add.gm.score) |
|
|
732 |
|
|
|
733 |
gm.objects=do.call(rbind, lapply(seq(add.scores), function(i){ |
|
|
734 |
dat3=fm.f[rownames(fm.f)%in%add.scores[[i]],] |
|
|
735 |
gm=t(apply(dat3, 2, gm_mean)) # done to normalized values |
|
|
736 |
rownames(gm)=names(add.scores)[i] |
|
|
737 |
return(gm) |
|
|
738 |
})) |
|
|
739 |
|
|
|
740 |
# also add to seurat object: |
|
|
741 |
for(i in seq(add.scores)){ |
|
|
742 |
scmat[[names(add.scores)[i]]] <- gm.objects[i,] |
|
|
743 |
} |
|
|
744 |
|
|
|
745 |
# gene expression and scores to fm: |
|
|
746 |
rownames(fm.f)=paste0("N:GEXP:", rownames(fm.f)) |
|
|
747 |
rownames(gm.objects)=paste0("N:SAMP:", rownames(gm.objects)) |
|
|
748 |
fm=rbind(gm.objects, fm.f) |
|
|
749 |
|
|
|
750 |
# DE analysis: |
|
|
751 |
markers.all=FindAllMarkers(scmat, only.pos = T)#, ...) |
|
|
752 |
|
|
|
753 |
# go through each data matrix, add to seurat and fm if more matrices |
|
|
754 |
if(!is.null(list.additional)){ |
|
|
755 |
|
|
|
756 |
for(i in seq(list.additional)){ |
|
|
757 |
mat.add=list.additional[[i]] |
|
|
758 |
mat.add=subset(mat.add,cells = colnames(scmat)) |
|
|
759 |
scmat[[names(list.additional)[i]]] <- mat.add |
|
|
760 |
scmat <- NormalizeData(scmat, assay = names(list.additional)[i], normalization.method = "CLR") # test scttransform too |
|
|
761 |
scmat <- ScaleData(scmat, assay = names(list.additional)[i]) |
|
|
762 |
|
|
|
763 |
add.data.normalized <- data.matrix(GetAssayData(scmat, assay = names(list.additional)[i], slot = "data")) |
|
|
764 |
rownames(add.data.normalized)=paste0("N:",names(list.additional)[i], ":", rownames(add.data.normalized)) |
|
|
765 |
fm=rbind(fm, add.data.normalized) |
|
|
766 |
} |
|
|
767 |
} |
|
|
768 |
|
|
|
769 |
save(list=c("fm", "scmat", "markers.all"), file=paste0(name, "_scRNA.Rdata")) |
|
|
770 |
|
|
|
771 |
return(paste0(name, "_scRNA.Rdata")) |
|
|
772 |
} |
|
|
773 |
|
|
|
774 |
# make sure data is on a linear non-log scale |
|
|
775 |
# check 0 values, if a lot between 0-1 better to use add.one. If counts, remove works too quite well |
|
|
776 |
# zero fix methods: https://arxiv.org/pdf/1806.06403.pdf |
|
|
777 |
gm_mean=function(x, na.rm=TRUE, zero.handle=c("remove", "add.one", "zero.propagate")){ |
|
|
778 |
zero.handle=match.arg(zero.handle) |
|
|
779 |
if(any(x < 0, na.rm = TRUE)){ |
|
|
780 |
warning("Negative values produced NaN - is the data on linear - non-log scale?") |
|
|
781 |
return(NaN) |
|
|
782 |
} |
|
|
783 |
|
|
|
784 |
if(zero.handle=="remove"){ |
|
|
785 |
return(exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))) |
|
|
786 |
} |
|
|
787 |
|
|
|
788 |
if(zero.handle=="add.one"){ |
|
|
789 |
return(exp(mean(log(x+1), na.rm = na.rm))-1) |
|
|
790 |
} |
|
|
791 |
|
|
|
792 |
if(zero.handle=="zero.propagate"){ |
|
|
793 |
if(any(x == 0, na.rm = TRUE)){ |
|
|
794 |
return(0) |
|
|
795 |
} |
|
|
796 |
return(exp(mean(log(x), na.rm = na.rm))) |
|
|
797 |
} |
|
|
798 |
|
|
|
799 |
} |
|
|
800 |
|
|
|
801 |
get.cluster.label.singleR=function(singler){ |
|
|
802 |
|
|
|
803 |
# how to add this? |
|
|
804 |
cluster=singler$singler[[1]]$SingleR.clusters$labels |
|
|
805 |
cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels |
|
|
806 |
|
|
|
807 |
# order based on cell type: |
|
|
808 |
# cat(paste0("'", colors.group[,1], "'"), sep=",") |
|
|
809 |
lineage=c('HSC','MPP','CLP','CMP','GMP','MEP','Monocytes','DC','Macrophages','Macrophages M1','Macrophages M2','CD4+ T-cells','CD8+ T-cells','Tregs','T_cell:gamma-delta','T_cell:CD4+_Naive','T_cell:CD8+_naive','T_cell:Treg:Naive','CD4+ Tcm','CD4+ Tem','CD8+ Tcm','CD8+ Tem','NK cells','naive B-cells','Memory B-cells','B-cells','Class-switched memory B-cells','Plasma cells','Endothelial cells','Neutrophils','Eosinophils','Fibroblasts','Smooth muscle','Erythroblast','Erythrocytes','Megakaryocytes') |
|
|
810 |
|
|
|
811 |
cluster=cluster[order(match(cluster[,1],lineage)),,drop=F] |
|
|
812 |
|
|
|
813 |
# order and cluster identity; |
|
|
814 |
cat(paste(rownames(cluster), collapse=","), paste(cluster, collapse=","), sep="\t") |
|
|
815 |
cat("", sep="\n") |
|
|
816 |
return(cluster) |
|
|
817 |
} |
|
|
818 |
|
|
|
819 |
# run cellphoneDB |
|
|
820 |
run.cellphonedb=function(scmat, celltype, out=getwd(), cores=10, threshold=0.05){ |
|
|
821 |
|
|
|
822 |
if(!dir.exists(out))dir.create(out, recursive = T) |
|
|
823 |
|
|
|
824 |
if(is.null(celltype)){ |
|
|
825 |
celltype=gsub(" ", "_", as.character(Idents(scmat))) |
|
|
826 |
} |
|
|
827 |
|
|
|
828 |
setwd(out) |
|
|
829 |
# input1: |
|
|
830 |
df=data.frame("Cell"=gsub("\\.", "_", colnames(scmat)), "cell_type"=celltype, stringsAsFactors = F) |
|
|
831 |
df$cell_type[scmat[["SingleR.label"]]=="Tregs"]="Tregs" |
|
|
832 |
|
|
|
833 |
meta=file.path(out, "meta.tsv") |
|
|
834 |
write.table(df, meta, row.names = F, col.names = T, quote = F, sep="\t") |
|
|
835 |
|
|
|
836 |
# input2: |
|
|
837 |
counts <- data.matrix(GetAssayData(scmat, assay = "SCT", slot = "data")) |
|
|
838 |
countsfile=tempfile() |
|
|
839 |
|
|
|
840 |
# gene symbol was not working... |
|
|
841 |
library("org.Hs.eg.db") # remember to install it if you don't have it already |
|
|
842 |
ensambleid <- mapIds(org.Hs.eg.db, keys = rownames(counts), keytype = "SYMBOL", column="ENSEMBL") |
|
|
843 |
rownames(counts)=ensambleid |
|
|
844 |
|
|
|
845 |
counts=counts[!is.na(rownames(counts)),] |
|
|
846 |
|
|
|
847 |
countsfile=file.path(out, "counts.tsv") |
|
|
848 |
|
|
|
849 |
write.table(t(c("Gene", gsub("\\.", "_", colnames(counts)))), countsfile, row.names = F, col.names = F, quote = F, sep="\t") |
|
|
850 |
write.table(counts, countsfile, row.names = T, col.names = F, quote = F, sep="\t", append=T) |
|
|
851 |
|
|
|
852 |
system(paste0("cellphonedb method statistical_analysis ", meta, " ", countsfile, " --iterations=1000 --threads=", cores, " --threshold=", threshold)) |
|
|
853 |
|
|
|
854 |
# make heatmap of interactions: |
|
|
855 |
system(paste0("cellphonedb plot heatmap_plot ", meta)) |
|
|
856 |
|
|
|
857 |
deconvoluted=data.table::fread(file.path(out, "out/deconvoluted.txt"), data.table = F) |
|
|
858 |
pvalues=data.table::fread(file.path(out, "out/pvalues.txt"), data.table = F) |
|
|
859 |
significant_means.txt=data.table::fread(file.path(out, "out/significant_means.txt"), data.table = F) |
|
|
860 |
means=data.table::fread(file.path(out, "out/means.txt"), data.table = F) |
|
|
861 |
|
|
|
862 |
unlink(countsfile) |
|
|
863 |
unlink(meta) |
|
|
864 |
|
|
|
865 |
return(list("deconvoluted"=deconvoluted, "pvalues"=pvalues, "significant_means"=significant_means.txt, "means"=means)) |
|
|
866 |
} |
|
|
867 |
|
|
|
868 |
# found bug in reference list, was missing, also min genes was 200 not 0 causing errors... |
|
|
869 |
CreateBigSingleRObject.custom=function (counts, annot = NULL, project.name, xy, clusters, N = 10000, |
|
|
870 |
min.genes = 0, technology = "10X", species = "Human", citation = "", |
|
|
871 |
ref.list = list(), normalize.gene.length = F, variable.genes = "de", |
|
|
872 |
fine.tune = T, reduce.file.size = T, do.signatures = F, do.main.types = T, |
|
|
873 |
temp.dir = getwd(), numCores = SingleR.numCores){ |
|
|
874 |
n = ncol(counts) |
|
|
875 |
s = seq(1, n, by = N) |
|
|
876 |
dir.create(paste0(temp.dir, "/singler.temp/"), showWarnings = FALSE) |
|
|
877 |
for (i in s) { |
|
|
878 |
print(i) |
|
|
879 |
A = seq(i, min(i + N - 1, n)) |
|
|
880 |
|
|
|
881 |
singler = CreateSinglerObject(counts[, A], annot = annot[A], ref.list = ref.list, |
|
|
882 |
project.name = project.name, min.genes = min.genes, |
|
|
883 |
technology = technology, species = species, citation = citation, |
|
|
884 |
do.signatures = do.signatures, clusters = NULL, numCores = numCores) |
|
|
885 |
|
|
|
886 |
|
|
|
887 |
save(singler, file = paste0(temp.dir, "/singler.temp/", |
|
|
888 |
project.name, ".", i, ".RData")) |
|
|
889 |
} |
|
|
890 |
singler.objects.file <- list.files(paste0(temp.dir, "/singler.temp/"), |
|
|
891 |
pattern = "RData", full.names = T) |
|
|
892 |
singler.objects = list() |
|
|
893 |
for (i in 1:length(singler.objects.file)) { |
|
|
894 |
load(singler.objects.file[[i]]) |
|
|
895 |
singler.objects[[i]] = singler |
|
|
896 |
} |
|
|
897 |
singler = SingleR.Combine.custom(singler.objects, order = colnames(counts),expr = counts, clusters = clusters, xy = xy) |
|
|
898 |
singler |
|
|
899 |
} |
|
|
900 |
|
|
|
901 |
# bug also in this... |
|
|
902 |
SingleR.Combine.custom=function (singler.list, order = NULL, clusters = NULL, expr = NULL, xy = NULL) |
|
|
903 |
{ |
|
|
904 |
singler = c() |
|
|
905 |
singler$singler = singler.list[[1]]$singler |
|
|
906 |
for (j in 1:length(singler.list[[1]]$singler)) { |
|
|
907 |
singler$singler[[j]]$SingleR.cluster = c() |
|
|
908 |
singler$singler[[j]]$SingleR.cluster.main = c() |
|
|
909 |
singler$singler[[j]]$SingleR.single$clusters = c() |
|
|
910 |
} |
|
|
911 |
singler$meta.data = singler.list[[1]]$meta.data |
|
|
912 |
singler$meta.data$clusters = c() |
|
|
913 |
singler$meta.data$xy = c() |
|
|
914 |
singler$meta.data$data.sets = rep(singler$meta.data$project.name, |
|
|
915 |
length(singler$meta.data$orig.ident)) |
|
|
916 |
for (i in 2:length(singler.list)) { |
|
|
917 |
for (j in 1:length(singler$singler)) { |
|
|
918 |
if (singler.list[[i]]$singler[[j]]$about$RefData != |
|
|
919 |
singler.list[[1]]$singler[[j]]$about$RefData) { |
|
|
920 |
stop("The objects are not ordered by the same reference data.") |
|
|
921 |
} |
|
|
922 |
singler$singler[[j]]$about$Organism = c(singler$singler[[j]]$about$Organism, |
|
|
923 |
singler.list[[i]]$singler[[j]]$about$Organism) |
|
|
924 |
singler$singler[[j]]$about$Citation = c(singler$singler[[j]]$about$Citation, |
|
|
925 |
singler.list[[i]]$singler[[j]]$about$Citation) |
|
|
926 |
singler$singler[[j]]$about$Technology = c(singler$singler[[j]]$about$Technology, |
|
|
927 |
singler.list[[i]]$singler[[j]]$about$Technology) |
|
|
928 |
singler$singler[[j]]$SingleR.single$labels = rbind(singler$singler[[j]]$SingleR.single$labels, |
|
|
929 |
singler.list[[i]]$singler[[j]]$SingleR.single$labels) |
|
|
930 |
if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) { |
|
|
931 |
singler$singler[[j]]$SingleR.single$labels1 = rbind(singler$singler[[j]]$SingleR.single$labels1, |
|
|
932 |
singler.list[[i]]$singler[[j]]$SingleR.single$labels1) |
|
|
933 |
} |
|
|
934 |
singler$singler[[j]]$SingleR.single$scores = rbind(singler$singler[[j]]$SingleR.single$scores, |
|
|
935 |
singler.list[[i]]$singler[[j]]$SingleR.single$scores) |
|
|
936 |
singler$singler[[j]]$SingleR.single.main$labels = rbind(singler$singler[[j]]$SingleR.single.main$labels, |
|
|
937 |
singler.list[[i]]$singler[[j]]$SingleR.single.main$labels) |
|
|
938 |
if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) { |
|
|
939 |
singler$singler[[j]]$SingleR.single.main$labels1 = rbind(singler$singler[[j]]$SingleR.single.main$labels1, |
|
|
940 |
singler.list[[i]]$singler[[j]]$SingleR.single.main$labels1) |
|
|
941 |
} |
|
|
942 |
singler$singler[[j]]$SingleR.single.main$scores = rbind(singler$singler[[j]]$SingleR.single.main$scores, |
|
|
943 |
singler.list[[i]]$singler[[j]]$SingleR.single.main$scores) |
|
|
944 |
singler$singler[[j]]$SingleR.single$cell.names = c(singler$singler[[j]]$SingleR.single$cell.names, |
|
|
945 |
singler.list[[i]]$singler[[j]]$SingleR.single$cell.names) |
|
|
946 |
singler$singler[[j]]$SingleR.single.main$cell.names = c(singler$singler[[j]]$SingleR.single.main$cell.names, |
|
|
947 |
singler.list[[i]]$singler[[j]]$SingleR.single.main$cell.names) |
|
|
948 |
if (!is.null(singler$singler[[j]]$SingleR.single.main$pval)) { |
|
|
949 |
singler$singler[[j]]$SingleR.single.main$pval = c(singler$singler[[j]]$SingleR.single.main$pval, |
|
|
950 |
singler.list[[i]]$singler[[j]]$SingleR.single.main$pval) |
|
|
951 |
} |
|
|
952 |
if (!is.null(singler$singler[[j]]$SingleR.single$pval)) { |
|
|
953 |
singler$singler[[j]]$SingleR.single$pval = c(singler$singler[[j]]$SingleR.single$pval, |
|
|
954 |
singler.list[[i]]$singler[[j]]$SingleR.single$pval) |
|
|
955 |
} |
|
|
956 |
} |
|
|
957 |
singler$meta.data$project.name = paste(singler$meta.data$project.name, |
|
|
958 |
singler.list[[i]]$meta.data$project.name, sep = "+") |
|
|
959 |
singler$meta.data$orig.ident = c(singler$meta.data$orig.ident, |
|
|
960 |
singler.list[[i]]$meta.data$orig.ident) |
|
|
961 |
singler$meta.data$data.sets = c(singler$meta.data$data.sets, |
|
|
962 |
rep(singler.list[[i]]$meta.data$project.name, length(singler.list[[i]]$meta.data$orig.ident))) |
|
|
963 |
} |
|
|
964 |
for (j in 1:length(singler$singler)) { |
|
|
965 |
if (!is.null(order)) { |
|
|
966 |
singler$singler[[j]]$SingleR.single$labels = singler$singler[[j]]$SingleR.single$labels[order, |
|
|
967 |
] |
|
|
968 |
if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) { |
|
|
969 |
singler$singler[[j]]$SingleR.single$labels1 = singler$singler[[j]]$SingleR.single$labels1[order, |
|
|
970 |
] |
|
|
971 |
} |
|
|
972 |
singler$singler[[j]]$SingleR.single$scores = singler$singler[[j]]$SingleR.single$scores[order, |
|
|
973 |
] |
|
|
974 |
singler$singler[[j]]$SingleR.single$cell.names = singler$singler[[j]]$SingleR.single$cell.names[order] |
|
|
975 |
singler$singler[[j]]$SingleR.single.main$labels = singler$singler[[j]]$SingleR.single.main$labels[order, |
|
|
976 |
] |
|
|
977 |
if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) { |
|
|
978 |
singler$singler[[j]]$SingleR.single.main$labels1 = singler$singler[[j]]$SingleR.single.main$labels1[order, |
|
|
979 |
] |
|
|
980 |
} |
|
|
981 |
singler$singler[[j]]$SingleR.single.main$scores = singler$singler[[j]]$SingleR.single.main$scores[order, |
|
|
982 |
] |
|
|
983 |
singler$singler[[j]]$SingleR.single.main$cell.names = singler$singler[[j]]$SingleR.single.main$cell.names[order] |
|
|
984 |
if (!is.null(singler$singler[[j]]$SingleR.single$pval)) { |
|
|
985 |
singler$singler[[j]]$SingleR.single$pval = singler$singler[[j]]$SingleR.single$pval[order] |
|
|
986 |
singler$singler[[j]]$SingleR.single.main$pval = singler$singler[[j]]$SingleR.single.main$pval[order] |
|
|
987 |
} |
|
|
988 |
} |
|
|
989 |
} |
|
|
990 |
if (!is.null(clusters) && !is.null(expr)) { |
|
|
991 |
for (j in 1:length(singler$singler)) { |
|
|
992 |
if (is.character(singler$singler[[j]]$about$RefData)) { |
|
|
993 |
ref = get(tolower(singler$singler[[j]]$about$RefData)) |
|
|
994 |
} |
|
|
995 |
else { |
|
|
996 |
ref = singler$singler[[j]]$about$RefData |
|
|
997 |
} |
|
|
998 |
singler$singler[[j]]$SingleR.clusters = SingleR("cluster", |
|
|
999 |
expr, ref$data, types = ref$types, clusters = factor(clusters), |
|
|
1000 |
sd.thres = ref$sd.thres, genes = "de", fine.tune = T) |
|
|
1001 |
singler$singler[[j]]$SingleR.clusters.main = SingleR("cluster", |
|
|
1002 |
expr, ref$data, types = ref$main_types, clusters = factor(clusters), |
|
|
1003 |
sd.thres = ref$sd.thres, genes = "de", fine.tune = T) |
|
|
1004 |
} |
|
|
1005 |
singler$meta.data$clusters = clusters |
|
|
1006 |
if (!is.null(xy)) { |
|
|
1007 |
singler$meta.data$xy = xy |
|
|
1008 |
} |
|
|
1009 |
} |
|
|
1010 |
singler |
|
|
1011 |
} |