a b/common_scripts/scRNA/functions.scRNA.analysis.R
1
#'
2
#'
3
plot.scRNA.features.complexHeatmap=function(s.anno, features, name,width=300, cores){
4
  log=mclapply(seq(dim(s.anno)[1]), plot.scRNA.features, s.anno,features, name, mc.cores = cores, width=width)
5
}
6
7
plot.scRNA.features=function(i, s.anno, features, name, add.annotation=NULL, width=300){
8
  # eccite PBMC dataset:
9
  load(s.anno[i,2])
10
  
11
  #HSC, myeloid, B-solu, T-solu, erythroi
12
  # STEP1 find the cluster order, and set them based on lineage:
13
  clusters=Idents(scmat)
14
  if(s.anno[i,3]!=""){
15
    order.clu=as.numeric(unlist(strsplit(s.anno[i,3], ",|, | ,"))) # MODIFY
16
    
17
    ord.names=names(clusters[order(match(clusters,order.clu))])
18
    
19
  }else{
20
    
21
    order.clu=levels(clusters)
22
    
23
    # order samples within cluster by umap component with more spread
24
    # this way there is still information within cluster, for heatmaps
25
    coords=Embeddings(scmat[["umap"]])
26
    fit.cluster=levels(Idents(scmat))
27
    
28
    ord.names=unlist(lapply(fit.cluster, function(clu){
29
      d=coords[Idents(scmat)%in%clu,]
30
      ind=which.max(c(max(d[,1])-min(d[,1]), max(d[,2])-min(d[,2]))) # take the axis with most spread
31
      names(sort(d[,ind]))
32
    }))
33
    
34
  }
35
  
36
  
37
  
38
  features=data.frame(features, stringsAsFactors = F)
39
  
40
  if(!grepl("^B:|^N:", features[1,1])){
41
    features[,1]=paste0("N:GEXP:", features[,1])
42
    allfeats=rownames(fm)[!rowSums(fm>0)==0]
43
    
44
  }else{
45
    allfeats=gsub("N:GEXP:|N:RPPA:", "", rownames(fm)[!rowSums(fm>0)==0])
46
  }
47
  
48
  # text annot created if more columns than 1.
49
  if(dim(features)[2]>1){
50
    features=features[features[,1]%in%allfeats,]
51
    feats=features[,1]
52
    t.annot=apply(features[,2:dim(features)[2], drop=F], 1, paste, collapse=" ")
53
    names(t.annot)=feats
54
  }else{
55
    t.annot=NULL
56
    feats=features[features[,1]%in%allfeats,1]
57
  }
58
  
59
  # make a data frame for annotations on top of the heatmap
60
  annotdf=data.frame("cluster"=clusters,"HLAIScore"=as.numeric(fm["N:SAMP:HLAIScore",]), "HLAIIScore"=as.numeric(fm["N:SAMP:HLAIIScore",]), "CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",]))
61
  
62
  if(!is.null(add.annotation))annotdf$annotation=add.annotation
63
  
64
  name=gsub("__", "_", gsub("[[:punct:]]", "_", paste(name, s.anno[i,1], sep="_")))
65
  
66
  # plot using a complexheatmap package using a featurematrix format and custom plotting function
67
  r1=plot.complexHM.fm(feats = feats, text_annot = t.annot, fm.f = fm, annotdf = annotdf, feats.barplot = c("HLAIScore", "HLAIIScore", "CytolyticScore"), order_columns=ord.names, order_rows=F, split.columns = T, plotting.param="/research/groups/sysgen/PROJECTS/students/HEMAP_IMMUNOLOGY/minna_work/scRNA_genelist_plotting/plotting_param_fm.txt", NAME=name, WIDTH = width)
68
}
69
70
plot.gene.seurat=function(s.anno, feature, name,cols=c("grey75", "red"), max=1, min=0, cores=1){
71
  plot.feature=function(i, feature){
72
    # plot proportions as a barplot next to 2D points
73
    load(s.anno[i,2])
74
    
75
    if(!feature%in%rownames(scmat))return(NULL)
76
    
77
    p <- FeaturePlot(scmat, features = feature, cols = cols, max.cutoff = max, min.cutoff = min, order = T, combine = FALSE)
78
    p=lapply(p, `+`, labs(title = s.anno[i,1]))
79
    
80
    if(i>1){
81
      for(i in 1:length(p)) {
82
        p[[i]] <- p[[i]] + NoLegend() + NoAxes()
83
      }
84
    }else{
85
      p[[1]] <- p[[1]] + NoAxes()
86
    }
87
    
88
    cowplot::plot_grid(plotlist = p)
89
  }
90
  
91
  # plot here lineage proportion per patient
92
  plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores)
93
  plot.list=plot.list[!sapply(plot.list, is.null)]
94
  
95
  if(!length(plot.list))return(NULL)
96
  
97
  # 74mm * 99mm per panelwidth = 77, height = 74
98
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
99
  
100
  for(i in seq(plot.list)){
101
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
102
  }
103
  
104
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
105
}
106
107
plot.fm.feature=function(s.anno, feature, name, max=3, min=0, cores=1){
108
  plot.feature=function(i, feature){
109
    # plot proportions as a barplot next to 2D points
110
    load(s.anno[i,2])
111
    
112
    scmat[[feature]]=fm[rownames(fm)%in%feature,]
113
    
114
    p <- FeaturePlot(scmat, features = feature, cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE)
115
    p=lapply(p, `+`, labs(title = s.anno[i,1]))
116
    
117
    if(i>1){
118
      for(i in 1:length(p)) {
119
        p[[i]] <- p[[i]] + NoLegend() + NoAxes()
120
      }
121
    }else{
122
      p[[1]] <- p[[1]] + NoAxes()
123
    }
124
    
125
    return(p)
126
  }
127
  
128
  # plot here lineage proportion per patient
129
  plot.list=unlist(mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores), recursive = F)
130
  
131
  # 74mm * 99mm per panelwidth = 77, height = 74
132
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
133
  
134
  for(i in seq(plot.list)){
135
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
136
  }
137
  
138
  # 74mm * 99mm per panelwidth = 77, height = 74
139
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
140
  
141
  for(i in seq(plot.list)){
142
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
143
  }
144
  
145
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
146
}
147
148
plot.several.fm.feature=function(s.anno.object, features, name, min=0, max=5, cores=5){
149
  load(s.anno.object)
150
  
151
  features=features[features%in%rownames(fm)]
152
  
153
  plot.feature=function(i, features){
154
    # plot proportions as a barplot next to 2D points
155
    
156
    scmat[[features[i]]]=fm[rownames(fm)%in%features[i],]
157
    
158
    p <- FeaturePlot(scmat, features = features[i], cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE)
159
    p=lapply(p, `+`, labs(title = features[i]))
160
    
161
    if(i>1){
162
      for(i in 1:length(p)) {
163
        p[[i]] <- p[[i]] + NoLegend() + NoAxes()
164
      }
165
    }else{
166
      p[[1]] <- p[[1]] + NoAxes()
167
    }
168
    return(p)
169
  }
170
  
171
  # plot here lineage proportion per patient
172
  plot.list=unlist(mclapply(seq(features), plot.feature, features, mc.cores=cores), recursive = F)
173
  
174
  # 74mm * 99mm per panelwidth = 77, height = 74
175
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
176
  
177
  for(i in seq(plot.list)){
178
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
179
  }
180
  
181
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
182
}
183
184
plot.seurat.ident=function(s.anno, colorv, name, cores=1){
185
  plot.feature=function(i){
186
    # plot proportions as a barplot next to 2D points
187
    load(s.anno[i,2])
188
    
189
    cells=gsub(" \\d+$", "", levels(Idents(scmat)))   
190
    
191
    p=DimPlot(scmat, group.by = c("ident"), cols = colorv[match(cells, colorv[,1]),2], ncol = 1, label = T, repel = T) + NoLegend() + NoAxes()
192
  }
193
  
194
  # plot here lineage proportion per patient
195
  plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, mc.cores=cores)
196
  
197
  # 74mm * 99mm per panelwidth = 77, height = 74
198
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
199
  
200
  for(i in seq(plot.list)){
201
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
202
  }
203
  
204
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
205
}
206
207
208
#'
209
plot.multi.scatter.scRNA=function(s.anno, scmat=NULL, colors.group, identityVector.samples.list=NULL, seurat.feature=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height = 74, text.size=10){
210
  # tools:
211
  library(Matrix)
212
  library(Seurat)
213
  source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R")
214
  library(data.table)
215
  library(ComplexHeatmap)
216
  library(circlize)
217
  library(parallel)
218
  library(ggplot2)
219
  
220
  
221
  # plot here lineage proportion per patient
222
  plot.list=mclapply(seq(dim(s.anno)[1]), function(i){
223
    # plot proportions as a barplot next to 2D points
224
    load(s.anno[i,2])
225
    coords=Embeddings(scmat[["umap"]])
226
    identityVector=unlist(strsplit(s.anno$Type[i], ", |,"))
227
    clusters=unlist(strsplit(s.anno$Clusters[i], ", |,"))
228
    clusters.samples=as.character(Idents(scmat))
229
    
230
    if(is.null(identityVector.samples.list)){
231
      identityVector.samples=clusters.samples
232
      
233
      for(j in seq(clusters)){
234
        identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j]
235
      }
236
    }else{
237
      identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list
238
    }
239
    
240
    if(!is.null(seurat.feature)){
241
      identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list
242
    }
243
    
244
    name.sample=s.anno$Sample[i]
245
    
246
    # take the colors:
247
    colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2]
248
    
249
    a=plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, namev=name, main = name, add.proportions = T, SIZE = SIZE, add.legend=F, rasterize=rasterize, width=width, height = height, text.size=text.size)
250
    return(a)
251
  }, mc.cores=cores)
252
  
253
  # add legend last
254
  add.legend=get.only.legend(group = colors.group[,1], colorv = colors.group[,2])
255
  
256
  nrows=ceiling(length(plot.list)/2)+1
257
  # 74mm * 99mm per panelwidth = 77, height = 74
258
  figure <-multipanelfigure:: multi_panel_figure(width = 2*width+width*0.2, height = ceiling(length(plot.list)/2)*height+120, rows = nrows, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
259
  
260
  rowi=1
261
  for(i in seq(plot.list)){
262
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
263
  }
264
  
265
  figure <- multipanelfigure::fill_panel(figure,row = nrows,column = 1:2,  add.legend)
266
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
267
}
268
269
plot.scatter.seurat=function(scmat, colors.group,clusters="", identityVector="",  identityVector.samples.list=NULL, seurat.feature=NULL, lv=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height=74, text.size=10, add.proportions=T, add.density=F, add.labels=F){
270
  # tools:
271
  library(Matrix)
272
  library(Seurat)
273
  source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R")
274
  library(data.table)
275
  library(ComplexHeatmap)
276
  library(circlize)
277
  library(parallel)
278
  library(ggplot2)
279
  
280
  coords=Embeddings(scmat[["umap"]])
281
  clusters.samples=as.character(Idents(scmat))
282
  
283
  if(is.null(identityVector.samples.list)){
284
    identityVector.samples=clusters.samples
285
    
286
    for(j in seq(clusters)){
287
      identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j]
288
    }
289
  }else{
290
    identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list
291
  }
292
  
293
  if(!is.null(seurat.feature)){
294
    identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list
295
    
296
    ind=order(match(identityVector.samples, colors.group[,1]))
297
    
298
    # order everything:
299
    identityVector.samples=identityVector.samples[ind]
300
    coords=coords[ind,]
301
    lv=lv[ind]
302
  }
303
  
304
  # take the colors:
305
  colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2]
306
  
307
  plot.list=list(plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, lv = lv, namev=name, main = name, add.proportions = add.proportions, add.density = add.density, add.labels = add.labels, SIZE = SIZE, add.legend=T, rasterize=rasterize, width=width, height = height, text.size=text.size))
308
309
  figure <-multipanelfigure:: multi_panel_figure(width = width+50, height = height, rows = 1, columns = 1, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
310
  
311
  for(i in seq(plot.list)){
312
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
313
  }
314
  
315
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
316
}
317
318
319
#'
320
#'
321
compute.FDR.scRNA=function(i, s.anno, nperm=1000, geneset, nCores=6){
322
  library(AUCell)
323
  library(Matrix)
324
  library(Seurat)
325
  
326
  load(s.anno[i,2])
327
  
328
  # take only genes that are in the matrix
329
  geneset=geneset[geneset%in%rownames(scmat)]
330
  
331
  # Random
332
  geneset.random=lapply(seq(nperm), function(i, genes){
333
    sample(rownames(scmat), length(geneset))
334
  })
335
  
336
  names(geneset.random)=paste0("Random",seq(nperm))
337
  
338
  geneset.observed=list("MDS_signature"=geneset)
339
  
340
  data_n <- data.matrix(GetAssayData(scmat, assay = "RNA"))
341
  
342
  cells_rankings <- AUCell_buildRankings(data_n, nCores=nCores, plotStats=TRUE)
343
  AUC.random <- AUCell_calcAUC(geneset.random, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05)
344
  AUC.observed <- AUCell_calcAUC(geneset.observed, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05)
345
  
346
  # Alternative: assign cells according to the 'automatic' threshold
347
  cells_assignment <- AUCell_exploreThresholds(AUC.observed, plotHist=T, assignCells=TRUE)
348
  
349
  # how many times random higher than observed, divide by the number of permutations == eFDR
350
  y=as.numeric(getAUC(AUC.observed))
351
  x=getAUC(AUC.random)
352
  
353
  FDR=sapply(seq(y), function(i){
354
    sum(x[,i]>y[i])/nperm
355
  })
356
  
357
  empirical.FDR=data.frame(t(FDR), check.names = F)
358
  rownames(empirical.FDR)=paste0(names(geneset.observed), "_eFDR_", nperm, "_", length(geneset))
359
  colnames(empirical.FDR)=colnames(scmat)
360
  
361
  a=scmat[["SingleR.label"]]
362
  table(a[rownames(a)%in%cells_assignment$MDS_signature$assignment,])
363
  table(a[empirical.FDR<0.05,])
364
  
365
  scmat[["empirical.FDR"]]=as.numeric(empirical.FDR)
366
  DimPlot(scmat, group.by = "empirical.FDR")
367
  
368
  scmat[["y"]]=as.numeric(y)>0.04
369
  DimPlot(scmat, group.by = "y")
370
  
371
  return(list(empirical.FDR, y))
372
}
373
374
get.only.legend=function(colorv, group, position="right", direction="vertical"){
375
  library(ggplot2)
376
  
377
  dat=data.frame(group, colorv)
378
  levels(dat$group)=group
379
  myColors <- colorv
380
  names(myColors) <- levels(group)
381
  colScale <- scale_colour_manual(name = "group",values = myColors)
382
  dat$x=dim(dat)[1]
383
  dat$y=dim(dat)[1]
384
  
385
  legend <- cowplot::get_legend(ggplot(dat, aes(x, y, colour = group)) + theme_bw() +
386
                                  geom_point(size=0.6) + colScale +theme(legend.position = position, legend.direction = direction, legend.title = element_blank(), 
387
                                                                         legend.key=element_blank(), legend.box.background=element_blank(),
388
                                                                         legend.text=element_text(size = 10, face = "bold", family="Helvetica"))+
389
                                  theme(plot.margin=unit(c(1,1,1,1), "mm"))+
390
                                  guides(colour = guide_legend(override.aes = list(size=3))))
391
  return(legend)
392
}
393
394
395
statistical.comparisons.scRNA=function(i, s.anno, genelist.statistical.analysis=NULL, test.use="wilcox"){
396
  # plot proportions as a barplot next to 2D points
397
  load(s.anno[i,2])
398
  coords=Embeddings(scmat[["umap"]])
399
  identityVector=gsub(" ", "", unlist(strsplit(s.anno$Type[i], ", |,")))
400
  clusters=gsub(" ", "", unlist(strsplit(s.anno$Clusters[i], ", |,")))
401
  clusters.samples=as.character(Idents(scmat))
402
  identityVector.samples=clusters.samples
403
  
404
  for(j in seq(clusters)){
405
    identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j]
406
  }
407
  
408
  name.sample=s.anno$Sample[i]
409
  
410
  if(is.null(genelist.statistical.analysis))genelist.statistical.analysis=rownames(scmat)
411
  
412
  # Find the DE genes per factor and within factor:
413
  statistical.analysis=do.call(rbind, mclapply(unique(identityVector.samples), function(id){
414
    clusters2test=unique(clusters.samples[identityVector.samples%in%id])
415
    
416
    # clusters2test vs Rest:
417
    markers=FindMarkers(scmat, ident.1 = clusters2test, group.by = "seurat_clusters", test.use = test.use,  logfc.threshold =0.01, only.pos = T, features = genelist.statistical.analysis)
418
    
419
    #DE in clusters within category
420
    cluster.markers=FindAllMarkers(scmat[,clusters.samples%in%clusters2test], test.use = test.use, logfc.threshold = 0.01,  only.pos = T, features = genelist.statistical.analysis)
421
    
422
    # make genelists with annotation:
423
    markers$test=paste0(id, " vs. Rest")
424
    markers$gene=rownames(markers)
425
    cluster.markers$test=paste0(id, " ", cluster.markers$cluster, " vs. Rest")
426
    cluster.markers=cluster.markers[,!colnames(cluster.markers)%in%"cluster"]
427
    df.statistics=plyr::rbind.fill(list(markers, cluster.markers))
428
    df.statistics$test.used=test.use
429
    return(df.statistics)
430
  }))
431
  
432
  return(statistical.analysis)
433
}
434
435
436
#' @param seurat.object Seurat class object or data matrix that can be converted to Seurat object. Can also be a list of data matrices (RNA+citeseq etc.), Must be named by assay type with four uppercase letters (PROT, etc) (also found from seurat object with this name). 
437
#' @param regress.cell.label Character vector with as many columns as seurat.object Contains batch information for each cell. Uses fastMNN/SCTtransform to batch correct the data (usually different samples or experiments).
438
#' @param check.pcs Plot jackstraw and elbowplot to optimize the number of PCA componens
439
#' @param plot.umap Umap plot for clusters and for sample ID, if batch corrected also batch clustering
440
#' @param resolution Resolution parameter for clustering
441
#' @param nFeature.min Filter cells with < nFeature.min
442
#' @param nFeature.max Filter cells with > nFeature.max
443
#' @param percent.mitoDNA Filter cells with mitochondrial genes > percent.mitoDNA
444
#' @param singleR Annotate each cell using singleR built in annotations. Mainly uses Encode+blueprint annotations.
445
#' @param cores Number of cores to use for Seurat/singleR/MNNcorrect
446
#' @param add.gm.score Adds geneset score (using geometric mean) to fm, must be a named list of genes. Geneset name is added to fm and seurat object.
447
#' @param ... Pass parameters to Seurat tools
448
sc.data.analysis=function(scmat, name="scRNA_object", nr.pcs=30, regress.cell.label=NULL, batch.correction.method=c("SCTtransform","MNNcorrect"), auto.order=F, normalize.input=T, check.pcs=T, plot.umap=T, resolution=0.5, nFeature.min=0, nFeature.max=10000, percent.mitoDNA=25, singleR=T, singleR.reference=c("blueprint_encode", "HPCA"), cores=6, add.gm.score=NULL, ...){
449
  
450
  # determine computational resources:
451
  future::plan("multiprocess", workers = cores)
452
  options(future.globals.maxSize= 5e+09)
453
  
454
  # set method for batch correction
455
  batch.correction.method=match.arg(batch.correction.method)
456
  
457
  # list of data matrix, can be citeseq data:
458
  if(class(scmat)=="list"){
459
    cat("Input is list, splitting to data matrix by type and adding to Seurat", sep="\n\n")
460
    
461
    list.additional=lapply(2:length(scmat), function(i)CreateAssayObject(scmat[[i]]))
462
    names(list.additional)=names(scmat)[2:length(scmat)]
463
    scmat <- CreateSeuratObject(scmat[[1]])
464
  }else{
465
    list.additional=NULL
466
    # if class is not seurat object
467
    if(!class(scmat)=="Seurat"){
468
      cat("Input is data matrix, converting to Seurat object", sep="\n\n")
469
      scmat <- CreateSeuratObject(scmat)
470
    }else{
471
      cat("Input is Seurat object", sep="\n\n")
472
    }
473
  }
474
  DefaultAssay(object = scmat) <- "RNA"
475
  
476
  # is it a vector of pcs or single value?
477
  if(length(nr.pcs)==1)nr.pcs=seq(nr.pcs)
478
  
479
  # QC
480
  scmat=PercentageFeatureSet(scmat, pattern = "^MT-", col.name = "percent.mt")
481
  r1=VlnPlot(scmat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
482
  ggplot2::ggsave(r1, filename = paste0(name, "_QC.pdf"), width = 12)
483
  
484
  # Filtering
485
  nFeature_RNA <- FetchData(object = scmat, vars = "nFeature_RNA")
486
  pct.mt <- FetchData(object = scmat, vars = "percent.mt")
487
  filt=which(x = nFeature_RNA > nFeature.min & nFeature_RNA < nFeature.max & pct.mt < percent.mitoDNA)
488
  cat(paste("Dimension before/after filtering:", paste(paste(dim(scmat), collapse="x"), paste(dim(scmat[, filt]), collapse="x"), collapse = "/")), sep="\n\n")
489
  scmat=scmat[, filt]
490
  
491
  if(normalize.input){
492
    assay="SCT"
493
    scmat=SCTransform(scmat, variable.features.n = 3000) #,...)
494
    
495
    # seurat standard processing
496
    scmat=RunPCA(scmat, npcs = 100)
497
    reduction="pca"
498
  }else{
499
    assay="RNA"
500
    
501
    # assume that the data is normalized
502
    scmat <- FindVariableFeatures(object = scmat)
503
    scmat <- ScaleData(object = scmat)
504
    scmat=RunPCA(scmat, npcs = 100)
505
    reduction="pca"
506
  }
507
  
508
  # set default assay to use:
509
  DefaultAssay(object = scmat) <- assay
510
  
511
  # use SCTransform to remove batch effect, used in umap and clustering
512
  # else use SCT to normalize data
513
  if(!is.null(regress.cell.label)&batch.correction.method=="SCTtransform"){
514
    batch.df=data.frame("batch"=regress.cell.label[filt])
515
    rownames(batch.df)=colnames(scmat)
516
    scmat[["batch"]] = batch.df
517
    scmat=SCTransform(scmat,vars.to.regress="batch", variable.features.n = 3000)
518
    
519
    # seurat standard processing
520
    scmat=RunPCA(scmat, npcs = 100)
521
    
522
    name=paste0(name, "_", batch.correction.method)
523
    reduction="pca"
524
  }
525
  
526
  # optimize pcs number:
527
  if(check.pcs){
528
    
529
    # setting here original RNA to define PCAs, SCT not working yet, wait for a fix
530
    # also the issue here how to deal with the mnnCorrect?
531
    # DefaultAssay(object = scmat) <- "RNA"
532
    # scmat <- NormalizeData(scmat,display.progress = FALSE)
533
    # scmat <- FindVariableFeatures(scmat,do.plot = F,display.progress = FALSE)
534
    # scmat <- ScaleData(scmat)
535
    # scmat=RunPCA(scmat, npcs = 100)
536
    
537
    # determine how many pcs should be used:
538
    r3=ElbowPlot(scmat, ndims = 100, reduction = "pca")
539
    ggplot2::ggsave(r3, filename = paste0(name, "_elbow.pdf"))
540
    
541
    # find optimal number of PCs to use:
542
    scmat <- JackStraw(scmat, num.replicate = 100, dims = 100, reduction = "pca")
543
    scmat <- ScoreJackStraw(scmat, dims = 1:100, reduction = "pca")
544
    r4=JackStrawPlot(scmat, dims = 1:100)
545
    ggplot2::ggsave(r4, filename = paste0(name, "_Jackstraw.pdf"), width = 12)
546
    
547
    cat("PCs checking done", sep="\n\n")
548
  }
549
  
550
  # use MNN to remove batch effect, used in umap and clustering:
551
  if(!is.null(regress.cell.label)&batch.correction.method=="MNNcorrect"){
552
    library(BiocParallel)
553
    library(batchelor)
554
    
555
    cat("FastMNN batch correction...", sep="\n\n")
556
    
557
    if(is.factor(regress.cell.label)&!auto.order){
558
      order=levels(regress.cell.label)
559
      cat("FastMNN batch correction order:", sep="\n\n")
560
      cat(order, sep="-->")
561
      cat("", sep="\n\n")
562
    }
563
    
564
    if(!is.factor(regress.cell.label)&!auto.order){
565
      order=unique(regress.cell.label)
566
      cat("FastMNN batch correction order:", sep="\n\n")
567
      cat(order, sep="-->")
568
      cat("", sep="\n\n")
569
    }
570
    
571
    batch.df=data.frame("batch"=regress.cell.label[filt])
572
    rownames(batch.df)=colnames(scmat)
573
    scmat[["batch"]] = batch.df
574
    
575
    # http://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/fast_mnn.html
576
    scmat <- NormalizeData(scmat)
577
    scmat <- FindVariableFeatures(scmat)
578
    
579
    object.list = SplitObject(scmat, split.by = "batch")[order(order)]
580
    
581
    scmat <- SeuratWrappers::RunFastMNN(object.list, reduction.name = "mnnCorrect", auto.order=auto.order, BPPARAM=MulticoreParam(cores))
582
    
583
    # set defaults 
584
    reduction="mnnCorrect"
585
    cat("FastMNN batch correction... done", sep="\n\n")
586
    
587
  }
588
  
589
  # Basic seurat processing with umap:
590
  scmat=RunUMAP(scmat, dims = nr.pcs, reduction = reduction)
591
  scmat=FindNeighbors(scmat, dims = nr.pcs, reduction = reduction)
592
  scmat=FindClusters(scmat, resolution = resolution)
593
  
594
  # add celltype automatically
595
  if(singleR){
596
    cat("Annotating with singleR", sep="\n\n")
597
    
598
    if(!file.exists(paste0(name, "_singler_object.Rdata"))){
599
      library("SingleR")
600
      
601
      counts <- GetAssayData(scmat, assay = assay, slot = "counts")
602
      
603
      if(singleR.reference=="blueprint_encode"){
604
        if(dim(counts)[2]<100000){
605
          singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(blueprint_encode), project.name=name, fine.tune = T,do.signatures = T, numCores=cores)
606
        }else{
607
          cat("SingleR in batches", sep="\n\n")
608
          singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat),  N=30000, ref.list = list(blueprint_encode), xy = Embeddings(scmat[["umap"]]), project.name=name, fine.tune = T, do.signatures = T, numCores=cores)
609
        }
610
      }
611
      
612
      if(singleR.reference=="HPCA"){
613
        if(dim(counts)[2]<100000){
614
          singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores)
615
        }else{
616
          cat("SingleR in batches", sep="\n\n")
617
          singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat), N=30000, ref.list = list(hpca), project.name=name, xy = Embeddings(scmat[["umap"]]), fine.tune = T,do.signatures = T, numCores=cores)
618
        }
619
      }
620
      
621
      # add original identifiers
622
      singler$meta.data$orig.ident = scmat@meta.data$orig.ident # the original identities, if not supplied in 'annot'
623
      
624
      ## if using Seurat v3.0 and over use:
625
      singler$meta.data$xy = scmat@reductions$umap@cell.embeddings # the tSNE coordinates
626
      singler$meta.data$clusters = scmat@active.ident # the Seurat clusters (if 'clusters' not provided)
627
      
628
      # add annot from hpca
629
      # singler2 = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores)
630
      # singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells"
631
      # singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells"
632
      # singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells"
633
      # singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells"
634
      
635
      # these names are not intuitive, replace them, mentioned that these are actually naive http://comphealth.ucsf.edu/SingleR/SupplementaryInformation2.html:
636
      singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD4+ T-cells"]="CD4+ Tn"
637
      singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD8+ T-cells"]="CD8+ Tn"
638
      singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD4+ T-cells"]="CD4+ Tn"
639
      singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD8+ T-cells"]="CD8+ Tn"
640
      
641
      save(singler, file=paste0(name, "_singler_object.Rdata"))
642
      
643
    }else{
644
      library("SingleR")
645
      load(paste0(name, "_singler_object.Rdata"))
646
      cat(paste0("Loaded existing SingleR object (", paste0(name, "_singler_object.Rdata"), "), remove/rename it if you want to re-compute."), sep="\n\n")
647
      
648
    }
649
    # can be added as cluster id
650
    cluster=singler$singler[[1]]$SingleR.clusters$labels
651
    cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels
652
    
653
    # order based on cell type and add celltype to cluster:
654
    lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes")
655
    lineage=unique(c(lineage,blueprint_encode$types, hpca$types))
656
    lineage=lineage[lineage%in%singler$singler[[1]]$SingleR.single$labels]
657
    
658
    scmat[["SingleR.label.main"]]=singler$singler[[1]]$SingleR.single.main$labels
659
    scmat[["SingleR.label.cluster"]]=paste(singler$singler[[1]]$SingleR.single$labels, Idents(scmat))
660
    scmat[["SingleR.label"]]=factor(singler$singler[[1]]$SingleR.single$labels, levels=lineage[lineage%in%unique(singler$singler[[1]]$SingleR.single$labels)])
661
    
662
    identityVector.samples=as.character(Idents(scmat))
663
    clusters.samples=Idents(scmat)
664
    
665
    for(j in seq(cluster)){
666
      identityVector.samples[clusters.samples%in%rownames(cluster)[j]]=paste0(cluster[j], ":", rownames(cluster)[j])
667
    }
668
    
669
    # order and cluster identity;
670
    cluster=cluster[order(match(cluster[,1],lineage)),,drop=F]
671
    cat(paste(levels(Idents(scmat)), collapse=","), paste(cluster, collapse=","), sep="\t\t")   
672
    
673
    scmat[["SingleR.cluster"]]=gsub(":.*.", "", Idents(scmat)) # just the "cluster label" per cell
674
    
675
    # order seurat clusters too:
676
    Idents(scmat)=factor(identityVector.samples, levels=paste0(cluster, ":", rownames(cluster)))
677
    
678
    r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE,  pt.size = 0.5, repel = T)  + NoLegend() + NoAxes()
679
    ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6)
680
    
681
    cat("\nSingleR done", sep="\n\n")
682
    
683
    
684
    # new singleR:
685
    # counts <- GetAssayData(scmat, assay = "RNA", slot = "counts")
686
    # 
687
    # library(SingleR)
688
    # library(scater)
689
    # 
690
    # ref=NovershternHematopoieticData()    
691
    # 
692
    # common <- intersect(rownames(counts), rownames(ref))
693
    # ref <- ref[common,]
694
    # counts <- counts[common,]
695
    # counts.log <- logNormCounts(counts)
696
    # 
697
    # singler <- SingleR(test = counts.log, ref = ref, labels = ref$label.main, numCores=cores)
698
    # 
699
    # # order based on cell type and add celltype to cluster:
700
    # lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes")
701
    # lineage=unique(c(lineage,ref$label.main))
702
    # 
703
    # scmat[["SingleR.label.main"]]=singler$first.labels
704
    # scmat[["SingleR.label"]]=factor(singler$labels, levels=lineage[lineage%in%unique(singler$labels)])
705
    # 
706
    # r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE,  pt.size = 0.5, repel = T)  + NoLegend() + NoAxes()
707
    # ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6)
708
    # 
709
    # cat("\nSingleR done", sep="\n\n")
710
    
711
  }
712
  
713
  # plot clusters and samples
714
  if(plot.umap){
715
    if(!is.null(regress.cell.label)){
716
      r6=DimPlot(scmat, group.by = c("batch"), ncol = 2, label = T, repel = T)  + NoLegend() + NoAxes()
717
      ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_batch.pdf"), width = 6, height = 6)
718
      r6=DimPlot(scmat, group.by = c("ident"), ncol = 2, label = T, repel = T)  + NoLegend() + NoAxes()
719
      ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6)
720
    }else{
721
      r6=DimPlot(scmat, group.by = c("ident"), ncol = 1, label = T, repel = T)  + NoLegend() + NoAxes()
722
      ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6)
723
    }
724
  }
725
  
726
  # add immunoscores to FM format data matrix
727
  fm.f <- GetAssayData(scmat)
728
  
729
  # add gm based score:
730
  add.scores=list(HLAIScore=c("B2M", "HLA-A", "HLA-B","HLA-C"), HLAIIScore=c("HLA-DMA","HLA-DMB","HLA-DPA1","HLA-DPB1","HLA-DRA","HLA-DRB1"), CytolyticScore=c("GZMA", "PRF1", "GNLY", "GZMH", "GZMM"))
731
  if(!is.null(add.gm.score))add.scores=append(add.scores, add.gm.score)
732
  
733
  gm.objects=do.call(rbind, lapply(seq(add.scores), function(i){
734
    dat3=fm.f[rownames(fm.f)%in%add.scores[[i]],]
735
    gm=t(apply(dat3, 2, gm_mean)) # done to normalized values
736
    rownames(gm)=names(add.scores)[i]
737
    return(gm)
738
  }))
739
  
740
  # also add to seurat object:
741
  for(i in seq(add.scores)){
742
    scmat[[names(add.scores)[i]]] <- gm.objects[i,]
743
  }
744
  
745
  # gene expression and scores to fm:
746
  rownames(fm.f)=paste0("N:GEXP:", rownames(fm.f))
747
  rownames(gm.objects)=paste0("N:SAMP:", rownames(gm.objects))
748
  fm=rbind(gm.objects, fm.f)
749
  
750
  # DE analysis:
751
  markers.all=FindAllMarkers(scmat, only.pos = T)#, ...)
752
  
753
  # go through each data matrix, add to seurat and fm if more matrices
754
  if(!is.null(list.additional)){
755
    
756
    for(i in seq(list.additional)){
757
      mat.add=list.additional[[i]]
758
      mat.add=subset(mat.add,cells = colnames(scmat))
759
      scmat[[names(list.additional)[i]]] <- mat.add
760
      scmat <- NormalizeData(scmat, assay = names(list.additional)[i], normalization.method = "CLR") # test scttransform too
761
      scmat <- ScaleData(scmat, assay = names(list.additional)[i])
762
      
763
      add.data.normalized <- data.matrix(GetAssayData(scmat, assay = names(list.additional)[i], slot = "data"))
764
      rownames(add.data.normalized)=paste0("N:",names(list.additional)[i], ":", rownames(add.data.normalized))
765
      fm=rbind(fm, add.data.normalized)
766
    }
767
  }
768
  
769
  save(list=c("fm", "scmat", "markers.all"), file=paste0(name, "_scRNA.Rdata"))
770
  
771
  return(paste0(name, "_scRNA.Rdata"))
772
}
773
774
# make sure  data is on a linear non-log scale
775
# check 0 values, if a lot between 0-1 better to use add.one. If counts, remove works too quite well
776
# zero fix methods: https://arxiv.org/pdf/1806.06403.pdf
777
gm_mean=function(x, na.rm=TRUE, zero.handle=c("remove", "add.one", "zero.propagate")){
778
  zero.handle=match.arg(zero.handle)
779
  if(any(x < 0, na.rm = TRUE)){
780
    warning("Negative values produced NaN - is the data on linear - non-log scale?")
781
    return(NaN)
782
  }
783
  
784
  if(zero.handle=="remove"){
785
    return(exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)))
786
  }
787
  
788
  if(zero.handle=="add.one"){
789
    return(exp(mean(log(x+1), na.rm = na.rm))-1)
790
  }
791
  
792
  if(zero.handle=="zero.propagate"){
793
    if(any(x == 0, na.rm = TRUE)){
794
      return(0)
795
    }
796
    return(exp(mean(log(x), na.rm = na.rm)))
797
  }
798
  
799
}
800
801
get.cluster.label.singleR=function(singler){
802
  
803
  # how to add this?
804
  cluster=singler$singler[[1]]$SingleR.clusters$labels
805
  cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels
806
  
807
  # order based on cell type:
808
  # cat(paste0("'", colors.group[,1], "'"), sep=",")
809
  lineage=c('HSC','MPP','CLP','CMP','GMP','MEP','Monocytes','DC','Macrophages','Macrophages M1','Macrophages M2','CD4+ T-cells','CD8+ T-cells','Tregs','T_cell:gamma-delta','T_cell:CD4+_Naive','T_cell:CD8+_naive','T_cell:Treg:Naive','CD4+ Tcm','CD4+ Tem','CD8+ Tcm','CD8+ Tem','NK cells','naive B-cells','Memory B-cells','B-cells','Class-switched memory B-cells','Plasma cells','Endothelial cells','Neutrophils','Eosinophils','Fibroblasts','Smooth muscle','Erythroblast','Erythrocytes','Megakaryocytes')
810
  
811
  cluster=cluster[order(match(cluster[,1],lineage)),,drop=F]
812
  
813
  # order and cluster identity;
814
  cat(paste(rownames(cluster), collapse=","), paste(cluster, collapse=","), sep="\t")
815
  cat("", sep="\n")
816
  return(cluster)
817
}
818
819
# run cellphoneDB
820
run.cellphonedb=function(scmat, celltype, out=getwd(), cores=10, threshold=0.05){
821
  
822
  if(!dir.exists(out))dir.create(out, recursive = T)
823
  
824
  if(is.null(celltype)){
825
    celltype=gsub(" ", "_", as.character(Idents(scmat)))
826
  }
827
  
828
  setwd(out)
829
  # input1:
830
  df=data.frame("Cell"=gsub("\\.", "_", colnames(scmat)), "cell_type"=celltype, stringsAsFactors = F)
831
  df$cell_type[scmat[["SingleR.label"]]=="Tregs"]="Tregs"
832
  
833
  meta=file.path(out, "meta.tsv")
834
  write.table(df, meta, row.names = F, col.names = T, quote = F, sep="\t")
835
  
836
  # input2:
837
  counts <- data.matrix(GetAssayData(scmat, assay = "SCT", slot = "data"))
838
  countsfile=tempfile()
839
  
840
  # gene symbol was not working...
841
  library("org.Hs.eg.db") # remember to install it if you don't have it already
842
  ensambleid <- mapIds(org.Hs.eg.db, keys = rownames(counts), keytype = "SYMBOL", column="ENSEMBL")
843
  rownames(counts)=ensambleid
844
  
845
  counts=counts[!is.na(rownames(counts)),]
846
  
847
  countsfile=file.path(out, "counts.tsv")
848
  
849
  write.table(t(c("Gene", gsub("\\.", "_", colnames(counts)))), countsfile, row.names = F, col.names = F, quote = F, sep="\t")
850
  write.table(counts, countsfile, row.names = T, col.names = F, quote = F, sep="\t", append=T)
851
  
852
  system(paste0("cellphonedb method statistical_analysis ", meta, " ", countsfile, " --iterations=1000 --threads=", cores, " --threshold=", threshold))
853
  
854
  # make heatmap of interactions:
855
  system(paste0("cellphonedb plot heatmap_plot ", meta))
856
  
857
  deconvoluted=data.table::fread(file.path(out, "out/deconvoluted.txt"), data.table = F)
858
  pvalues=data.table::fread(file.path(out, "out/pvalues.txt"), data.table = F)
859
  significant_means.txt=data.table::fread(file.path(out, "out/significant_means.txt"), data.table = F)
860
  means=data.table::fread(file.path(out, "out/means.txt"), data.table = F)
861
  
862
  unlink(countsfile)
863
  unlink(meta)
864
  
865
  return(list("deconvoluted"=deconvoluted, "pvalues"=pvalues, "significant_means"=significant_means.txt, "means"=means))
866
}
867
868
# found bug in reference list, was missing, also min genes was 200 not 0 causing errors...
869
CreateBigSingleRObject.custom=function (counts, annot = NULL, project.name, xy, clusters, N = 10000,
870
                                        min.genes = 0, technology = "10X", species = "Human", citation = "",
871
                                        ref.list = list(), normalize.gene.length = F, variable.genes = "de",
872
                                        fine.tune = T, reduce.file.size = T, do.signatures = F, do.main.types = T,
873
                                        temp.dir = getwd(), numCores = SingleR.numCores){
874
  n = ncol(counts)
875
  s = seq(1, n, by = N)
876
  dir.create(paste0(temp.dir, "/singler.temp/"), showWarnings = FALSE)
877
  for (i in s) {
878
    print(i)
879
    A = seq(i, min(i + N - 1, n))
880
    
881
    singler = CreateSinglerObject(counts[, A], annot = annot[A], ref.list = ref.list,
882
                                  project.name = project.name, min.genes = min.genes,
883
                                  technology = technology, species = species, citation = citation,
884
                                  do.signatures = do.signatures, clusters = NULL, numCores = numCores)
885
    
886
    
887
    save(singler, file = paste0(temp.dir, "/singler.temp/",
888
                                project.name, ".", i, ".RData"))
889
  }
890
  singler.objects.file <- list.files(paste0(temp.dir, "/singler.temp/"),
891
                                     pattern = "RData", full.names = T)
892
  singler.objects = list()
893
  for (i in 1:length(singler.objects.file)) {
894
    load(singler.objects.file[[i]])
895
    singler.objects[[i]] = singler
896
  }
897
  singler = SingleR.Combine.custom(singler.objects, order = colnames(counts),expr = counts, clusters = clusters, xy = xy)
898
  singler
899
}
900
901
# bug also in this...
902
SingleR.Combine.custom=function (singler.list, order = NULL, clusters = NULL, expr = NULL, xy = NULL) 
903
{
904
  singler = c()
905
  singler$singler = singler.list[[1]]$singler
906
  for (j in 1:length(singler.list[[1]]$singler)) {
907
    singler$singler[[j]]$SingleR.cluster = c()
908
    singler$singler[[j]]$SingleR.cluster.main = c()
909
    singler$singler[[j]]$SingleR.single$clusters = c()
910
  }
911
  singler$meta.data = singler.list[[1]]$meta.data
912
  singler$meta.data$clusters = c()
913
  singler$meta.data$xy = c()
914
  singler$meta.data$data.sets = rep(singler$meta.data$project.name, 
915
                                    length(singler$meta.data$orig.ident))
916
  for (i in 2:length(singler.list)) {
917
    for (j in 1:length(singler$singler)) {
918
      if (singler.list[[i]]$singler[[j]]$about$RefData != 
919
          singler.list[[1]]$singler[[j]]$about$RefData) {
920
        stop("The objects are not ordered by the same reference data.")
921
      }
922
      singler$singler[[j]]$about$Organism = c(singler$singler[[j]]$about$Organism, 
923
                                              singler.list[[i]]$singler[[j]]$about$Organism)
924
      singler$singler[[j]]$about$Citation = c(singler$singler[[j]]$about$Citation, 
925
                                              singler.list[[i]]$singler[[j]]$about$Citation)
926
      singler$singler[[j]]$about$Technology = c(singler$singler[[j]]$about$Technology, 
927
                                                singler.list[[i]]$singler[[j]]$about$Technology)
928
      singler$singler[[j]]$SingleR.single$labels = rbind(singler$singler[[j]]$SingleR.single$labels, 
929
                                                         singler.list[[i]]$singler[[j]]$SingleR.single$labels)
930
      if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) {
931
        singler$singler[[j]]$SingleR.single$labels1 = rbind(singler$singler[[j]]$SingleR.single$labels1, 
932
                                                            singler.list[[i]]$singler[[j]]$SingleR.single$labels1)
933
      }
934
      singler$singler[[j]]$SingleR.single$scores = rbind(singler$singler[[j]]$SingleR.single$scores, 
935
                                                         singler.list[[i]]$singler[[j]]$SingleR.single$scores)
936
      singler$singler[[j]]$SingleR.single.main$labels = rbind(singler$singler[[j]]$SingleR.single.main$labels, 
937
                                                              singler.list[[i]]$singler[[j]]$SingleR.single.main$labels)
938
      if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) {
939
        singler$singler[[j]]$SingleR.single.main$labels1 = rbind(singler$singler[[j]]$SingleR.single.main$labels1, 
940
                                                                 singler.list[[i]]$singler[[j]]$SingleR.single.main$labels1)
941
      }
942
      singler$singler[[j]]$SingleR.single.main$scores = rbind(singler$singler[[j]]$SingleR.single.main$scores, 
943
                                                              singler.list[[i]]$singler[[j]]$SingleR.single.main$scores)
944
      singler$singler[[j]]$SingleR.single$cell.names = c(singler$singler[[j]]$SingleR.single$cell.names, 
945
                                                         singler.list[[i]]$singler[[j]]$SingleR.single$cell.names)
946
      singler$singler[[j]]$SingleR.single.main$cell.names = c(singler$singler[[j]]$SingleR.single.main$cell.names, 
947
                                                              singler.list[[i]]$singler[[j]]$SingleR.single.main$cell.names)
948
      if (!is.null(singler$singler[[j]]$SingleR.single.main$pval)) {
949
        singler$singler[[j]]$SingleR.single.main$pval = c(singler$singler[[j]]$SingleR.single.main$pval, 
950
                                                          singler.list[[i]]$singler[[j]]$SingleR.single.main$pval)
951
      }
952
      if (!is.null(singler$singler[[j]]$SingleR.single$pval)) {
953
        singler$singler[[j]]$SingleR.single$pval = c(singler$singler[[j]]$SingleR.single$pval, 
954
                                                     singler.list[[i]]$singler[[j]]$SingleR.single$pval)
955
      }
956
    }
957
    singler$meta.data$project.name = paste(singler$meta.data$project.name, 
958
                                           singler.list[[i]]$meta.data$project.name, sep = "+")
959
    singler$meta.data$orig.ident = c(singler$meta.data$orig.ident, 
960
                                     singler.list[[i]]$meta.data$orig.ident)
961
    singler$meta.data$data.sets = c(singler$meta.data$data.sets, 
962
                                    rep(singler.list[[i]]$meta.data$project.name, length(singler.list[[i]]$meta.data$orig.ident)))
963
  }
964
  for (j in 1:length(singler$singler)) {
965
    if (!is.null(order)) {
966
      singler$singler[[j]]$SingleR.single$labels = singler$singler[[j]]$SingleR.single$labels[order, 
967
                                                                                              ]
968
      if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) {
969
        singler$singler[[j]]$SingleR.single$labels1 = singler$singler[[j]]$SingleR.single$labels1[order, 
970
                                                                                                  ]
971
      }
972
      singler$singler[[j]]$SingleR.single$scores = singler$singler[[j]]$SingleR.single$scores[order, 
973
                                                                                              ]
974
      singler$singler[[j]]$SingleR.single$cell.names = singler$singler[[j]]$SingleR.single$cell.names[order]
975
      singler$singler[[j]]$SingleR.single.main$labels = singler$singler[[j]]$SingleR.single.main$labels[order, 
976
                                                                                                        ]
977
      if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) {
978
        singler$singler[[j]]$SingleR.single.main$labels1 = singler$singler[[j]]$SingleR.single.main$labels1[order, 
979
                                                                                                            ]
980
      }
981
      singler$singler[[j]]$SingleR.single.main$scores = singler$singler[[j]]$SingleR.single.main$scores[order, 
982
                                                                                                        ]
983
      singler$singler[[j]]$SingleR.single.main$cell.names = singler$singler[[j]]$SingleR.single.main$cell.names[order]
984
      if (!is.null(singler$singler[[j]]$SingleR.single$pval)) {
985
        singler$singler[[j]]$SingleR.single$pval = singler$singler[[j]]$SingleR.single$pval[order]
986
        singler$singler[[j]]$SingleR.single.main$pval = singler$singler[[j]]$SingleR.single.main$pval[order]
987
      }
988
    }
989
  }
990
  if (!is.null(clusters) && !is.null(expr)) {
991
    for (j in 1:length(singler$singler)) {
992
      if (is.character(singler$singler[[j]]$about$RefData)) {
993
        ref = get(tolower(singler$singler[[j]]$about$RefData))
994
      }
995
      else {
996
        ref = singler$singler[[j]]$about$RefData
997
      }
998
      singler$singler[[j]]$SingleR.clusters = SingleR("cluster", 
999
                                                      expr, ref$data, types = ref$types, clusters = factor(clusters), 
1000
                                                      sd.thres = ref$sd.thres, genes = "de", fine.tune = T)
1001
      singler$singler[[j]]$SingleR.clusters.main = SingleR("cluster", 
1002
                                                           expr, ref$data, types = ref$main_types, clusters = factor(clusters), 
1003
                                                           sd.thres = ref$sd.thres, genes = "de", fine.tune = T)
1004
    }
1005
    singler$meta.data$clusters = clusters
1006
    if (!is.null(xy)) {
1007
      singler$meta.data$xy = xy
1008
    }
1009
  }
1010
  singler
1011
}