ImmunogenomicLandscape-BC / Git / [8e0848] /common_scripts/scRNA/functions.scRNA.analysis.R

Models:
WandaB/
ImmunogenomicLandscape-BC
Downloads: 1
[8e0848]: / common_scripts / scRNA / functions.scRNA.analysis.R
History
Download this file
1011 lines (801 with data), 47.3 kB

#'
#'
plot.scRNA.features.complexHeatmap=function(s.anno, features, name,width=300, cores){
  log=mclapply(seq(dim(s.anno)[1]), plot.scRNA.features, s.anno,features, name, mc.cores = cores, width=width)
}

plot.scRNA.features=function(i, s.anno, features, name, add.annotation=NULL, width=300){
  # eccite PBMC dataset:
  load(s.anno[i,2])
  
  #HSC, myeloid, B-solu, T-solu, erythroi
  # STEP1 find the cluster order, and set them based on lineage:
  clusters=Idents(scmat)
  if(s.anno[i,3]!=""){
    order.clu=as.numeric(unlist(strsplit(s.anno[i,3], ",|, | ,"))) # MODIFY
    
    ord.names=names(clusters[order(match(clusters,order.clu))])
    
  }else{
    
    order.clu=levels(clusters)
    
    # order samples within cluster by umap component with more spread
    # this way there is still information within cluster, for heatmaps
    coords=Embeddings(scmat[["umap"]])
    fit.cluster=levels(Idents(scmat))
    
    ord.names=unlist(lapply(fit.cluster, function(clu){
      d=coords[Idents(scmat)%in%clu,]
      ind=which.max(c(max(d[,1])-min(d[,1]), max(d[,2])-min(d[,2]))) # take the axis with most spread
      names(sort(d[,ind]))
    }))
    
  }
  
  
  
  features=data.frame(features, stringsAsFactors = F)
  
  if(!grepl("^B:|^N:", features[1,1])){
    features[,1]=paste0("N:GEXP:", features[,1])
    allfeats=rownames(fm)[!rowSums(fm>0)==0]
    
  }else{
    allfeats=gsub("N:GEXP:|N:RPPA:", "", rownames(fm)[!rowSums(fm>0)==0])
  }
  
  # text annot created if more columns than 1.
  if(dim(features)[2]>1){
    features=features[features[,1]%in%allfeats,]
    feats=features[,1]
    t.annot=apply(features[,2:dim(features)[2], drop=F], 1, paste, collapse=" ")
    names(t.annot)=feats
  }else{
    t.annot=NULL
    feats=features[features[,1]%in%allfeats,1]
  }
  
  # make a data frame for annotations on top of the heatmap
  annotdf=data.frame("cluster"=clusters,"HLAIScore"=as.numeric(fm["N:SAMP:HLAIScore",]), "HLAIIScore"=as.numeric(fm["N:SAMP:HLAIIScore",]), "CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",]))
  
  if(!is.null(add.annotation))annotdf$annotation=add.annotation
  
  name=gsub("__", "_", gsub("[[:punct:]]", "_", paste(name, s.anno[i,1], sep="_")))
  
  # plot using a complexheatmap package using a featurematrix format and custom plotting function
  r1=plot.complexHM.fm(feats = feats, text_annot = t.annot, fm.f = fm, annotdf = annotdf, feats.barplot = c("HLAIScore", "HLAIIScore", "CytolyticScore"), order_columns=ord.names, order_rows=F, split.columns = T, plotting.param="/research/groups/sysgen/PROJECTS/students/HEMAP_IMMUNOLOGY/minna_work/scRNA_genelist_plotting/plotting_param_fm.txt", NAME=name, WIDTH = width)
}

plot.gene.seurat=function(s.anno, feature, name,cols=c("grey75", "red"), max=1, min=0, cores=1){
  plot.feature=function(i, feature){
    # plot proportions as a barplot next to 2D points
    load(s.anno[i,2])
    
    if(!feature%in%rownames(scmat))return(NULL)
    
    p <- FeaturePlot(scmat, features = feature, cols = cols, max.cutoff = max, min.cutoff = min, order = T, combine = FALSE)
    p=lapply(p, `+`, labs(title = s.anno[i,1]))
    
    if(i>1){
      for(i in 1:length(p)) {
        p[[i]] <- p[[i]] + NoLegend() + NoAxes()
      }
    }else{
      p[[1]] <- p[[1]] + NoAxes()
    }
    
    cowplot::plot_grid(plotlist = p)
  }
  
  # plot here lineage proportion per patient
  plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores)
  plot.list=plot.list[!sapply(plot.list, is.null)]
  
  if(!length(plot.list))return(NULL)
  
  # 74mm * 99mm per panelwidth = 77, height = 74
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
}

plot.fm.feature=function(s.anno, feature, name, max=3, min=0, cores=1){
  plot.feature=function(i, feature){
    # plot proportions as a barplot next to 2D points
    load(s.anno[i,2])
    
    scmat[[feature]]=fm[rownames(fm)%in%feature,]
    
    p <- FeaturePlot(scmat, features = feature, cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE)
    p=lapply(p, `+`, labs(title = s.anno[i,1]))
    
    if(i>1){
      for(i in 1:length(p)) {
        p[[i]] <- p[[i]] + NoLegend() + NoAxes()
      }
    }else{
      p[[1]] <- p[[1]] + NoAxes()
    }
    
    return(p)
  }
  
  # plot here lineage proportion per patient
  plot.list=unlist(mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores), recursive = F)
  
  # 74mm * 99mm per panelwidth = 77, height = 74
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  # 74mm * 99mm per panelwidth = 77, height = 74
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
}

plot.several.fm.feature=function(s.anno.object, features, name, min=0, max=5, cores=5){
  load(s.anno.object)
  
  features=features[features%in%rownames(fm)]
  
  plot.feature=function(i, features){
    # plot proportions as a barplot next to 2D points
    
    scmat[[features[i]]]=fm[rownames(fm)%in%features[i],]
    
    p <- FeaturePlot(scmat, features = features[i], cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE)
    p=lapply(p, `+`, labs(title = features[i]))
    
    if(i>1){
      for(i in 1:length(p)) {
        p[[i]] <- p[[i]] + NoLegend() + NoAxes()
      }
    }else{
      p[[1]] <- p[[1]] + NoAxes()
    }
    return(p)
  }
  
  # plot here lineage proportion per patient
  plot.list=unlist(mclapply(seq(features), plot.feature, features, mc.cores=cores), recursive = F)
  
  # 74mm * 99mm per panelwidth = 77, height = 74
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
}

plot.seurat.ident=function(s.anno, colorv, name, cores=1){
  plot.feature=function(i){
    # plot proportions as a barplot next to 2D points
    load(s.anno[i,2])
    
    cells=gsub(" \\d+$", "", levels(Idents(scmat)))   
    
    p=DimPlot(scmat, group.by = c("ident"), cols = colorv[match(cells, colorv[,1]),2], ncol = 1, label = T, repel = T) + NoLegend() + NoAxes()
  }
  
  # plot here lineage proportion per patient
  plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, mc.cores=cores)
  
  # 74mm * 99mm per panelwidth = 77, height = 74
  figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
}


#'
plot.multi.scatter.scRNA=function(s.anno, scmat=NULL, colors.group, identityVector.samples.list=NULL, seurat.feature=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height = 74, text.size=10){
  # tools:
  library(Matrix)
  library(Seurat)
  source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R")
  library(data.table)
  library(ComplexHeatmap)
  library(circlize)
  library(parallel)
  library(ggplot2)
  
  
  # plot here lineage proportion per patient
  plot.list=mclapply(seq(dim(s.anno)[1]), function(i){
    # plot proportions as a barplot next to 2D points
    load(s.anno[i,2])
    coords=Embeddings(scmat[["umap"]])
    identityVector=unlist(strsplit(s.anno$Type[i], ", |,"))
    clusters=unlist(strsplit(s.anno$Clusters[i], ", |,"))
    clusters.samples=as.character(Idents(scmat))
    
    if(is.null(identityVector.samples.list)){
      identityVector.samples=clusters.samples
      
      for(j in seq(clusters)){
        identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j]
      }
    }else{
      identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list
    }
    
    if(!is.null(seurat.feature)){
      identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list
    }
    
    name.sample=s.anno$Sample[i]
    
    # take the colors:
    colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2]
    
    a=plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, namev=name, main = name, add.proportions = T, SIZE = SIZE, add.legend=F, rasterize=rasterize, width=width, height = height, text.size=text.size)
    return(a)
  }, mc.cores=cores)
  
  # add legend last
  add.legend=get.only.legend(group = colors.group[,1], colorv = colors.group[,2])
  
  nrows=ceiling(length(plot.list)/2)+1
  # 74mm * 99mm per panelwidth = 77, height = 74
  figure <-multipanelfigure:: multi_panel_figure(width = 2*width+width*0.2, height = ceiling(length(plot.list)/2)*height+120, rows = nrows, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  rowi=1
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  figure <- multipanelfigure::fill_panel(figure,row = nrows,column = 1:2,  add.legend)
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
}

plot.scatter.seurat=function(scmat, colors.group,clusters="", identityVector="",  identityVector.samples.list=NULL, seurat.feature=NULL, lv=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height=74, text.size=10, add.proportions=T, add.density=F, add.labels=F){
  # tools:
  library(Matrix)
  library(Seurat)
  source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R")
  library(data.table)
  library(ComplexHeatmap)
  library(circlize)
  library(parallel)
  library(ggplot2)
  
  coords=Embeddings(scmat[["umap"]])
  clusters.samples=as.character(Idents(scmat))
  
  if(is.null(identityVector.samples.list)){
    identityVector.samples=clusters.samples
    
    for(j in seq(clusters)){
      identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j]
    }
  }else{
    identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list
  }
  
  if(!is.null(seurat.feature)){
    identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list
    
    ind=order(match(identityVector.samples, colors.group[,1]))
    
    # order everything:
    identityVector.samples=identityVector.samples[ind]
    coords=coords[ind,]
    lv=lv[ind]
  }
  
  # take the colors:
  colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2]
  
  plot.list=list(plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, lv = lv, namev=name, main = name, add.proportions = add.proportions, add.density = add.density, add.labels = add.labels, SIZE = SIZE, add.legend=T, rasterize=rasterize, width=width, height = height, text.size=text.size))

  figure <-multipanelfigure:: multi_panel_figure(width = width+50, height = height, rows = 1, columns = 1, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm"))
  
  for(i in seq(plot.list)){
    figure <- multipanelfigure::fill_panel(figure,  plot.list[[i]])
  }
  
  multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE)
}


#'
#'
compute.FDR.scRNA=function(i, s.anno, nperm=1000, geneset, nCores=6){
  library(AUCell)
  library(Matrix)
  library(Seurat)
  
  load(s.anno[i,2])
  
  # take only genes that are in the matrix
  geneset=geneset[geneset%in%rownames(scmat)]
  
  # Random
  geneset.random=lapply(seq(nperm), function(i, genes){
    sample(rownames(scmat), length(geneset))
  })
  
  names(geneset.random)=paste0("Random",seq(nperm))
  
  geneset.observed=list("MDS_signature"=geneset)
  
  data_n <- data.matrix(GetAssayData(scmat, assay = "RNA"))
  
  cells_rankings <- AUCell_buildRankings(data_n, nCores=nCores, plotStats=TRUE)
  AUC.random <- AUCell_calcAUC(geneset.random, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05)
  AUC.observed <- AUCell_calcAUC(geneset.observed, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05)
  
  # Alternative: assign cells according to the 'automatic' threshold
  cells_assignment <- AUCell_exploreThresholds(AUC.observed, plotHist=T, assignCells=TRUE)
  
  # how many times random higher than observed, divide by the number of permutations == eFDR
  y=as.numeric(getAUC(AUC.observed))
  x=getAUC(AUC.random)
  
  FDR=sapply(seq(y), function(i){
    sum(x[,i]>y[i])/nperm
  })
  
  empirical.FDR=data.frame(t(FDR), check.names = F)
  rownames(empirical.FDR)=paste0(names(geneset.observed), "_eFDR_", nperm, "_", length(geneset))
  colnames(empirical.FDR)=colnames(scmat)
  
  a=scmat[["SingleR.label"]]
  table(a[rownames(a)%in%cells_assignment$MDS_signature$assignment,])
  table(a[empirical.FDR<0.05,])
  
  scmat[["empirical.FDR"]]=as.numeric(empirical.FDR)
  DimPlot(scmat, group.by = "empirical.FDR")
  
  scmat[["y"]]=as.numeric(y)>0.04
  DimPlot(scmat, group.by = "y")
  
  return(list(empirical.FDR, y))
}

get.only.legend=function(colorv, group, position="right", direction="vertical"){
  library(ggplot2)
  
  dat=data.frame(group, colorv)
  levels(dat$group)=group
  myColors <- colorv
  names(myColors) <- levels(group)
  colScale <- scale_colour_manual(name = "group",values = myColors)
  dat$x=dim(dat)[1]
  dat$y=dim(dat)[1]
  
  legend <- cowplot::get_legend(ggplot(dat, aes(x, y, colour = group)) + theme_bw() +
                                  geom_point(size=0.6) + colScale +theme(legend.position = position, legend.direction = direction, legend.title = element_blank(), 
                                                                         legend.key=element_blank(), legend.box.background=element_blank(),
                                                                         legend.text=element_text(size = 10, face = "bold", family="Helvetica"))+
                                  theme(plot.margin=unit(c(1,1,1,1), "mm"))+
                                  guides(colour = guide_legend(override.aes = list(size=3))))
  return(legend)
}


statistical.comparisons.scRNA=function(i, s.anno, genelist.statistical.analysis=NULL, test.use="wilcox"){
  # plot proportions as a barplot next to 2D points
  load(s.anno[i,2])
  coords=Embeddings(scmat[["umap"]])
  identityVector=gsub(" ", "", unlist(strsplit(s.anno$Type[i], ", |,")))
  clusters=gsub(" ", "", unlist(strsplit(s.anno$Clusters[i], ", |,")))
  clusters.samples=as.character(Idents(scmat))
  identityVector.samples=clusters.samples
  
  for(j in seq(clusters)){
    identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j]
  }
  
  name.sample=s.anno$Sample[i]
  
  if(is.null(genelist.statistical.analysis))genelist.statistical.analysis=rownames(scmat)
  
  # Find the DE genes per factor and within factor:
  statistical.analysis=do.call(rbind, mclapply(unique(identityVector.samples), function(id){
    clusters2test=unique(clusters.samples[identityVector.samples%in%id])
    
    # clusters2test vs Rest:
    markers=FindMarkers(scmat, ident.1 = clusters2test, group.by = "seurat_clusters", test.use = test.use,  logfc.threshold =0.01, only.pos = T, features = genelist.statistical.analysis)
    
    #DE in clusters within category
    cluster.markers=FindAllMarkers(scmat[,clusters.samples%in%clusters2test], test.use = test.use, logfc.threshold = 0.01,  only.pos = T, features = genelist.statistical.analysis)
    
    # make genelists with annotation:
    markers$test=paste0(id, " vs. Rest")
    markers$gene=rownames(markers)
    cluster.markers$test=paste0(id, " ", cluster.markers$cluster, " vs. Rest")
    cluster.markers=cluster.markers[,!colnames(cluster.markers)%in%"cluster"]
    df.statistics=plyr::rbind.fill(list(markers, cluster.markers))
    df.statistics$test.used=test.use
    return(df.statistics)
  }))
  
  return(statistical.analysis)
}


#' @param seurat.object Seurat class object or data matrix that can be converted to Seurat object. Can also be a list of data matrices (RNA+citeseq etc.), Must be named by assay type with four uppercase letters (PROT, etc) (also found from seurat object with this name). 
#' @param regress.cell.label Character vector with as many columns as seurat.object Contains batch information for each cell. Uses fastMNN/SCTtransform to batch correct the data (usually different samples or experiments).
#' @param check.pcs Plot jackstraw and elbowplot to optimize the number of PCA componens
#' @param plot.umap Umap plot for clusters and for sample ID, if batch corrected also batch clustering
#' @param resolution Resolution parameter for clustering
#' @param nFeature.min Filter cells with < nFeature.min
#' @param nFeature.max Filter cells with > nFeature.max
#' @param percent.mitoDNA Filter cells with mitochondrial genes > percent.mitoDNA
#' @param singleR Annotate each cell using singleR built in annotations. Mainly uses Encode+blueprint annotations.
#' @param cores Number of cores to use for Seurat/singleR/MNNcorrect
#' @param add.gm.score Adds geneset score (using geometric mean) to fm, must be a named list of genes. Geneset name is added to fm and seurat object.
#' @param ... Pass parameters to Seurat tools
sc.data.analysis=function(scmat, name="scRNA_object", nr.pcs=30, regress.cell.label=NULL, batch.correction.method=c("SCTtransform","MNNcorrect"), auto.order=F, normalize.input=T, check.pcs=T, plot.umap=T, resolution=0.5, nFeature.min=0, nFeature.max=10000, percent.mitoDNA=25, singleR=T, singleR.reference=c("blueprint_encode", "HPCA"), cores=6, add.gm.score=NULL, ...){
  
  # determine computational resources:
  future::plan("multiprocess", workers = cores)
  options(future.globals.maxSize= 5e+09)
  
  # set method for batch correction
  batch.correction.method=match.arg(batch.correction.method)
  
  # list of data matrix, can be citeseq data:
  if(class(scmat)=="list"){
    cat("Input is list, splitting to data matrix by type and adding to Seurat", sep="\n\n")
    
    list.additional=lapply(2:length(scmat), function(i)CreateAssayObject(scmat[[i]]))
    names(list.additional)=names(scmat)[2:length(scmat)]
    scmat <- CreateSeuratObject(scmat[[1]])
  }else{
    list.additional=NULL
    # if class is not seurat object
    if(!class(scmat)=="Seurat"){
      cat("Input is data matrix, converting to Seurat object", sep="\n\n")
      scmat <- CreateSeuratObject(scmat)
    }else{
      cat("Input is Seurat object", sep="\n\n")
    }
  }
  DefaultAssay(object = scmat) <- "RNA"
  
  # is it a vector of pcs or single value?
  if(length(nr.pcs)==1)nr.pcs=seq(nr.pcs)
  
  # QC
  scmat=PercentageFeatureSet(scmat, pattern = "^MT-", col.name = "percent.mt")
  r1=VlnPlot(scmat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
  ggplot2::ggsave(r1, filename = paste0(name, "_QC.pdf"), width = 12)
  
  # Filtering
  nFeature_RNA <- FetchData(object = scmat, vars = "nFeature_RNA")
  pct.mt <- FetchData(object = scmat, vars = "percent.mt")
  filt=which(x = nFeature_RNA > nFeature.min & nFeature_RNA < nFeature.max & pct.mt < percent.mitoDNA)
  cat(paste("Dimension before/after filtering:", paste(paste(dim(scmat), collapse="x"), paste(dim(scmat[, filt]), collapse="x"), collapse = "/")), sep="\n\n")
  scmat=scmat[, filt]
  
  if(normalize.input){
    assay="SCT"
    scmat=SCTransform(scmat, variable.features.n = 3000) #,...)
    
    # seurat standard processing
    scmat=RunPCA(scmat, npcs = 100)
    reduction="pca"
  }else{
    assay="RNA"
    
    # assume that the data is normalized
    scmat <- FindVariableFeatures(object = scmat)
    scmat <- ScaleData(object = scmat)
    scmat=RunPCA(scmat, npcs = 100)
    reduction="pca"
  }
  
  # set default assay to use:
  DefaultAssay(object = scmat) <- assay
  
  # use SCTransform to remove batch effect, used in umap and clustering
  # else use SCT to normalize data
  if(!is.null(regress.cell.label)&batch.correction.method=="SCTtransform"){
    batch.df=data.frame("batch"=regress.cell.label[filt])
    rownames(batch.df)=colnames(scmat)
    scmat[["batch"]] = batch.df
    scmat=SCTransform(scmat,vars.to.regress="batch", variable.features.n = 3000)
    
    # seurat standard processing
    scmat=RunPCA(scmat, npcs = 100)
    
    name=paste0(name, "_", batch.correction.method)
    reduction="pca"
  }
  
  # optimize pcs number:
  if(check.pcs){
    
    # setting here original RNA to define PCAs, SCT not working yet, wait for a fix
    # also the issue here how to deal with the mnnCorrect?
    # DefaultAssay(object = scmat) <- "RNA"
    # scmat <- NormalizeData(scmat,display.progress = FALSE)
    # scmat <- FindVariableFeatures(scmat,do.plot = F,display.progress = FALSE)
    # scmat <- ScaleData(scmat)
    # scmat=RunPCA(scmat, npcs = 100)
    
    # determine how many pcs should be used:
    r3=ElbowPlot(scmat, ndims = 100, reduction = "pca")
    ggplot2::ggsave(r3, filename = paste0(name, "_elbow.pdf"))
    
    # find optimal number of PCs to use:
    scmat <- JackStraw(scmat, num.replicate = 100, dims = 100, reduction = "pca")
    scmat <- ScoreJackStraw(scmat, dims = 1:100, reduction = "pca")
    r4=JackStrawPlot(scmat, dims = 1:100)
    ggplot2::ggsave(r4, filename = paste0(name, "_Jackstraw.pdf"), width = 12)
    
    cat("PCs checking done", sep="\n\n")
  }
  
  # use MNN to remove batch effect, used in umap and clustering:
  if(!is.null(regress.cell.label)&batch.correction.method=="MNNcorrect"){
    library(BiocParallel)
    library(batchelor)
    
    cat("FastMNN batch correction...", sep="\n\n")
    
    if(is.factor(regress.cell.label)&!auto.order){
      order=levels(regress.cell.label)
      cat("FastMNN batch correction order:", sep="\n\n")
      cat(order, sep="-->")
      cat("", sep="\n\n")
    }
    
    if(!is.factor(regress.cell.label)&!auto.order){
      order=unique(regress.cell.label)
      cat("FastMNN batch correction order:", sep="\n\n")
      cat(order, sep="-->")
      cat("", sep="\n\n")
    }
    
    batch.df=data.frame("batch"=regress.cell.label[filt])
    rownames(batch.df)=colnames(scmat)
    scmat[["batch"]] = batch.df
    
    # http://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/fast_mnn.html
    scmat <- NormalizeData(scmat)
    scmat <- FindVariableFeatures(scmat)
    
    object.list = SplitObject(scmat, split.by = "batch")[order(order)]
    
    scmat <- SeuratWrappers::RunFastMNN(object.list, reduction.name = "mnnCorrect", auto.order=auto.order, BPPARAM=MulticoreParam(cores))
    
    # set defaults 
    reduction="mnnCorrect"
    cat("FastMNN batch correction... done", sep="\n\n")
    
  }
  
  # Basic seurat processing with umap:
  scmat=RunUMAP(scmat, dims = nr.pcs, reduction = reduction)
  scmat=FindNeighbors(scmat, dims = nr.pcs, reduction = reduction)
  scmat=FindClusters(scmat, resolution = resolution)
  
  # add celltype automatically
  if(singleR){
    cat("Annotating with singleR", sep="\n\n")
    
    if(!file.exists(paste0(name, "_singler_object.Rdata"))){
      library("SingleR")
      
      counts <- GetAssayData(scmat, assay = assay, slot = "counts")
      
      if(singleR.reference=="blueprint_encode"){
        if(dim(counts)[2]<100000){
          singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(blueprint_encode), project.name=name, fine.tune = T,do.signatures = T, numCores=cores)
        }else{
          cat("SingleR in batches", sep="\n\n")
          singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat),  N=30000, ref.list = list(blueprint_encode), xy = Embeddings(scmat[["umap"]]), project.name=name, fine.tune = T, do.signatures = T, numCores=cores)
        }
      }
      
      if(singleR.reference=="HPCA"){
        if(dim(counts)[2]<100000){
          singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores)
        }else{
          cat("SingleR in batches", sep="\n\n")
          singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat), N=30000, ref.list = list(hpca), project.name=name, xy = Embeddings(scmat[["umap"]]), fine.tune = T,do.signatures = T, numCores=cores)
        }
      }
      
      # add original identifiers
      singler$meta.data$orig.ident = scmat@meta.data$orig.ident # the original identities, if not supplied in 'annot'
      
      ## if using Seurat v3.0 and over use:
      singler$meta.data$xy = scmat@reductions$umap@cell.embeddings # the tSNE coordinates
      singler$meta.data$clusters = scmat@active.ident # the Seurat clusters (if 'clusters' not provided)
      
      # add annot from hpca
      # singler2 = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores)
      # singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells"
      # singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells"
      # singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells"
      # singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells"
      
      # these names are not intuitive, replace them, mentioned that these are actually naive http://comphealth.ucsf.edu/SingleR/SupplementaryInformation2.html:
      singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD4+ T-cells"]="CD4+ Tn"
      singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD8+ T-cells"]="CD8+ Tn"
      singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD4+ T-cells"]="CD4+ Tn"
      singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD8+ T-cells"]="CD8+ Tn"
      
      save(singler, file=paste0(name, "_singler_object.Rdata"))
      
    }else{
      library("SingleR")
      load(paste0(name, "_singler_object.Rdata"))
      cat(paste0("Loaded existing SingleR object (", paste0(name, "_singler_object.Rdata"), "), remove/rename it if you want to re-compute."), sep="\n\n")
      
    }
    # can be added as cluster id
    cluster=singler$singler[[1]]$SingleR.clusters$labels
    cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels
    
    # order based on cell type and add celltype to cluster:
    lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes")
    lineage=unique(c(lineage,blueprint_encode$types, hpca$types))
    lineage=lineage[lineage%in%singler$singler[[1]]$SingleR.single$labels]
    
    scmat[["SingleR.label.main"]]=singler$singler[[1]]$SingleR.single.main$labels
    scmat[["SingleR.label.cluster"]]=paste(singler$singler[[1]]$SingleR.single$labels, Idents(scmat))
    scmat[["SingleR.label"]]=factor(singler$singler[[1]]$SingleR.single$labels, levels=lineage[lineage%in%unique(singler$singler[[1]]$SingleR.single$labels)])
    
    identityVector.samples=as.character(Idents(scmat))
    clusters.samples=Idents(scmat)
    
    for(j in seq(cluster)){
      identityVector.samples[clusters.samples%in%rownames(cluster)[j]]=paste0(cluster[j], ":", rownames(cluster)[j])
    }
    
    # order and cluster identity;
    cluster=cluster[order(match(cluster[,1],lineage)),,drop=F]
    cat(paste(levels(Idents(scmat)), collapse=","), paste(cluster, collapse=","), sep="\t\t")   
    
    scmat[["SingleR.cluster"]]=gsub(":.*.", "", Idents(scmat)) # just the "cluster label" per cell
    
    # order seurat clusters too:
    Idents(scmat)=factor(identityVector.samples, levels=paste0(cluster, ":", rownames(cluster)))
    
    r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE,  pt.size = 0.5, repel = T)  + NoLegend() + NoAxes()
    ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6)
    
    cat("\nSingleR done", sep="\n\n")
    
    
    # new singleR:
    # counts <- GetAssayData(scmat, assay = "RNA", slot = "counts")
    # 
    # library(SingleR)
    # library(scater)
    # 
    # ref=NovershternHematopoieticData()	
    # 
    # common <- intersect(rownames(counts), rownames(ref))
    # ref <- ref[common,]
    # counts <- counts[common,]
    # counts.log <- logNormCounts(counts)
    # 
    # singler <- SingleR(test = counts.log, ref = ref, labels = ref$label.main, numCores=cores)
    # 
    # # order based on cell type and add celltype to cluster:
    # lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes")
    # lineage=unique(c(lineage,ref$label.main))
    # 
    # scmat[["SingleR.label.main"]]=singler$first.labels
    # scmat[["SingleR.label"]]=factor(singler$labels, levels=lineage[lineage%in%unique(singler$labels)])
    # 
    # r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE,  pt.size = 0.5, repel = T)  + NoLegend() + NoAxes()
    # ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6)
    # 
    # cat("\nSingleR done", sep="\n\n")
    
  }
  
  # plot clusters and samples
  if(plot.umap){
    if(!is.null(regress.cell.label)){
      r6=DimPlot(scmat, group.by = c("batch"), ncol = 2, label = T, repel = T)  + NoLegend() + NoAxes()
      ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_batch.pdf"), width = 6, height = 6)
      r6=DimPlot(scmat, group.by = c("ident"), ncol = 2, label = T, repel = T)  + NoLegend() + NoAxes()
      ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6)
    }else{
      r6=DimPlot(scmat, group.by = c("ident"), ncol = 1, label = T, repel = T)  + NoLegend() + NoAxes()
      ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6)
    }
  }
  
  # add immunoscores to FM format data matrix
  fm.f <- GetAssayData(scmat)
  
  # add gm based score:
  add.scores=list(HLAIScore=c("B2M", "HLA-A", "HLA-B","HLA-C"), HLAIIScore=c("HLA-DMA","HLA-DMB","HLA-DPA1","HLA-DPB1","HLA-DRA","HLA-DRB1"), CytolyticScore=c("GZMA", "PRF1", "GNLY", "GZMH", "GZMM"))
  if(!is.null(add.gm.score))add.scores=append(add.scores, add.gm.score)
  
  gm.objects=do.call(rbind, lapply(seq(add.scores), function(i){
    dat3=fm.f[rownames(fm.f)%in%add.scores[[i]],]
    gm=t(apply(dat3, 2, gm_mean)) # done to normalized values
    rownames(gm)=names(add.scores)[i]
    return(gm)
  }))
  
  # also add to seurat object:
  for(i in seq(add.scores)){
    scmat[[names(add.scores)[i]]] <- gm.objects[i,]
  }
  
  # gene expression and scores to fm:
  rownames(fm.f)=paste0("N:GEXP:", rownames(fm.f))
  rownames(gm.objects)=paste0("N:SAMP:", rownames(gm.objects))
  fm=rbind(gm.objects, fm.f)
  
  # DE analysis:
  markers.all=FindAllMarkers(scmat, only.pos = T)#, ...)
  
  # go through each data matrix, add to seurat and fm if more matrices
  if(!is.null(list.additional)){
    
    for(i in seq(list.additional)){
      mat.add=list.additional[[i]]
      mat.add=subset(mat.add,cells = colnames(scmat))
      scmat[[names(list.additional)[i]]] <- mat.add
      scmat <- NormalizeData(scmat, assay = names(list.additional)[i], normalization.method = "CLR") # test scttransform too
      scmat <- ScaleData(scmat, assay = names(list.additional)[i])
      
      add.data.normalized <- data.matrix(GetAssayData(scmat, assay = names(list.additional)[i], slot = "data"))
      rownames(add.data.normalized)=paste0("N:",names(list.additional)[i], ":", rownames(add.data.normalized))
      fm=rbind(fm, add.data.normalized)
    }
  }
  
  save(list=c("fm", "scmat", "markers.all"), file=paste0(name, "_scRNA.Rdata"))
  
  return(paste0(name, "_scRNA.Rdata"))
}

# make sure  data is on a linear non-log scale
# check 0 values, if a lot between 0-1 better to use add.one. If counts, remove works too quite well
# zero fix methods: https://arxiv.org/pdf/1806.06403.pdf
gm_mean=function(x, na.rm=TRUE, zero.handle=c("remove", "add.one", "zero.propagate")){
  zero.handle=match.arg(zero.handle)
  if(any(x < 0, na.rm = TRUE)){
    warning("Negative values produced NaN - is the data on linear - non-log scale?")
    return(NaN)
  }
  
  if(zero.handle=="remove"){
    return(exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)))
  }
  
  if(zero.handle=="add.one"){
    return(exp(mean(log(x+1), na.rm = na.rm))-1)
  }
  
  if(zero.handle=="zero.propagate"){
    if(any(x == 0, na.rm = TRUE)){
      return(0)
    }
    return(exp(mean(log(x), na.rm = na.rm)))
  }
  
}

get.cluster.label.singleR=function(singler){
  
  # how to add this?
  cluster=singler$singler[[1]]$SingleR.clusters$labels
  cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels
  
  # order based on cell type:
  # cat(paste0("'", colors.group[,1], "'"), sep=",")
  lineage=c('HSC','MPP','CLP','CMP','GMP','MEP','Monocytes','DC','Macrophages','Macrophages M1','Macrophages M2','CD4+ T-cells','CD8+ T-cells','Tregs','T_cell:gamma-delta','T_cell:CD4+_Naive','T_cell:CD8+_naive','T_cell:Treg:Naive','CD4+ Tcm','CD4+ Tem','CD8+ Tcm','CD8+ Tem','NK cells','naive B-cells','Memory B-cells','B-cells','Class-switched memory B-cells','Plasma cells','Endothelial cells','Neutrophils','Eosinophils','Fibroblasts','Smooth muscle','Erythroblast','Erythrocytes','Megakaryocytes')
  
  cluster=cluster[order(match(cluster[,1],lineage)),,drop=F]
  
  # order and cluster identity;
  cat(paste(rownames(cluster), collapse=","), paste(cluster, collapse=","), sep="\t")
  cat("", sep="\n")
  return(cluster)
}

# run cellphoneDB
run.cellphonedb=function(scmat, celltype, out=getwd(), cores=10, threshold=0.05){
  
  if(!dir.exists(out))dir.create(out, recursive = T)
  
  if(is.null(celltype)){
    celltype=gsub(" ", "_", as.character(Idents(scmat)))
  }
  
  setwd(out)
  # input1:
  df=data.frame("Cell"=gsub("\\.", "_", colnames(scmat)), "cell_type"=celltype, stringsAsFactors = F)
  df$cell_type[scmat[["SingleR.label"]]=="Tregs"]="Tregs"
  
  meta=file.path(out, "meta.tsv")
  write.table(df, meta, row.names = F, col.names = T, quote = F, sep="\t")
  
  # input2:
  counts <- data.matrix(GetAssayData(scmat, assay = "SCT", slot = "data"))
  countsfile=tempfile()
  
  # gene symbol was not working...
  library("org.Hs.eg.db") # remember to install it if you don't have it already
  ensambleid <- mapIds(org.Hs.eg.db, keys = rownames(counts), keytype = "SYMBOL", column="ENSEMBL")
  rownames(counts)=ensambleid
  
  counts=counts[!is.na(rownames(counts)),]
  
  countsfile=file.path(out, "counts.tsv")
  
  write.table(t(c("Gene", gsub("\\.", "_", colnames(counts)))), countsfile, row.names = F, col.names = F, quote = F, sep="\t")
  write.table(counts, countsfile, row.names = T, col.names = F, quote = F, sep="\t", append=T)
  
  system(paste0("cellphonedb method statistical_analysis ", meta, " ", countsfile, " --iterations=1000 --threads=", cores, " --threshold=", threshold))
  
  # make heatmap of interactions:
  system(paste0("cellphonedb plot heatmap_plot ", meta))
  
  deconvoluted=data.table::fread(file.path(out, "out/deconvoluted.txt"), data.table = F)
  pvalues=data.table::fread(file.path(out, "out/pvalues.txt"), data.table = F)
  significant_means.txt=data.table::fread(file.path(out, "out/significant_means.txt"), data.table = F)
  means=data.table::fread(file.path(out, "out/means.txt"), data.table = F)
  
  unlink(countsfile)
  unlink(meta)
  
  return(list("deconvoluted"=deconvoluted, "pvalues"=pvalues, "significant_means"=significant_means.txt, "means"=means))
}

# found bug in reference list, was missing, also min genes was 200 not 0 causing errors...
CreateBigSingleRObject.custom=function (counts, annot = NULL, project.name, xy, clusters, N = 10000,
                                        min.genes = 0, technology = "10X", species = "Human", citation = "",
                                        ref.list = list(), normalize.gene.length = F, variable.genes = "de",
                                        fine.tune = T, reduce.file.size = T, do.signatures = F, do.main.types = T,
                                        temp.dir = getwd(), numCores = SingleR.numCores){
  n = ncol(counts)
  s = seq(1, n, by = N)
  dir.create(paste0(temp.dir, "/singler.temp/"), showWarnings = FALSE)
  for (i in s) {
    print(i)
    A = seq(i, min(i + N - 1, n))
    
    singler = CreateSinglerObject(counts[, A], annot = annot[A], ref.list = ref.list,
                                  project.name = project.name, min.genes = min.genes,
                                  technology = technology, species = species, citation = citation,
                                  do.signatures = do.signatures, clusters = NULL, numCores = numCores)
    
    
    save(singler, file = paste0(temp.dir, "/singler.temp/",
                                project.name, ".", i, ".RData"))
  }
  singler.objects.file <- list.files(paste0(temp.dir, "/singler.temp/"),
                                     pattern = "RData", full.names = T)
  singler.objects = list()
  for (i in 1:length(singler.objects.file)) {
    load(singler.objects.file[[i]])
    singler.objects[[i]] = singler
  }
  singler = SingleR.Combine.custom(singler.objects, order = colnames(counts),expr = counts, clusters = clusters, xy = xy)
  singler
}

# bug also in this...
SingleR.Combine.custom=function (singler.list, order = NULL, clusters = NULL, expr = NULL, xy = NULL) 
{
  singler = c()
  singler$singler = singler.list[[1]]$singler
  for (j in 1:length(singler.list[[1]]$singler)) {
    singler$singler[[j]]$SingleR.cluster = c()
    singler$singler[[j]]$SingleR.cluster.main = c()
    singler$singler[[j]]$SingleR.single$clusters = c()
  }
  singler$meta.data = singler.list[[1]]$meta.data
  singler$meta.data$clusters = c()
  singler$meta.data$xy = c()
  singler$meta.data$data.sets = rep(singler$meta.data$project.name, 
                                    length(singler$meta.data$orig.ident))
  for (i in 2:length(singler.list)) {
    for (j in 1:length(singler$singler)) {
      if (singler.list[[i]]$singler[[j]]$about$RefData != 
          singler.list[[1]]$singler[[j]]$about$RefData) {
        stop("The objects are not ordered by the same reference data.")
      }
      singler$singler[[j]]$about$Organism = c(singler$singler[[j]]$about$Organism, 
                                              singler.list[[i]]$singler[[j]]$about$Organism)
      singler$singler[[j]]$about$Citation = c(singler$singler[[j]]$about$Citation, 
                                              singler.list[[i]]$singler[[j]]$about$Citation)
      singler$singler[[j]]$about$Technology = c(singler$singler[[j]]$about$Technology, 
                                                singler.list[[i]]$singler[[j]]$about$Technology)
      singler$singler[[j]]$SingleR.single$labels = rbind(singler$singler[[j]]$SingleR.single$labels, 
                                                         singler.list[[i]]$singler[[j]]$SingleR.single$labels)
      if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) {
        singler$singler[[j]]$SingleR.single$labels1 = rbind(singler$singler[[j]]$SingleR.single$labels1, 
                                                            singler.list[[i]]$singler[[j]]$SingleR.single$labels1)
      }
      singler$singler[[j]]$SingleR.single$scores = rbind(singler$singler[[j]]$SingleR.single$scores, 
                                                         singler.list[[i]]$singler[[j]]$SingleR.single$scores)
      singler$singler[[j]]$SingleR.single.main$labels = rbind(singler$singler[[j]]$SingleR.single.main$labels, 
                                                              singler.list[[i]]$singler[[j]]$SingleR.single.main$labels)
      if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) {
        singler$singler[[j]]$SingleR.single.main$labels1 = rbind(singler$singler[[j]]$SingleR.single.main$labels1, 
                                                                 singler.list[[i]]$singler[[j]]$SingleR.single.main$labels1)
      }
      singler$singler[[j]]$SingleR.single.main$scores = rbind(singler$singler[[j]]$SingleR.single.main$scores, 
                                                              singler.list[[i]]$singler[[j]]$SingleR.single.main$scores)
      singler$singler[[j]]$SingleR.single$cell.names = c(singler$singler[[j]]$SingleR.single$cell.names, 
                                                         singler.list[[i]]$singler[[j]]$SingleR.single$cell.names)
      singler$singler[[j]]$SingleR.single.main$cell.names = c(singler$singler[[j]]$SingleR.single.main$cell.names, 
                                                              singler.list[[i]]$singler[[j]]$SingleR.single.main$cell.names)
      if (!is.null(singler$singler[[j]]$SingleR.single.main$pval)) {
        singler$singler[[j]]$SingleR.single.main$pval = c(singler$singler[[j]]$SingleR.single.main$pval, 
                                                          singler.list[[i]]$singler[[j]]$SingleR.single.main$pval)
      }
      if (!is.null(singler$singler[[j]]$SingleR.single$pval)) {
        singler$singler[[j]]$SingleR.single$pval = c(singler$singler[[j]]$SingleR.single$pval, 
                                                     singler.list[[i]]$singler[[j]]$SingleR.single$pval)
      }
    }
    singler$meta.data$project.name = paste(singler$meta.data$project.name, 
                                           singler.list[[i]]$meta.data$project.name, sep = "+")
    singler$meta.data$orig.ident = c(singler$meta.data$orig.ident, 
                                     singler.list[[i]]$meta.data$orig.ident)
    singler$meta.data$data.sets = c(singler$meta.data$data.sets, 
                                    rep(singler.list[[i]]$meta.data$project.name, length(singler.list[[i]]$meta.data$orig.ident)))
  }
  for (j in 1:length(singler$singler)) {
    if (!is.null(order)) {
      singler$singler[[j]]$SingleR.single$labels = singler$singler[[j]]$SingleR.single$labels[order, 
                                                                                              ]
      if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) {
        singler$singler[[j]]$SingleR.single$labels1 = singler$singler[[j]]$SingleR.single$labels1[order, 
                                                                                                  ]
      }
      singler$singler[[j]]$SingleR.single$scores = singler$singler[[j]]$SingleR.single$scores[order, 
                                                                                              ]
      singler$singler[[j]]$SingleR.single$cell.names = singler$singler[[j]]$SingleR.single$cell.names[order]
      singler$singler[[j]]$SingleR.single.main$labels = singler$singler[[j]]$SingleR.single.main$labels[order, 
                                                                                                        ]
      if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) {
        singler$singler[[j]]$SingleR.single.main$labels1 = singler$singler[[j]]$SingleR.single.main$labels1[order, 
                                                                                                            ]
      }
      singler$singler[[j]]$SingleR.single.main$scores = singler$singler[[j]]$SingleR.single.main$scores[order, 
                                                                                                        ]
      singler$singler[[j]]$SingleR.single.main$cell.names = singler$singler[[j]]$SingleR.single.main$cell.names[order]
      if (!is.null(singler$singler[[j]]$SingleR.single$pval)) {
        singler$singler[[j]]$SingleR.single$pval = singler$singler[[j]]$SingleR.single$pval[order]
        singler$singler[[j]]$SingleR.single.main$pval = singler$singler[[j]]$SingleR.single.main$pval[order]
      }
    }
  }
  if (!is.null(clusters) && !is.null(expr)) {
    for (j in 1:length(singler$singler)) {
      if (is.character(singler$singler[[j]]$about$RefData)) {
        ref = get(tolower(singler$singler[[j]]$about$RefData))
      }
      else {
        ref = singler$singler[[j]]$about$RefData
      }
      singler$singler[[j]]$SingleR.clusters = SingleR("cluster", 
                                                      expr, ref$data, types = ref$types, clusters = factor(clusters), 
                                                      sd.thres = ref$sd.thres, genes = "de", fine.tune = T)
      singler$singler[[j]]$SingleR.clusters.main = SingleR("cluster", 
                                                           expr, ref$data, types = ref$main_types, clusters = factor(clusters), 
                                                           sd.thres = ref$sd.thres, genes = "de", fine.tune = T)
    }
    singler$meta.data$clusters = clusters
    if (!is.null(xy)) {
      singler$meta.data$xy = xy
    }
  }
  singler
}