--- a +++ b/common_scripts/scRNA/functions.scRNA.analysis.R @@ -0,0 +1,1011 @@ +#' +#' +plot.scRNA.features.complexHeatmap=function(s.anno, features, name,width=300, cores){ + log=mclapply(seq(dim(s.anno)[1]), plot.scRNA.features, s.anno,features, name, mc.cores = cores, width=width) +} + +plot.scRNA.features=function(i, s.anno, features, name, add.annotation=NULL, width=300){ + # eccite PBMC dataset: + load(s.anno[i,2]) + + #HSC, myeloid, B-solu, T-solu, erythroi + # STEP1 find the cluster order, and set them based on lineage: + clusters=Idents(scmat) + if(s.anno[i,3]!=""){ + order.clu=as.numeric(unlist(strsplit(s.anno[i,3], ",|, | ,"))) # MODIFY + + ord.names=names(clusters[order(match(clusters,order.clu))]) + + }else{ + + order.clu=levels(clusters) + + # order samples within cluster by umap component with more spread + # this way there is still information within cluster, for heatmaps + coords=Embeddings(scmat[["umap"]]) + fit.cluster=levels(Idents(scmat)) + + ord.names=unlist(lapply(fit.cluster, function(clu){ + d=coords[Idents(scmat)%in%clu,] + ind=which.max(c(max(d[,1])-min(d[,1]), max(d[,2])-min(d[,2]))) # take the axis with most spread + names(sort(d[,ind])) + })) + + } + + + + features=data.frame(features, stringsAsFactors = F) + + if(!grepl("^B:|^N:", features[1,1])){ + features[,1]=paste0("N:GEXP:", features[,1]) + allfeats=rownames(fm)[!rowSums(fm>0)==0] + + }else{ + allfeats=gsub("N:GEXP:|N:RPPA:", "", rownames(fm)[!rowSums(fm>0)==0]) + } + + # text annot created if more columns than 1. + if(dim(features)[2]>1){ + features=features[features[,1]%in%allfeats,] + feats=features[,1] + t.annot=apply(features[,2:dim(features)[2], drop=F], 1, paste, collapse=" ") + names(t.annot)=feats + }else{ + t.annot=NULL + feats=features[features[,1]%in%allfeats,1] + } + + # make a data frame for annotations on top of the heatmap + annotdf=data.frame("cluster"=clusters,"HLAIScore"=as.numeric(fm["N:SAMP:HLAIScore",]), "HLAIIScore"=as.numeric(fm["N:SAMP:HLAIIScore",]), "CytolyticScore"=as.numeric(fm["N:SAMP:CytolyticScore",])) + + if(!is.null(add.annotation))annotdf$annotation=add.annotation + + name=gsub("__", "_", gsub("[[:punct:]]", "_", paste(name, s.anno[i,1], sep="_"))) + + # plot using a complexheatmap package using a featurematrix format and custom plotting function + r1=plot.complexHM.fm(feats = feats, text_annot = t.annot, fm.f = fm, annotdf = annotdf, feats.barplot = c("HLAIScore", "HLAIIScore", "CytolyticScore"), order_columns=ord.names, order_rows=F, split.columns = T, plotting.param="/research/groups/sysgen/PROJECTS/students/HEMAP_IMMUNOLOGY/minna_work/scRNA_genelist_plotting/plotting_param_fm.txt", NAME=name, WIDTH = width) +} + +plot.gene.seurat=function(s.anno, feature, name,cols=c("grey75", "red"), max=1, min=0, cores=1){ + plot.feature=function(i, feature){ + # plot proportions as a barplot next to 2D points + load(s.anno[i,2]) + + if(!feature%in%rownames(scmat))return(NULL) + + p <- FeaturePlot(scmat, features = feature, cols = cols, max.cutoff = max, min.cutoff = min, order = T, combine = FALSE) + p=lapply(p, `+`, labs(title = s.anno[i,1])) + + if(i>1){ + for(i in 1:length(p)) { + p[[i]] <- p[[i]] + NoLegend() + NoAxes() + } + }else{ + p[[1]] <- p[[1]] + NoAxes() + } + + cowplot::plot_grid(plotlist = p) + } + + # plot here lineage proportion per patient + plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores) + plot.list=plot.list[!sapply(plot.list, is.null)] + + if(!length(plot.list))return(NULL) + + # 74mm * 99mm per panelwidth = 77, height = 74 + figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) +} + +plot.fm.feature=function(s.anno, feature, name, max=3, min=0, cores=1){ + plot.feature=function(i, feature){ + # plot proportions as a barplot next to 2D points + load(s.anno[i,2]) + + scmat[[feature]]=fm[rownames(fm)%in%feature,] + + p <- FeaturePlot(scmat, features = feature, cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE) + p=lapply(p, `+`, labs(title = s.anno[i,1])) + + if(i>1){ + for(i in 1:length(p)) { + p[[i]] <- p[[i]] + NoLegend() + NoAxes() + } + }else{ + p[[1]] <- p[[1]] + NoAxes() + } + + return(p) + } + + # plot here lineage proportion per patient + plot.list=unlist(mclapply(seq(dim(s.anno)[1]), plot.feature, feature, mc.cores=cores), recursive = F) + + # 74mm * 99mm per panelwidth = 77, height = 74 + figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + # 74mm * 99mm per panelwidth = 77, height = 74 + figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) +} + +plot.several.fm.feature=function(s.anno.object, features, name, min=0, max=5, cores=5){ + load(s.anno.object) + + features=features[features%in%rownames(fm)] + + plot.feature=function(i, features){ + # plot proportions as a barplot next to 2D points + + scmat[[features[i]]]=fm[rownames(fm)%in%features[i],] + + p <- FeaturePlot(scmat, features = features[i], cols = c("grey75", "red"), max.cutoff = max, min.cutoff = min, order = T, combine = FALSE) + p=lapply(p, `+`, labs(title = features[i])) + + if(i>1){ + for(i in 1:length(p)) { + p[[i]] <- p[[i]] + NoLegend() + NoAxes() + } + }else{ + p[[1]] <- p[[1]] + NoAxes() + } + return(p) + } + + # plot here lineage proportion per patient + plot.list=unlist(mclapply(seq(features), plot.feature, features, mc.cores=cores), recursive = F) + + # 74mm * 99mm per panelwidth = 77, height = 74 + figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) +} + +plot.seurat.ident=function(s.anno, colorv, name, cores=1){ + plot.feature=function(i){ + # plot proportions as a barplot next to 2D points + load(s.anno[i,2]) + + cells=gsub(" \\d+$", "", levels(Idents(scmat))) + + p=DimPlot(scmat, group.by = c("ident"), cols = colorv[match(cells, colorv[,1]),2], ncol = 1, label = T, repel = T) + NoLegend() + NoAxes() + } + + # plot here lineage proportion per patient + plot.list=mclapply(seq(dim(s.anno)[1]), plot.feature, mc.cores=cores) + + # 74mm * 99mm per panelwidth = 77, height = 74 + figure <-multipanelfigure:: multi_panel_figure(width = 170, height = ceiling(length(plot.list)/2)*75+75, rows = ceiling(length(plot.list)/2)+1, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) +} + + +#' +plot.multi.scatter.scRNA=function(s.anno, scmat=NULL, colors.group, identityVector.samples.list=NULL, seurat.feature=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height = 74, text.size=10){ + # tools: + library(Matrix) + library(Seurat) + source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R") + library(data.table) + library(ComplexHeatmap) + library(circlize) + library(parallel) + library(ggplot2) + + + # plot here lineage proportion per patient + plot.list=mclapply(seq(dim(s.anno)[1]), function(i){ + # plot proportions as a barplot next to 2D points + load(s.anno[i,2]) + coords=Embeddings(scmat[["umap"]]) + identityVector=unlist(strsplit(s.anno$Type[i], ", |,")) + clusters=unlist(strsplit(s.anno$Clusters[i], ", |,")) + clusters.samples=as.character(Idents(scmat)) + + if(is.null(identityVector.samples.list)){ + identityVector.samples=clusters.samples + + for(j in seq(clusters)){ + identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j] + } + }else{ + identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list + } + + if(!is.null(seurat.feature)){ + identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list + } + + name.sample=s.anno$Sample[i] + + # take the colors: + colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2] + + a=plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, namev=name, main = name, add.proportions = T, SIZE = SIZE, add.legend=F, rasterize=rasterize, width=width, height = height, text.size=text.size) + return(a) + }, mc.cores=cores) + + # add legend last + add.legend=get.only.legend(group = colors.group[,1], colorv = colors.group[,2]) + + nrows=ceiling(length(plot.list)/2)+1 + # 74mm * 99mm per panelwidth = 77, height = 74 + figure <-multipanelfigure:: multi_panel_figure(width = 2*width+width*0.2, height = ceiling(length(plot.list)/2)*height+120, rows = nrows, columns = 2, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + rowi=1 + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + figure <- multipanelfigure::fill_panel(figure,row = nrows,column = 1:2, add.legend) + multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) +} + +plot.scatter.seurat=function(scmat, colors.group,clusters="", identityVector="", identityVector.samples.list=NULL, seurat.feature=NULL, lv=NULL, name="scRNA", cores=7, SIZE=0.1, rasterize=T, width=77, height=74, text.size=10, add.proportions=T, add.density=F, add.labels=F){ + # tools: + library(Matrix) + library(Seurat) + source("/research/users/ppolonen/git_home/common_scripts/visualisation/plotting_functions.R") + library(data.table) + library(ComplexHeatmap) + library(circlize) + library(parallel) + library(ggplot2) + + coords=Embeddings(scmat[["umap"]]) + clusters.samples=as.character(Idents(scmat)) + + if(is.null(identityVector.samples.list)){ + identityVector.samples=clusters.samples + + for(j in seq(clusters)){ + identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j] + } + }else{ + identityVector.samples=as.character(identityVector.samples.list[[i]]) # must be in a list + } + + if(!is.null(seurat.feature)){ + identityVector.samples=as.character(scmat[[seurat.feature]][,1]) # must be in a list + + ind=order(match(identityVector.samples, colors.group[,1])) + + # order everything: + identityVector.samples=identityVector.samples[ind] + coords=coords[ind,] + lv=lv[ind] + } + + # take the colors: + colorv=colors.group[match(unique(identityVector.samples), colors.group[,1]),2] + + plot.list=list(plot.scatter(x = coords[,1], y = coords[,2], group = identityVector.samples, colorv = colorv, lv = lv, namev=name, main = name, add.proportions = add.proportions, add.density = add.density, add.labels = add.labels, SIZE = SIZE, add.legend=T, rasterize=rasterize, width=width, height = height, text.size=text.size)) + + figure <-multipanelfigure:: multi_panel_figure(width = width+50, height = height, rows = 1, columns = 1, panel_label_type = "none", unit = "mm", row_spacing = unit(1, "mm"), column_spacing = unit(4, "mm")) + + for(i in seq(plot.list)){ + figure <- multipanelfigure::fill_panel(figure, plot.list[[i]]) + } + + multipanelfigure::save_multi_panel_figure(figure, filename=paste0(name, "_fm_scatterplot.pdf"), limitsize = FALSE) +} + + +#' +#' +compute.FDR.scRNA=function(i, s.anno, nperm=1000, geneset, nCores=6){ + library(AUCell) + library(Matrix) + library(Seurat) + + load(s.anno[i,2]) + + # take only genes that are in the matrix + geneset=geneset[geneset%in%rownames(scmat)] + + # Random + geneset.random=lapply(seq(nperm), function(i, genes){ + sample(rownames(scmat), length(geneset)) + }) + + names(geneset.random)=paste0("Random",seq(nperm)) + + geneset.observed=list("MDS_signature"=geneset) + + data_n <- data.matrix(GetAssayData(scmat, assay = "RNA")) + + cells_rankings <- AUCell_buildRankings(data_n, nCores=nCores, plotStats=TRUE) + AUC.random <- AUCell_calcAUC(geneset.random, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05) + AUC.observed <- AUCell_calcAUC(geneset.observed, cells_rankings, nCores = nCores, nrow(cells_rankings)*0.05) + + # Alternative: assign cells according to the 'automatic' threshold + cells_assignment <- AUCell_exploreThresholds(AUC.observed, plotHist=T, assignCells=TRUE) + + # how many times random higher than observed, divide by the number of permutations == eFDR + y=as.numeric(getAUC(AUC.observed)) + x=getAUC(AUC.random) + + FDR=sapply(seq(y), function(i){ + sum(x[,i]>y[i])/nperm + }) + + empirical.FDR=data.frame(t(FDR), check.names = F) + rownames(empirical.FDR)=paste0(names(geneset.observed), "_eFDR_", nperm, "_", length(geneset)) + colnames(empirical.FDR)=colnames(scmat) + + a=scmat[["SingleR.label"]] + table(a[rownames(a)%in%cells_assignment$MDS_signature$assignment,]) + table(a[empirical.FDR<0.05,]) + + scmat[["empirical.FDR"]]=as.numeric(empirical.FDR) + DimPlot(scmat, group.by = "empirical.FDR") + + scmat[["y"]]=as.numeric(y)>0.04 + DimPlot(scmat, group.by = "y") + + return(list(empirical.FDR, y)) +} + +get.only.legend=function(colorv, group, position="right", direction="vertical"){ + library(ggplot2) + + dat=data.frame(group, colorv) + levels(dat$group)=group + myColors <- colorv + names(myColors) <- levels(group) + colScale <- scale_colour_manual(name = "group",values = myColors) + dat$x=dim(dat)[1] + dat$y=dim(dat)[1] + + legend <- cowplot::get_legend(ggplot(dat, aes(x, y, colour = group)) + theme_bw() + + geom_point(size=0.6) + colScale +theme(legend.position = position, legend.direction = direction, legend.title = element_blank(), + legend.key=element_blank(), legend.box.background=element_blank(), + legend.text=element_text(size = 10, face = "bold", family="Helvetica"))+ + theme(plot.margin=unit(c(1,1,1,1), "mm"))+ + guides(colour = guide_legend(override.aes = list(size=3)))) + return(legend) +} + + +statistical.comparisons.scRNA=function(i, s.anno, genelist.statistical.analysis=NULL, test.use="wilcox"){ + # plot proportions as a barplot next to 2D points + load(s.anno[i,2]) + coords=Embeddings(scmat[["umap"]]) + identityVector=gsub(" ", "", unlist(strsplit(s.anno$Type[i], ", |,"))) + clusters=gsub(" ", "", unlist(strsplit(s.anno$Clusters[i], ", |,"))) + clusters.samples=as.character(Idents(scmat)) + identityVector.samples=clusters.samples + + for(j in seq(clusters)){ + identityVector.samples[clusters.samples%in%clusters[j]]=identityVector[j] + } + + name.sample=s.anno$Sample[i] + + if(is.null(genelist.statistical.analysis))genelist.statistical.analysis=rownames(scmat) + + # Find the DE genes per factor and within factor: + statistical.analysis=do.call(rbind, mclapply(unique(identityVector.samples), function(id){ + clusters2test=unique(clusters.samples[identityVector.samples%in%id]) + + # clusters2test vs Rest: + markers=FindMarkers(scmat, ident.1 = clusters2test, group.by = "seurat_clusters", test.use = test.use, logfc.threshold =0.01, only.pos = T, features = genelist.statistical.analysis) + + #DE in clusters within category + cluster.markers=FindAllMarkers(scmat[,clusters.samples%in%clusters2test], test.use = test.use, logfc.threshold = 0.01, only.pos = T, features = genelist.statistical.analysis) + + # make genelists with annotation: + markers$test=paste0(id, " vs. Rest") + markers$gene=rownames(markers) + cluster.markers$test=paste0(id, " ", cluster.markers$cluster, " vs. Rest") + cluster.markers=cluster.markers[,!colnames(cluster.markers)%in%"cluster"] + df.statistics=plyr::rbind.fill(list(markers, cluster.markers)) + df.statistics$test.used=test.use + return(df.statistics) + })) + + return(statistical.analysis) +} + + +#' @param seurat.object Seurat class object or data matrix that can be converted to Seurat object. Can also be a list of data matrices (RNA+citeseq etc.), Must be named by assay type with four uppercase letters (PROT, etc) (also found from seurat object with this name). +#' @param regress.cell.label Character vector with as many columns as seurat.object Contains batch information for each cell. Uses fastMNN/SCTtransform to batch correct the data (usually different samples or experiments). +#' @param check.pcs Plot jackstraw and elbowplot to optimize the number of PCA componens +#' @param plot.umap Umap plot for clusters and for sample ID, if batch corrected also batch clustering +#' @param resolution Resolution parameter for clustering +#' @param nFeature.min Filter cells with < nFeature.min +#' @param nFeature.max Filter cells with > nFeature.max +#' @param percent.mitoDNA Filter cells with mitochondrial genes > percent.mitoDNA +#' @param singleR Annotate each cell using singleR built in annotations. Mainly uses Encode+blueprint annotations. +#' @param cores Number of cores to use for Seurat/singleR/MNNcorrect +#' @param add.gm.score Adds geneset score (using geometric mean) to fm, must be a named list of genes. Geneset name is added to fm and seurat object. +#' @param ... Pass parameters to Seurat tools +sc.data.analysis=function(scmat, name="scRNA_object", nr.pcs=30, regress.cell.label=NULL, batch.correction.method=c("SCTtransform","MNNcorrect"), auto.order=F, normalize.input=T, check.pcs=T, plot.umap=T, resolution=0.5, nFeature.min=0, nFeature.max=10000, percent.mitoDNA=25, singleR=T, singleR.reference=c("blueprint_encode", "HPCA"), cores=6, add.gm.score=NULL, ...){ + + # determine computational resources: + future::plan("multiprocess", workers = cores) + options(future.globals.maxSize= 5e+09) + + # set method for batch correction + batch.correction.method=match.arg(batch.correction.method) + + # list of data matrix, can be citeseq data: + if(class(scmat)=="list"){ + cat("Input is list, splitting to data matrix by type and adding to Seurat", sep="\n\n") + + list.additional=lapply(2:length(scmat), function(i)CreateAssayObject(scmat[[i]])) + names(list.additional)=names(scmat)[2:length(scmat)] + scmat <- CreateSeuratObject(scmat[[1]]) + }else{ + list.additional=NULL + # if class is not seurat object + if(!class(scmat)=="Seurat"){ + cat("Input is data matrix, converting to Seurat object", sep="\n\n") + scmat <- CreateSeuratObject(scmat) + }else{ + cat("Input is Seurat object", sep="\n\n") + } + } + DefaultAssay(object = scmat) <- "RNA" + + # is it a vector of pcs or single value? + if(length(nr.pcs)==1)nr.pcs=seq(nr.pcs) + + # QC + scmat=PercentageFeatureSet(scmat, pattern = "^MT-", col.name = "percent.mt") + r1=VlnPlot(scmat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) + ggplot2::ggsave(r1, filename = paste0(name, "_QC.pdf"), width = 12) + + # Filtering + nFeature_RNA <- FetchData(object = scmat, vars = "nFeature_RNA") + pct.mt <- FetchData(object = scmat, vars = "percent.mt") + filt=which(x = nFeature_RNA > nFeature.min & nFeature_RNA < nFeature.max & pct.mt < percent.mitoDNA) + cat(paste("Dimension before/after filtering:", paste(paste(dim(scmat), collapse="x"), paste(dim(scmat[, filt]), collapse="x"), collapse = "/")), sep="\n\n") + scmat=scmat[, filt] + + if(normalize.input){ + assay="SCT" + scmat=SCTransform(scmat, variable.features.n = 3000) #,...) + + # seurat standard processing + scmat=RunPCA(scmat, npcs = 100) + reduction="pca" + }else{ + assay="RNA" + + # assume that the data is normalized + scmat <- FindVariableFeatures(object = scmat) + scmat <- ScaleData(object = scmat) + scmat=RunPCA(scmat, npcs = 100) + reduction="pca" + } + + # set default assay to use: + DefaultAssay(object = scmat) <- assay + + # use SCTransform to remove batch effect, used in umap and clustering + # else use SCT to normalize data + if(!is.null(regress.cell.label)&batch.correction.method=="SCTtransform"){ + batch.df=data.frame("batch"=regress.cell.label[filt]) + rownames(batch.df)=colnames(scmat) + scmat[["batch"]] = batch.df + scmat=SCTransform(scmat,vars.to.regress="batch", variable.features.n = 3000) + + # seurat standard processing + scmat=RunPCA(scmat, npcs = 100) + + name=paste0(name, "_", batch.correction.method) + reduction="pca" + } + + # optimize pcs number: + if(check.pcs){ + + # setting here original RNA to define PCAs, SCT not working yet, wait for a fix + # also the issue here how to deal with the mnnCorrect? + # DefaultAssay(object = scmat) <- "RNA" + # scmat <- NormalizeData(scmat,display.progress = FALSE) + # scmat <- FindVariableFeatures(scmat,do.plot = F,display.progress = FALSE) + # scmat <- ScaleData(scmat) + # scmat=RunPCA(scmat, npcs = 100) + + # determine how many pcs should be used: + r3=ElbowPlot(scmat, ndims = 100, reduction = "pca") + ggplot2::ggsave(r3, filename = paste0(name, "_elbow.pdf")) + + # find optimal number of PCs to use: + scmat <- JackStraw(scmat, num.replicate = 100, dims = 100, reduction = "pca") + scmat <- ScoreJackStraw(scmat, dims = 1:100, reduction = "pca") + r4=JackStrawPlot(scmat, dims = 1:100) + ggplot2::ggsave(r4, filename = paste0(name, "_Jackstraw.pdf"), width = 12) + + cat("PCs checking done", sep="\n\n") + } + + # use MNN to remove batch effect, used in umap and clustering: + if(!is.null(regress.cell.label)&batch.correction.method=="MNNcorrect"){ + library(BiocParallel) + library(batchelor) + + cat("FastMNN batch correction...", sep="\n\n") + + if(is.factor(regress.cell.label)&!auto.order){ + order=levels(regress.cell.label) + cat("FastMNN batch correction order:", sep="\n\n") + cat(order, sep="-->") + cat("", sep="\n\n") + } + + if(!is.factor(regress.cell.label)&!auto.order){ + order=unique(regress.cell.label) + cat("FastMNN batch correction order:", sep="\n\n") + cat(order, sep="-->") + cat("", sep="\n\n") + } + + batch.df=data.frame("batch"=regress.cell.label[filt]) + rownames(batch.df)=colnames(scmat) + scmat[["batch"]] = batch.df + + # http://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/fast_mnn.html + scmat <- NormalizeData(scmat) + scmat <- FindVariableFeatures(scmat) + + object.list = SplitObject(scmat, split.by = "batch")[order(order)] + + scmat <- SeuratWrappers::RunFastMNN(object.list, reduction.name = "mnnCorrect", auto.order=auto.order, BPPARAM=MulticoreParam(cores)) + + # set defaults + reduction="mnnCorrect" + cat("FastMNN batch correction... done", sep="\n\n") + + } + + # Basic seurat processing with umap: + scmat=RunUMAP(scmat, dims = nr.pcs, reduction = reduction) + scmat=FindNeighbors(scmat, dims = nr.pcs, reduction = reduction) + scmat=FindClusters(scmat, resolution = resolution) + + # add celltype automatically + if(singleR){ + cat("Annotating with singleR", sep="\n\n") + + if(!file.exists(paste0(name, "_singler_object.Rdata"))){ + library("SingleR") + + counts <- GetAssayData(scmat, assay = assay, slot = "counts") + + if(singleR.reference=="blueprint_encode"){ + if(dim(counts)[2]<100000){ + singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(blueprint_encode), project.name=name, fine.tune = T,do.signatures = T, numCores=cores) + }else{ + cat("SingleR in batches", sep="\n\n") + singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat), N=30000, ref.list = list(blueprint_encode), xy = Embeddings(scmat[["umap"]]), project.name=name, fine.tune = T, do.signatures = T, numCores=cores) + } + } + + if(singleR.reference=="HPCA"){ + if(dim(counts)[2]<100000){ + singler = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores) + }else{ + cat("SingleR in batches", sep="\n\n") + singler = CreateBigSingleRObject.custom(counts, annot = NULL, clusters=Idents(scmat), N=30000, ref.list = list(hpca), project.name=name, xy = Embeddings(scmat[["umap"]]), fine.tune = T,do.signatures = T, numCores=cores) + } + } + + # add original identifiers + singler$meta.data$orig.ident = scmat@meta.data$orig.ident # the original identities, if not supplied in 'annot' + + ## if using Seurat v3.0 and over use: + singler$meta.data$xy = scmat@reductions$umap@cell.embeddings # the tSNE coordinates + singler$meta.data$clusters = scmat@active.ident # the Seurat clusters (if 'clusters' not provided) + + # add annot from hpca + # singler2 = CreateSinglerObject(counts, annot = NULL, clusters=Idents(scmat), ref.list = list(hpca), project.name=name, fine.tune = T,do.signatures = T, numCores=cores) + # singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells" + # singler$singler[[1]]$SingleR.single$labels[singler2$singler[[1]]$SingleR.single$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells" + # singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pre-B_cell_CD34-"]="pre-B-cells" + # singler$singler[[1]]$SingleR.clusters$labels[singler2$singler[[1]]$SingleR.clusters$labels%in%"Pro-B_cell_CD34+"]="pro-B-cells" + + # these names are not intuitive, replace them, mentioned that these are actually naive http://comphealth.ucsf.edu/SingleR/SupplementaryInformation2.html: + singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD4+ T-cells"]="CD4+ Tn" + singler$singler[[1]]$SingleR.single$labels[singler$singler[[1]]$SingleR.single$labels%in%"CD8+ T-cells"]="CD8+ Tn" + singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD4+ T-cells"]="CD4+ Tn" + singler$singler[[1]]$SingleR.clusters$labels[singler$singler[[1]]$SingleR.clusters$labels%in%"CD8+ T-cells"]="CD8+ Tn" + + save(singler, file=paste0(name, "_singler_object.Rdata")) + + }else{ + library("SingleR") + load(paste0(name, "_singler_object.Rdata")) + cat(paste0("Loaded existing SingleR object (", paste0(name, "_singler_object.Rdata"), "), remove/rename it if you want to re-compute."), sep="\n\n") + + } + # can be added as cluster id + cluster=singler$singler[[1]]$SingleR.clusters$labels + cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels + + # order based on cell type and add celltype to cluster: + lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes") + lineage=unique(c(lineage,blueprint_encode$types, hpca$types)) + lineage=lineage[lineage%in%singler$singler[[1]]$SingleR.single$labels] + + scmat[["SingleR.label.main"]]=singler$singler[[1]]$SingleR.single.main$labels + scmat[["SingleR.label.cluster"]]=paste(singler$singler[[1]]$SingleR.single$labels, Idents(scmat)) + scmat[["SingleR.label"]]=factor(singler$singler[[1]]$SingleR.single$labels, levels=lineage[lineage%in%unique(singler$singler[[1]]$SingleR.single$labels)]) + + identityVector.samples=as.character(Idents(scmat)) + clusters.samples=Idents(scmat) + + for(j in seq(cluster)){ + identityVector.samples[clusters.samples%in%rownames(cluster)[j]]=paste0(cluster[j], ":", rownames(cluster)[j]) + } + + # order and cluster identity; + cluster=cluster[order(match(cluster[,1],lineage)),,drop=F] + cat(paste(levels(Idents(scmat)), collapse=","), paste(cluster, collapse=","), sep="\t\t") + + scmat[["SingleR.cluster"]]=gsub(":.*.", "", Idents(scmat)) # just the "cluster label" per cell + + # order seurat clusters too: + Idents(scmat)=factor(identityVector.samples, levels=paste0(cluster, ":", rownames(cluster))) + + r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE, pt.size = 0.5, repel = T) + NoLegend() + NoAxes() + ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6) + + cat("\nSingleR done", sep="\n\n") + + + # new singleR: + # counts <- GetAssayData(scmat, assay = "RNA", slot = "counts") + # + # library(SingleR) + # library(scater) + # + # ref=NovershternHematopoieticData() + # + # common <- intersect(rownames(counts), rownames(ref)) + # ref <- ref[common,] + # counts <- counts[common,] + # counts.log <- logNormCounts(counts) + # + # singler <- SingleR(test = counts.log, ref = ref, labels = ref$label.main, numCores=cores) + # + # # order based on cell type and add celltype to cluster: + # lineage=c("HSC","MPP","CLP","CMP","GMP","MEP","Monocytes","DC","Macrophages","Macrophages M1","Macrophages M2","CD4+ Tn","CD4+ T-cells", "CD4+ Tcm", "CD4+ Tem","CD8+ Tn","CD8+ T-cells","CD8+ Tcm","CD8+ Tem","NK cells","Tregs","naive B-cells","Memory B-cells","Class-switched memory B-cells","Plasma cells","Endothelial cells","Neutrophils","Eosinophils","Fibroblasts","Smooth muscle","Erythrocytes","Megakaryocytes") + # lineage=unique(c(lineage,ref$label.main)) + # + # scmat[["SingleR.label.main"]]=singler$first.labels + # scmat[["SingleR.label"]]=factor(singler$labels, levels=lineage[lineage%in%unique(singler$labels)]) + # + # r4=DimPlot(object = scmat, group.by = "SingleR.label", reduction = 'umap' , label = TRUE, pt.size = 0.5, repel = T) + NoLegend() + NoAxes() + # ggplot2::ggsave(r4, filename = paste0(name, "_UMAP_singleR.pdf"), width = 6, height = 6) + # + # cat("\nSingleR done", sep="\n\n") + + } + + # plot clusters and samples + if(plot.umap){ + if(!is.null(regress.cell.label)){ + r6=DimPlot(scmat, group.by = c("batch"), ncol = 2, label = T, repel = T) + NoLegend() + NoAxes() + ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_batch.pdf"), width = 6, height = 6) + r6=DimPlot(scmat, group.by = c("ident"), ncol = 2, label = T, repel = T) + NoLegend() + NoAxes() + ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6) + }else{ + r6=DimPlot(scmat, group.by = c("ident"), ncol = 1, label = T, repel = T) + NoLegend() + NoAxes() + ggplot2::ggsave(r6, filename = paste0(name, "_UMAP_clusters.pdf"), width = 6, height = 6) + } + } + + # add immunoscores to FM format data matrix + fm.f <- GetAssayData(scmat) + + # add gm based score: + add.scores=list(HLAIScore=c("B2M", "HLA-A", "HLA-B","HLA-C"), HLAIIScore=c("HLA-DMA","HLA-DMB","HLA-DPA1","HLA-DPB1","HLA-DRA","HLA-DRB1"), CytolyticScore=c("GZMA", "PRF1", "GNLY", "GZMH", "GZMM")) + if(!is.null(add.gm.score))add.scores=append(add.scores, add.gm.score) + + gm.objects=do.call(rbind, lapply(seq(add.scores), function(i){ + dat3=fm.f[rownames(fm.f)%in%add.scores[[i]],] + gm=t(apply(dat3, 2, gm_mean)) # done to normalized values + rownames(gm)=names(add.scores)[i] + return(gm) + })) + + # also add to seurat object: + for(i in seq(add.scores)){ + scmat[[names(add.scores)[i]]] <- gm.objects[i,] + } + + # gene expression and scores to fm: + rownames(fm.f)=paste0("N:GEXP:", rownames(fm.f)) + rownames(gm.objects)=paste0("N:SAMP:", rownames(gm.objects)) + fm=rbind(gm.objects, fm.f) + + # DE analysis: + markers.all=FindAllMarkers(scmat, only.pos = T)#, ...) + + # go through each data matrix, add to seurat and fm if more matrices + if(!is.null(list.additional)){ + + for(i in seq(list.additional)){ + mat.add=list.additional[[i]] + mat.add=subset(mat.add,cells = colnames(scmat)) + scmat[[names(list.additional)[i]]] <- mat.add + scmat <- NormalizeData(scmat, assay = names(list.additional)[i], normalization.method = "CLR") # test scttransform too + scmat <- ScaleData(scmat, assay = names(list.additional)[i]) + + add.data.normalized <- data.matrix(GetAssayData(scmat, assay = names(list.additional)[i], slot = "data")) + rownames(add.data.normalized)=paste0("N:",names(list.additional)[i], ":", rownames(add.data.normalized)) + fm=rbind(fm, add.data.normalized) + } + } + + save(list=c("fm", "scmat", "markers.all"), file=paste0(name, "_scRNA.Rdata")) + + return(paste0(name, "_scRNA.Rdata")) +} + +# make sure data is on a linear non-log scale +# check 0 values, if a lot between 0-1 better to use add.one. If counts, remove works too quite well +# zero fix methods: https://arxiv.org/pdf/1806.06403.pdf +gm_mean=function(x, na.rm=TRUE, zero.handle=c("remove", "add.one", "zero.propagate")){ + zero.handle=match.arg(zero.handle) + if(any(x < 0, na.rm = TRUE)){ + warning("Negative values produced NaN - is the data on linear - non-log scale?") + return(NaN) + } + + if(zero.handle=="remove"){ + return(exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))) + } + + if(zero.handle=="add.one"){ + return(exp(mean(log(x+1), na.rm = na.rm))-1) + } + + if(zero.handle=="zero.propagate"){ + if(any(x == 0, na.rm = TRUE)){ + return(0) + } + return(exp(mean(log(x), na.rm = na.rm))) + } + +} + +get.cluster.label.singleR=function(singler){ + + # how to add this? + cluster=singler$singler[[1]]$SingleR.clusters$labels + cluster.main=singler$singler[[1]]$SingleR.clusters.main$labels + + # order based on cell type: + # cat(paste0("'", colors.group[,1], "'"), sep=",") + lineage=c('HSC','MPP','CLP','CMP','GMP','MEP','Monocytes','DC','Macrophages','Macrophages M1','Macrophages M2','CD4+ T-cells','CD8+ T-cells','Tregs','T_cell:gamma-delta','T_cell:CD4+_Naive','T_cell:CD8+_naive','T_cell:Treg:Naive','CD4+ Tcm','CD4+ Tem','CD8+ Tcm','CD8+ Tem','NK cells','naive B-cells','Memory B-cells','B-cells','Class-switched memory B-cells','Plasma cells','Endothelial cells','Neutrophils','Eosinophils','Fibroblasts','Smooth muscle','Erythroblast','Erythrocytes','Megakaryocytes') + + cluster=cluster[order(match(cluster[,1],lineage)),,drop=F] + + # order and cluster identity; + cat(paste(rownames(cluster), collapse=","), paste(cluster, collapse=","), sep="\t") + cat("", sep="\n") + return(cluster) +} + +# run cellphoneDB +run.cellphonedb=function(scmat, celltype, out=getwd(), cores=10, threshold=0.05){ + + if(!dir.exists(out))dir.create(out, recursive = T) + + if(is.null(celltype)){ + celltype=gsub(" ", "_", as.character(Idents(scmat))) + } + + setwd(out) + # input1: + df=data.frame("Cell"=gsub("\\.", "_", colnames(scmat)), "cell_type"=celltype, stringsAsFactors = F) + df$cell_type[scmat[["SingleR.label"]]=="Tregs"]="Tregs" + + meta=file.path(out, "meta.tsv") + write.table(df, meta, row.names = F, col.names = T, quote = F, sep="\t") + + # input2: + counts <- data.matrix(GetAssayData(scmat, assay = "SCT", slot = "data")) + countsfile=tempfile() + + # gene symbol was not working... + library("org.Hs.eg.db") # remember to install it if you don't have it already + ensambleid <- mapIds(org.Hs.eg.db, keys = rownames(counts), keytype = "SYMBOL", column="ENSEMBL") + rownames(counts)=ensambleid + + counts=counts[!is.na(rownames(counts)),] + + countsfile=file.path(out, "counts.tsv") + + write.table(t(c("Gene", gsub("\\.", "_", colnames(counts)))), countsfile, row.names = F, col.names = F, quote = F, sep="\t") + write.table(counts, countsfile, row.names = T, col.names = F, quote = F, sep="\t", append=T) + + system(paste0("cellphonedb method statistical_analysis ", meta, " ", countsfile, " --iterations=1000 --threads=", cores, " --threshold=", threshold)) + + # make heatmap of interactions: + system(paste0("cellphonedb plot heatmap_plot ", meta)) + + deconvoluted=data.table::fread(file.path(out, "out/deconvoluted.txt"), data.table = F) + pvalues=data.table::fread(file.path(out, "out/pvalues.txt"), data.table = F) + significant_means.txt=data.table::fread(file.path(out, "out/significant_means.txt"), data.table = F) + means=data.table::fread(file.path(out, "out/means.txt"), data.table = F) + + unlink(countsfile) + unlink(meta) + + return(list("deconvoluted"=deconvoluted, "pvalues"=pvalues, "significant_means"=significant_means.txt, "means"=means)) +} + +# found bug in reference list, was missing, also min genes was 200 not 0 causing errors... +CreateBigSingleRObject.custom=function (counts, annot = NULL, project.name, xy, clusters, N = 10000, + min.genes = 0, technology = "10X", species = "Human", citation = "", + ref.list = list(), normalize.gene.length = F, variable.genes = "de", + fine.tune = T, reduce.file.size = T, do.signatures = F, do.main.types = T, + temp.dir = getwd(), numCores = SingleR.numCores){ + n = ncol(counts) + s = seq(1, n, by = N) + dir.create(paste0(temp.dir, "/singler.temp/"), showWarnings = FALSE) + for (i in s) { + print(i) + A = seq(i, min(i + N - 1, n)) + + singler = CreateSinglerObject(counts[, A], annot = annot[A], ref.list = ref.list, + project.name = project.name, min.genes = min.genes, + technology = technology, species = species, citation = citation, + do.signatures = do.signatures, clusters = NULL, numCores = numCores) + + + save(singler, file = paste0(temp.dir, "/singler.temp/", + project.name, ".", i, ".RData")) + } + singler.objects.file <- list.files(paste0(temp.dir, "/singler.temp/"), + pattern = "RData", full.names = T) + singler.objects = list() + for (i in 1:length(singler.objects.file)) { + load(singler.objects.file[[i]]) + singler.objects[[i]] = singler + } + singler = SingleR.Combine.custom(singler.objects, order = colnames(counts),expr = counts, clusters = clusters, xy = xy) + singler +} + +# bug also in this... +SingleR.Combine.custom=function (singler.list, order = NULL, clusters = NULL, expr = NULL, xy = NULL) +{ + singler = c() + singler$singler = singler.list[[1]]$singler + for (j in 1:length(singler.list[[1]]$singler)) { + singler$singler[[j]]$SingleR.cluster = c() + singler$singler[[j]]$SingleR.cluster.main = c() + singler$singler[[j]]$SingleR.single$clusters = c() + } + singler$meta.data = singler.list[[1]]$meta.data + singler$meta.data$clusters = c() + singler$meta.data$xy = c() + singler$meta.data$data.sets = rep(singler$meta.data$project.name, + length(singler$meta.data$orig.ident)) + for (i in 2:length(singler.list)) { + for (j in 1:length(singler$singler)) { + if (singler.list[[i]]$singler[[j]]$about$RefData != + singler.list[[1]]$singler[[j]]$about$RefData) { + stop("The objects are not ordered by the same reference data.") + } + singler$singler[[j]]$about$Organism = c(singler$singler[[j]]$about$Organism, + singler.list[[i]]$singler[[j]]$about$Organism) + singler$singler[[j]]$about$Citation = c(singler$singler[[j]]$about$Citation, + singler.list[[i]]$singler[[j]]$about$Citation) + singler$singler[[j]]$about$Technology = c(singler$singler[[j]]$about$Technology, + singler.list[[i]]$singler[[j]]$about$Technology) + singler$singler[[j]]$SingleR.single$labels = rbind(singler$singler[[j]]$SingleR.single$labels, + singler.list[[i]]$singler[[j]]$SingleR.single$labels) + if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) { + singler$singler[[j]]$SingleR.single$labels1 = rbind(singler$singler[[j]]$SingleR.single$labels1, + singler.list[[i]]$singler[[j]]$SingleR.single$labels1) + } + singler$singler[[j]]$SingleR.single$scores = rbind(singler$singler[[j]]$SingleR.single$scores, + singler.list[[i]]$singler[[j]]$SingleR.single$scores) + singler$singler[[j]]$SingleR.single.main$labels = rbind(singler$singler[[j]]$SingleR.single.main$labels, + singler.list[[i]]$singler[[j]]$SingleR.single.main$labels) + if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) { + singler$singler[[j]]$SingleR.single.main$labels1 = rbind(singler$singler[[j]]$SingleR.single.main$labels1, + singler.list[[i]]$singler[[j]]$SingleR.single.main$labels1) + } + singler$singler[[j]]$SingleR.single.main$scores = rbind(singler$singler[[j]]$SingleR.single.main$scores, + singler.list[[i]]$singler[[j]]$SingleR.single.main$scores) + singler$singler[[j]]$SingleR.single$cell.names = c(singler$singler[[j]]$SingleR.single$cell.names, + singler.list[[i]]$singler[[j]]$SingleR.single$cell.names) + singler$singler[[j]]$SingleR.single.main$cell.names = c(singler$singler[[j]]$SingleR.single.main$cell.names, + singler.list[[i]]$singler[[j]]$SingleR.single.main$cell.names) + if (!is.null(singler$singler[[j]]$SingleR.single.main$pval)) { + singler$singler[[j]]$SingleR.single.main$pval = c(singler$singler[[j]]$SingleR.single.main$pval, + singler.list[[i]]$singler[[j]]$SingleR.single.main$pval) + } + if (!is.null(singler$singler[[j]]$SingleR.single$pval)) { + singler$singler[[j]]$SingleR.single$pval = c(singler$singler[[j]]$SingleR.single$pval, + singler.list[[i]]$singler[[j]]$SingleR.single$pval) + } + } + singler$meta.data$project.name = paste(singler$meta.data$project.name, + singler.list[[i]]$meta.data$project.name, sep = "+") + singler$meta.data$orig.ident = c(singler$meta.data$orig.ident, + singler.list[[i]]$meta.data$orig.ident) + singler$meta.data$data.sets = c(singler$meta.data$data.sets, + rep(singler.list[[i]]$meta.data$project.name, length(singler.list[[i]]$meta.data$orig.ident))) + } + for (j in 1:length(singler$singler)) { + if (!is.null(order)) { + singler$singler[[j]]$SingleR.single$labels = singler$singler[[j]]$SingleR.single$labels[order, + ] + if (!is.null(singler$singler[[j]]$SingleR.single$labels1)) { + singler$singler[[j]]$SingleR.single$labels1 = singler$singler[[j]]$SingleR.single$labels1[order, + ] + } + singler$singler[[j]]$SingleR.single$scores = singler$singler[[j]]$SingleR.single$scores[order, + ] + singler$singler[[j]]$SingleR.single$cell.names = singler$singler[[j]]$SingleR.single$cell.names[order] + singler$singler[[j]]$SingleR.single.main$labels = singler$singler[[j]]$SingleR.single.main$labels[order, + ] + if (!is.null(singler$singler[[j]]$SingleR.single.main$labels1)) { + singler$singler[[j]]$SingleR.single.main$labels1 = singler$singler[[j]]$SingleR.single.main$labels1[order, + ] + } + singler$singler[[j]]$SingleR.single.main$scores = singler$singler[[j]]$SingleR.single.main$scores[order, + ] + singler$singler[[j]]$SingleR.single.main$cell.names = singler$singler[[j]]$SingleR.single.main$cell.names[order] + if (!is.null(singler$singler[[j]]$SingleR.single$pval)) { + singler$singler[[j]]$SingleR.single$pval = singler$singler[[j]]$SingleR.single$pval[order] + singler$singler[[j]]$SingleR.single.main$pval = singler$singler[[j]]$SingleR.single.main$pval[order] + } + } + } + if (!is.null(clusters) && !is.null(expr)) { + for (j in 1:length(singler$singler)) { + if (is.character(singler$singler[[j]]$about$RefData)) { + ref = get(tolower(singler$singler[[j]]$about$RefData)) + } + else { + ref = singler$singler[[j]]$about$RefData + } + singler$singler[[j]]$SingleR.clusters = SingleR("cluster", + expr, ref$data, types = ref$types, clusters = factor(clusters), + sd.thres = ref$sd.thres, genes = "de", fine.tune = T) + singler$singler[[j]]$SingleR.clusters.main = SingleR("cluster", + expr, ref$data, types = ref$main_types, clusters = factor(clusters), + sd.thres = ref$sd.thres, genes = "de", fine.tune = T) + } + singler$meta.data$clusters = clusters + if (!is.null(xy)) { + singler$meta.data$xy = xy + } + } + singler +} \ No newline at end of file