Switch to side-by-side view

--- a
+++ b/Functions/Integrate.scRNA.scATAC.R
@@ -0,0 +1,125 @@
+#ref: Single cell transcriptional and chromatin accessibility profiling redefine cellular heterogeneity in the adult human kidney
+
+#'@param ref.npcs = 30, dims = 30, are the parameters in the FindTransferAnchors function, used to control the number of dimensions used by reference data and anchor search space respectively
+#'@param scATAC.assay = "ACTIVITY", scATAC.normalize = T, PC = 30, is used to process scATAC data.
+# scATAC.assay indicates the name of the assay where the gene activity is stored in the scATAC number
+# scATAC.normalize Whether the gene activity has been logNormalized
+# PC indicates the dimension of scATAC dimensionality reduction, which comes from the lsi of scATAC.object itself. Note that the default dimensionality reduction assay of scATAC.object is lsi
+
+Integrate.scRNA.scATAC <- function(scATAC.object, scRNA.object, class.label = "cellType", scRNA.assay = "RNA", ref.npcs = 30, dims = 30, scATAC.assay = "ACTIVITY", scATAC.normalize = T, PC = 30, nfeatures = 3000, SCT = F){
+
+    ##1. load data
+    #scRNA-seq data
+    DefaultAssay(scRNA.object) <- scRNA.assay
+    scRNA.object <- NormalizeData(scRNA.object)
+    if(SCT){
+        #Need to use sctransform to normalize data in advance
+        var.genes <- scRNA.object@assays$SCT@var.features[1:nfeatures]
+    }else{
+        var.genes <- VariableFeatures(scRNA.object)[1:nfeatures]
+    }
+    #scATAC-seq data
+    DefaultAssay(scATAC.object) <- scATAC.assay
+    if(!scATAC.normalize){
+        scATAC.object <- NormalizeData(scATAC.object)
+    }
+    scATAC.object <- ScaleData(scATAC.object, features = rownames(scATAC.object))
+
+    ##2. Identify anchors
+    #npcs = 30; dim = 1:30
+    transfer.anchors <- FindTransferAnchors(reference = scRNA.object, 
+                                            query = scATAC.object, 
+                                            features = var.genes, #SCT model:scRNA.merge@assays$SCT@var.features
+                                            reference.assay = scRNA.assay,
+                                            query.assay = scATAC.assay, 
+                                            reduction = "cca",
+                                            npcs = ref.npcs, # Number of PCs to compute on reference if reference
+                                            dims = 1:dims) # Which dimensions to use from the reduction to specify the neighbor search space
+    #saveRDS(transfer.anchors, file = file.path(saveDir, "transfer.anchors.rds"))
+
+    #The default dims = NULL, it is consistent with FindTransferAnchors
+    celltype.predictions <- TransferData(anchorset = transfer.anchors, 
+                                refdata = scRNA.object@meta.data[,class.label], 
+                                weight.reduction = scATAC.object[["lsi"]], 
+                                dims = 2:PC)    
+
+
+    scATAC.object <- AddMetaData(scATAC.object, metadata = celltype.predictions)
+    scATAC.object <- AddMetaData(scATAC.object, metadata = celltype.predictions$predicted.id, class.label)
+
+    p <- gghistogram(scATAC.object@meta.data, x = "prediction.score.max", y = "..density..", main = "Prediction Score for scATAC", add_density = TRUE)
+    print(p)    
+    p <- DimPlot(scATAC.object, group.by = class.label, label = TRUE, repel = TRUE, reduction = "umap") +
+        ggtitle("scATAC-seq Predicted Celltypes Before Harmony") + NoLegend()
+    print(p)
+    p <- DimPlot(scATAC.object, group.by = class.label, label = TRUE, repel = TRUE, reduction = "umap") +
+        ggtitle("scATAC-seq Predicted Celltypes Before Harmony")
+    print(p)    
+    p <- DimPlot(scATAC.object, group.by = class.label, label = TRUE, repel = TRUE, reduction = "tsne") +
+        ggtitle("scATAC-seq Predicted Celltypes Before Harmony") + NoLegend()
+    print(p)
+    p <- DimPlot(scATAC.object, group.by = class.label, label = TRUE, repel = TRUE, reduction = "umap") +
+        ggtitle("scATAC-seq Predicted Celltypes Before Harmony")
+    print(p)
+    return(list(transfer.anchors = transfer.anchors, scATAC.object = scATAC.object))
+}
+
+#' @param dims Indicates the dimensionality reduction used when coembedding
+Coembedding.scATAC.scRNA <- function(transfer.anchors, scATAC.object, scRNA.assay = "RNA", scRNA.object, class.label = "cellType", PC = 30, dims = 30){
+    
+    ####Co-embedding scRNA-seq and scATAC-seq datasets
+    # note that we restrict the imputation to variable genes from scRNA-seq, but could impute the
+    # full transcriptome if we wanted to
+    genes.use <- transfer.anchors@anchor.features
+    DefaultAssay(scRNA.object) <- scRNA.assay
+    scRNA.object <- NormalizeData(scRNA.object)
+    refdata <- GetAssayData(scRNA.object, assay = scRNA.assay, slot = "data")[genes.use, ]
+    # refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells.  imputation
+    # (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
+    imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = scATAC.object[["lsi"]], dims = 2:PC)
+    scATAC.object[["RNA"]] <- imputation
+    DefaultAssay(scATAC.object) <- "RNA"     
+
+    scRNA.object <- AddMetaData(scRNA.object, metadata = rep("scRNA", nrow(scRNA.object@meta.data)), col.name = "type")
+    scATAC.object <- AddMetaData(scATAC.object, metadata = rep("scATAC", nrow(scATAC.object@meta.data)), col.name = "type")
+
+    coembed <- merge(x = scRNA.object, y = scATAC.object)
+    DefaultAssay(coembed) <- "RNA"
+    # Finally, we run PCA and UMAP on this combined object, to visualize the co-embedding of both
+    # datasets
+    coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
+    coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
+    coembed <- RunHarmony(object = coembed, group.by.vars = "orig.ident", verbose = FALSE)
+    coembed <- RunUMAP(coembed, dims = 1:dims, reduction = 'harmony', verbose = FALSE)
+    coembed <- RunTSNE(coembed, dims = 1:dims, reduction = 'harmony', check_duplicates = FALSE, verbose = FALSE)
+    #saveRDS(coembed, file = file.path(saveDir, "coembed.rds"))
+
+    p <- ElbowPlot(object = coembed, ndims = dims)
+    print(p)
+    p <- DimPlot(coembed, group.by = c("orig.ident"), reduction = "umap", label = T) + NoLegend()
+    print(p)
+    p <- DimPlot(coembed, group.by = c("orig.ident"), reduction = "tsne", label = T) + NoLegend()
+    print(p)
+    p <- DimPlot(coembed, group.by = c("type"), reduction = "umap", label = T) + NoLegend()
+    print(p)
+    p <- DimPlot(coembed, group.by = class.label, reduction = "umap", label = T) + NoLegend()
+    print(p)
+    p <- DimPlot(coembed, group.by = c("type"), reduction = "tsne", label = T) + NoLegend()
+    print(p)
+    p <- DimPlot(coembed, group.by = class.label, reduction = "tsne", label = T) + NoLegend()
+    print(p)
+
+    p <- DimPlot(coembed, group.by = c("orig.ident"), reduction = "umap", label = T)
+    print(p)
+    p <- DimPlot(coembed, group.by = c("orig.ident"), reduction = "tsne", label = T)
+    print(p)
+    p <- DimPlot(coembed, group.by = c("type"), reduction = "umap", label = T)
+    print(p)
+    p <- DimPlot(coembed, group.by = class.label, reduction = "umap", label = T)
+    print(p)
+    p <- DimPlot(coembed, group.by = c("type"), reduction = "tsne", label = T)
+    print(p)
+    p <- DimPlot(coembed, group.by = class.label, reduction = "tsne", label = T)
+    print(p)
+    return(list(coembed = coembed, scATAC.object = scATAC.object))
+}