In this markdown, analysis and quality control steps are performed following chapters 8 and 9 of the book ‘Orchestrating Spatially-Resolved Transcriptomics Analysis with Bioconductor’ https://lmweber.org/OSTA-book/. Data is fetched and plotted, then QC metrics are calculated and data is filtered based on QC thresholds for sample 151507 only. After QC operations, 33538 genes and 4136 spots remain in the sample.
Data is sourced from spatialLIBD (http://spatial.libd.org/spatialLIBD/), a project containing LIBD human dorsolateral pre-frontal cortex (DLPFC) spatial transcriptomics data from the 10x Genomics Visium Platform (https://10xgenomics.com/products/spatial-gene-expression). The data contains 12 lowres samples each with a scale factor of 0.0450045. For the purposes of this project, only sample 151507 is visualized and manipulated to start with, so the SpatialExperiment object is filtered to only include sample 151507. In sample 151507, 33538 genes and 4662 spots are captured, each with a unique barcode.
library(spatialLIBD)
library(ggspavis)
library(scater)
# fetch data: 33538 genes, 47681 spots
spe <- fetch_data(type='spe')
# filter to 4226 spots pertaining to sample 151507
sample_id <- unique(spe$sample_id)[1]
spe_sub <- spe[, spe$sample_id == sample_id]
# data frame of 4226 rows holding colData
sample_df <- as.data.frame(colData(spe_sub), optional = TRUE)
Sample 151507 is visualized by transforming the image into a rasterGrob object and calling ggplot.
# get histology image
img <- SpatialExperiment::imgRaster(spe_sub)
## Transform to a rasterGrob object
grob <- grid::rasterGrob(img, width = grid::unit(1, "npc"), height = grid::unit(1, "npc"))
## Make a plot using geom_spatial
p <- ggplot2::ggplot(
sample_df,
ggplot2::aes(
x = pxl_col_in_fullres * SpatialExperiment::scaleFactors(spe_sub),
y = pxl_row_in_fullres * SpatialExperiment::scaleFactors(spe_sub),
)
) +
geom_spatial(
data = tibble::tibble(grob = list(grob)),
ggplot2::aes(grob = grob),
x = 0.5,
y = 0.5
)
## Show the plot
print(p)
We can identify LQ spots by:
1. Library size 2. Number of expressed genes 3. Proportion of
mitochondrial reads
4. Number of cells per spot
Note: all spots are already over tissue.
# identify mitochondrial genes
is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe_sub)$gene_name)
# add per spot QC metrics, including mitochondrial genes as a feature subset
spe_sub <- addPerCellQC(spe_sub, subsets = list(mito = is_mito))
Library size = sum of UMI (unique molecular identifier) counts per spot, remove low library size
# continuous variable containing total number of genes at each spot
hist(colData(spe_sub)$sum, breaks = 20)
# set filtering threshold for library size at 400
plotQC(spe_sub, type = "scatter",
metric_x = "cell_count", metric_y = "sum",
threshold_y = 600)
# check number of spots below threshold
qc_lib_size <- colData(spe_sub)$sum < 600
table(qc_lib_size)
## qc_lib_size
## FALSE TRUE
## 4182 44
# look for spatial pattern in discarded spots
colData(spe_sub)$qc_lib_size <- qc_lib_size
# plot spots
threshold_row <- colData(spe_sub)[colData(spe_sub)$qc_lib_size,][,'array_row']
threshold_col <- colData(spe_sub)[colData(spe_sub)$qc_lib_size,][,'array_col']
ggplot(data.frame(threshold_row, threshold_col), aes(x=threshold_row, y=threshold_col)) + geom_point()
Discard spots with a low number of expressed genes per spot
# continuous variable containing total number of genes at each spot
hist(colData(spe_sub)$detected, breaks = 20)
# set filtering threshold for library size at 500
plotQC(spe_sub, type = "scatter",
metric_x = "cell_count", metric_y = "detected",
threshold_y = 400)
# check number of spots below threshold
qc_detected <- colData(spe_sub)$detected < 400
table(qc_lib_size)
## qc_lib_size
## FALSE TRUE
## 4182 44
# look for spatial pattern in discarded spots
colData(spe_sub)$qc_detected <- qc_detected
# plot spots
threshold_row <- colData(spe_sub)[colData(spe_sub)$qc_detected,][,'array_row']
threshold_col <- colData(spe_sub)[colData(spe_sub)$qc_detected,][,'array_col']
ggplot(data.frame(threshold_row, threshold_col), aes(x=threshold_row, y=threshold_col)) + geom_point()
A high proportion of mitochondrial reads can indicate cell damage
# histogram of mitochondrial read proportions
hist(colData(spe_sub)$subsets_mito_percent, breaks = 20)
# plot mitochondrial read proportion vs. number of cells per spot
plotQC(spe_sub, type = "scatter",
metric_x = "cell_count", metric_y = "subsets_mito_percent",
threshold_y = 28)
# select QC threshold for mitochondrial read proportion
qc_mito <- colData(spe_sub)$subsets_mito_percent > 28
table(qc_mito)
## qc_mito
## FALSE TRUE
## 4197 29
# look for spatial pattern in discarded spots
colData(spe_sub)$qc_mito <- qc_mito
# plot spots
threshold_row <- colData(spe_sub)[colData(spe_sub)$qc_mito,][,'array_row']
threshold_col <- colData(spe_sub)[colData(spe_sub)$qc_mito,][,'array_col']
ggplot(data.frame(threshold_row, threshold_col), aes(x=threshold_row, y=threshold_col)) + geom_point()
Spots with high cell counts have low numbers of expressed genes (i.e. experiments failed)
# histogram of number of cells per spot
hist(colData(spe_sub)$cell_count, breaks = 20)
# distribution of cells per spot
tbl_cells_per_spot <- table(colData(spe_sub)$cell_count)
# plot number of expressed genes vs. number of cells per spot
plotQC(spe_sub, type = "scatter",
metric_x = "cell_count", metric_y = "detected",
threshold_x = 12)
# select QC threshold for number of cells per spot
qc_cell_count <- colData(spe_sub)$cell_count > 12
table(qc_cell_count)
## qc_cell_count
## FALSE TRUE
## 4206 20
colData(spe_sub)$qc_cell_count <- qc_cell_count
# plot spots
threshold_row <- colData(spe_sub)[colData(spe_sub)$qc_cell_count,][,'array_row']
threshold_col <- colData(spe_sub)[colData(spe_sub)$qc_cell_count,][,'array_col']
ggplot(data.frame(threshold_row, threshold_col), aes(x=threshold_row, y=threshold_col)) + geom_point()
# number of discarded spots for each metric
apply(cbind(qc_lib_size, qc_detected, qc_mito, qc_cell_count), 2, sum)
## qc_lib_size qc_detected qc_mito qc_cell_count
## 44 29 29 20
# combined set of discarded spots
discard <- qc_lib_size | qc_detected | qc_mito | qc_cell_count
table(discard)
## discard
## FALSE TRUE
## 4136 90
# store in object
colData(spe_sub)$discard <- discard
# check spatial pattern of combined set of discarded spots
# plot spots
threshold_row <- colData(spe_sub)[colData(spe_sub)$discard,][,'array_row']
threshold_col <- colData(spe_sub)[colData(spe_sub)$discard,][,'array_col']
ggplot(data.frame(threshold_row, threshold_col), aes(x=threshold_row, y=threshold_col)) + geom_point()
# remove combined set of low-quality spots
spe_sub <- spe_sub[, !colData(spe_sub)$discard]
dim(spe_sub)
## [1] 33538 4136