477 lines (476 with data), 12.4 kB
{
"cells": [
{
"cell_type": "markdown",
"id": "60d222bd",
"metadata": {},
"source": [
"# MultiVelo Template\n",
"\n",
"This is an example of basic workflow for 10X Cell Ranger ARC 2.0 output.\n",
"```\n",
".\n",
"|-- MultiVelo_Template.ipynb\n",
"|-- outs\n",
"| |-- analysis\n",
"| | `-- feature_linkage\n",
"| | `-- feature_linkage.bedpe\n",
"| |-- filtered_feature_bc_matrix\n",
"| | |-- barcodes.tsv.gz\n",
"| | |-- features.tsv.gz\n",
"| | `-- matrix.mtx.gz\n",
"| `-- atac_peak_annotation.tsv\n",
"|-- seurat_wnn\n",
"| |-- nn_cells.txt\n",
"| |-- nn_dist.txt\n",
"| `-- nn_idx.txt\n",
"`-- velocyto\n",
" `-- gex_possorted_bam_XXXXX.loom\n",
"```\n",
"Please replace ... with appropriate values."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53937c83",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import scipy\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import scvelo as scv\n",
"import multivelo as mv\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b412d727",
"metadata": {},
"outputs": [],
"source": [
"scv.settings.verbosity = 3\n",
"scv.settings.presenter_view = True\n",
"scv.set_figure_params('scvelo')\n",
"pd.set_option('display.max_columns', 100)\n",
"pd.set_option('display.max_rows', 200)\n",
"np.set_printoptions(suppress=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32217761",
"metadata": {},
"outputs": [],
"source": [
"# Read in RNA and filter\n",
"adata_rna = scv.read('velocyto/gex_possorted_bam_XXXXX.loom', cache=True)\n",
"adata_rna.obs_names = [x.split(':')[1][:-1] + '-1' for x in adata_rna.obs_names]\n",
"adata_rna.var_names_make_unique()\n",
"sc.pp.filter_cells(adata_rna, min_counts=...)\n",
"sc.pp.filter_cells(adata_rna, max_counts=...)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a114e43",
"metadata": {},
"outputs": [],
"source": [
"# Read in ATAC, gene aggregate, and filter\n",
"adata_atac = sc.read_10x_mtx('outs/filtered_feature_bc_matrix/', var_names='gene_symbols', cache=True, gex_only=False)\n",
"adata_atac = adata_atac[:,adata_atac.var['feature_types'] == \"Peaks\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9104058",
"metadata": {},
"outputs": [],
"source": [
"# Aggregate peaks around each gene as well as those that have high correlations with promoter peak or gene expression\n",
"adata_atac = mv.aggregate_peaks_10x(adata_atac, \n",
" 'outs/atac_peak_annotation.tsv', \n",
" 'outs/analysis/feature_linkage/feature_linkage.bedpe', \n",
" verbose=True) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e0dd29f",
"metadata": {},
"outputs": [],
"source": [
"sc.pp.filter_cells(adata_atac, min_counts=...)\n",
"sc.pp.filter_cells(adata_atac, max_counts=...)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6d6fb09",
"metadata": {},
"outputs": [],
"source": [
"# Find shared cells and genes between RNA and ATAC\n",
"shared_cells = pd.Index(np.intersect1d(adata_rna.obs_names, adata_atac.obs_names))\n",
"shared_genes = pd.Index(np.intersect1d(adata_rna.var_names, adata_atac.var_names))\n",
"len(shared_cells), len(shared_genes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73b3bf76",
"metadata": {},
"outputs": [],
"source": [
"adata_rna = adata_rna[shared_cells, shared_genes]\n",
"adata_atac = adata_atac[shared_cells, shared_genes]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9471abe5",
"metadata": {},
"outputs": [],
"source": [
"# Normalize RNA\n",
"scv.pp.filter_and_normalize(adata_rna, min_shared_counts=..., n_top_genes=...)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9702c6f7",
"metadata": {},
"outputs": [],
"source": [
"# Optionally, regress out the effects of cell cycle and/or scale RNA matrix if it gives better clustering results\n",
"# scv.tl.score_genes_cell_cycle(adata_rna)\n",
"# sc.pp.regress_out(adata_rna, ['S_score', 'G2M_score’])\n",
"# sc.pp.scale(adata_rna)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15b6ad99",
"metadata": {},
"outputs": [],
"source": [
"scv.pp.moments(adata_rna, n_pcs=..., n_neighbors=...)\n",
"scv.tl.umap(adata_rna) # compute UMAP embedding\n",
"sc.tl.leiden(adata_rna) # compute clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9832db8c",
"metadata": {},
"outputs": [],
"source": [
"# Identify cell types\n",
"new_cluster_names = [...]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5723c019",
"metadata": {},
"outputs": [],
"source": [
"adata_rna.rename_categories('leiden', new_cluster_names) # annotate clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d64eb4c",
"metadata": {},
"outputs": [],
"source": [
"scv.pl.umap(adata_rna, color='leiden')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abcd1405",
"metadata": {},
"outputs": [],
"source": [
"# Normalize ATAC and subset for the same set of cells and genes\n",
"mv.tfidf_norm(adata_atac)\n",
"adata_atac = adata_atac[adata_rna.obs_names, adata_rna.var_names]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f7344e0",
"metadata": {},
"outputs": [],
"source": [
"# Write out filtered cells and prepare to run Seurat WNN\n",
"adata_rna.obs_names.to_frame().to_csv('filtered_cells.txt', header=False, index=False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16b729ae",
"metadata": {},
"outputs": [],
"source": [
"# Run Seurat WNN (R script can be found on GitHub)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e404bc3b",
"metadata": {},
"outputs": [],
"source": [
"# Back in python, load the neighbors\n",
"nn_idx = np.loadtxt(\"seurat_wnn/nn_idx.txt\", delimiter=',')\n",
"nn_dist = np.loadtxt(\"seurat_wnn/nn_dist.txt\", delimiter=',')\n",
"nn_cells = pd.Index(pd.read_csv(\"seurat_wnn/nn_cells.txt\", header=None)[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45a754ae",
"metadata": {},
"outputs": [],
"source": [
"np.all(nn_cells == adata_atac.obs_names) # make sure cell names match"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2eb4ee30",
"metadata": {},
"outputs": [],
"source": [
"# WNN smooth the gene aggregated ATAC matrix, resulting in a new Mc matrix in adata_atac.layers\n",
"mv.knn_smooth_chrom(adata_atac, nn_idx, nn_dist) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55a6024c",
"metadata": {},
"outputs": [],
"source": [
"# Run MultiVelo main function\n",
"adata_result = mv.recover_dynamics_chrom(adata_rna,\n",
" adata_atac,\n",
" max_iter=5, # coordinate-descent like optimization\n",
" init_mode=\"invert\", # simple, invert, or grid\n",
" verbose=False,\n",
" parallel=True,\n",
" save_plot=False,\n",
" rna_only=False,\n",
" fit=True,\n",
" n_anchors=500,\n",
" extra_color_key='leiden' # used if save_plot=True\n",
" )\n",
"# Full argument list can be shown with help(mv.recover_dynamics_chrom)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbec0729",
"metadata": {},
"outputs": [],
"source": [
"adata_result.write(\"multivelo_result.h5ad\") # save the result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50c18538",
"metadata": {},
"outputs": [],
"source": [
"# adata_result = sc.read_h5ad('multivelo_result.h5ad')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2e7dc3d",
"metadata": {},
"outputs": [],
"source": [
"mv.pie_summary(adata_result) # gene type chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39b05cb9",
"metadata": {},
"outputs": [],
"source": [
"mv.switch_time_summary(adata_result) # switch time statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6814aba",
"metadata": {},
"outputs": [],
"source": [
"mv.likelihood_plot(adata_result) # likelihood and model parameter statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "829dd9bf",
"metadata": {},
"outputs": [],
"source": [
"mv.velocity_graph(adata_result)\n",
"mv.latent_time(adata_result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00d47fa4",
"metadata": {},
"outputs": [],
"source": [
"mv.velocity_embedding_stream(adata_result, basis='umap', color='leiden') # velocity streams"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "140c08c3",
"metadata": {},
"outputs": [],
"source": [
"scv.pl.scatter(adata_result, color='latent_time', color_map='gnuplot', size=80) # latent time prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee0eca77",
"metadata": {},
"outputs": [],
"source": [
"# Some genes of interest\n",
"gene_list = [...]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c5ba7a8",
"metadata": {},
"outputs": [],
"source": [
"# Plot accessbility and expression against gene time or global latent time\n",
"mv.dynamic_plot(adata_result, gene_list, color_by='state', gene_time=True, axis_on=False, frame_on=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2ce7023",
"metadata": {},
"outputs": [],
"source": [
"# Phase portraits on the u-s, c-u, or 3-dimensional planes can be plotted\n",
"mv.scatter_plot(adata_result, gene_list, color_by='leiden', by='us', axis_on=False, frame_on=False) "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": "48"
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}