--- a +++ b/Examples/MultiVelo_Template.ipynb @@ -0,0 +1,476 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "60d222bd", + "metadata": {}, + "source": [ + "# MultiVelo Template\n", + "\n", + "This is an example of basic workflow for 10X Cell Ranger ARC 2.0 output.\n", + "```\n", + ".\n", + "|-- MultiVelo_Template.ipynb\n", + "|-- outs\n", + "| |-- analysis\n", + "| | `-- feature_linkage\n", + "| | `-- feature_linkage.bedpe\n", + "| |-- filtered_feature_bc_matrix\n", + "| | |-- barcodes.tsv.gz\n", + "| | |-- features.tsv.gz\n", + "| | `-- matrix.mtx.gz\n", + "| `-- atac_peak_annotation.tsv\n", + "|-- seurat_wnn\n", + "| |-- nn_cells.txt\n", + "| |-- nn_dist.txt\n", + "| `-- nn_idx.txt\n", + "`-- velocyto\n", + " `-- gex_possorted_bam_XXXXX.loom\n", + "```\n", + "Please replace ... with appropriate values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53937c83", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import scipy\n", + "import numpy as np\n", + "import pandas as pd\n", + "import scanpy as sc\n", + "import scvelo as scv\n", + "import multivelo as mv\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b412d727", + "metadata": {}, + "outputs": [], + "source": [ + "scv.settings.verbosity = 3\n", + "scv.settings.presenter_view = True\n", + "scv.set_figure_params('scvelo')\n", + "pd.set_option('display.max_columns', 100)\n", + "pd.set_option('display.max_rows', 200)\n", + "np.set_printoptions(suppress=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32217761", + "metadata": {}, + "outputs": [], + "source": [ + "# Read in RNA and filter\n", + "adata_rna = scv.read('velocyto/gex_possorted_bam_XXXXX.loom', cache=True)\n", + "adata_rna.obs_names = [x.split(':')[1][:-1] + '-1' for x in adata_rna.obs_names]\n", + "adata_rna.var_names_make_unique()\n", + "sc.pp.filter_cells(adata_rna, min_counts=...)\n", + "sc.pp.filter_cells(adata_rna, max_counts=...)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a114e43", + "metadata": {}, + "outputs": [], + "source": [ + "# Read in ATAC, gene aggregate, and filter\n", + "adata_atac = sc.read_10x_mtx('outs/filtered_feature_bc_matrix/', var_names='gene_symbols', cache=True, gex_only=False)\n", + "adata_atac = adata_atac[:,adata_atac.var['feature_types'] == \"Peaks\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9104058", + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate peaks around each gene as well as those that have high correlations with promoter peak or gene expression\n", + "adata_atac = mv.aggregate_peaks_10x(adata_atac, \n", + " 'outs/atac_peak_annotation.tsv', \n", + " 'outs/analysis/feature_linkage/feature_linkage.bedpe', \n", + " verbose=True) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e0dd29f", + "metadata": {}, + "outputs": [], + "source": [ + "sc.pp.filter_cells(adata_atac, min_counts=...)\n", + "sc.pp.filter_cells(adata_atac, max_counts=...)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6d6fb09", + "metadata": {}, + "outputs": [], + "source": [ + "# Find shared cells and genes between RNA and ATAC\n", + "shared_cells = pd.Index(np.intersect1d(adata_rna.obs_names, adata_atac.obs_names))\n", + "shared_genes = pd.Index(np.intersect1d(adata_rna.var_names, adata_atac.var_names))\n", + "len(shared_cells), len(shared_genes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73b3bf76", + "metadata": {}, + "outputs": [], + "source": [ + "adata_rna = adata_rna[shared_cells, shared_genes]\n", + "adata_atac = adata_atac[shared_cells, shared_genes]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9471abe5", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize RNA\n", + "scv.pp.filter_and_normalize(adata_rna, min_shared_counts=..., n_top_genes=...)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9702c6f7", + "metadata": {}, + "outputs": [], + "source": [ + "# Optionally, regress out the effects of cell cycle and/or scale RNA matrix if it gives better clustering results\n", + "# scv.tl.score_genes_cell_cycle(adata_rna)\n", + "# sc.pp.regress_out(adata_rna, ['S_score', 'G2M_score’])\n", + "# sc.pp.scale(adata_rna)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15b6ad99", + "metadata": {}, + "outputs": [], + "source": [ + "scv.pp.moments(adata_rna, n_pcs=..., n_neighbors=...)\n", + "scv.tl.umap(adata_rna) # compute UMAP embedding\n", + "sc.tl.leiden(adata_rna) # compute clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9832db8c", + "metadata": {}, + "outputs": [], + "source": [ + "# Identify cell types\n", + "new_cluster_names = [...]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5723c019", + "metadata": {}, + "outputs": [], + "source": [ + "adata_rna.rename_categories('leiden', new_cluster_names) # annotate clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d64eb4c", + "metadata": {}, + "outputs": [], + "source": [ + "scv.pl.umap(adata_rna, color='leiden')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abcd1405", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize ATAC and subset for the same set of cells and genes\n", + "mv.tfidf_norm(adata_atac)\n", + "adata_atac = adata_atac[adata_rna.obs_names, adata_rna.var_names]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f7344e0", + "metadata": {}, + "outputs": [], + "source": [ + "# Write out filtered cells and prepare to run Seurat WNN\n", + "adata_rna.obs_names.to_frame().to_csv('filtered_cells.txt', header=False, index=False) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16b729ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Run Seurat WNN (R script can be found on GitHub)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e404bc3b", + "metadata": {}, + "outputs": [], + "source": [ + "# Back in python, load the neighbors\n", + "nn_idx = np.loadtxt(\"seurat_wnn/nn_idx.txt\", delimiter=',')\n", + "nn_dist = np.loadtxt(\"seurat_wnn/nn_dist.txt\", delimiter=',')\n", + "nn_cells = pd.Index(pd.read_csv(\"seurat_wnn/nn_cells.txt\", header=None)[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45a754ae", + "metadata": {}, + "outputs": [], + "source": [ + "np.all(nn_cells == adata_atac.obs_names) # make sure cell names match" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2eb4ee30", + "metadata": {}, + "outputs": [], + "source": [ + "# WNN smooth the gene aggregated ATAC matrix, resulting in a new Mc matrix in adata_atac.layers\n", + "mv.knn_smooth_chrom(adata_atac, nn_idx, nn_dist) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55a6024c", + "metadata": {}, + "outputs": [], + "source": [ + "# Run MultiVelo main function\n", + "adata_result = mv.recover_dynamics_chrom(adata_rna,\n", + " adata_atac,\n", + " max_iter=5, # coordinate-descent like optimization\n", + " init_mode=\"invert\", # simple, invert, or grid\n", + " verbose=False,\n", + " parallel=True,\n", + " save_plot=False,\n", + " rna_only=False,\n", + " fit=True,\n", + " n_anchors=500,\n", + " extra_color_key='leiden' # used if save_plot=True\n", + " )\n", + "# Full argument list can be shown with help(mv.recover_dynamics_chrom)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbec0729", + "metadata": {}, + "outputs": [], + "source": [ + "adata_result.write(\"multivelo_result.h5ad\") # save the result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50c18538", + "metadata": {}, + "outputs": [], + "source": [ + "# adata_result = sc.read_h5ad('multivelo_result.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2e7dc3d", + "metadata": {}, + "outputs": [], + "source": [ + "mv.pie_summary(adata_result) # gene type chart" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b05cb9", + "metadata": {}, + "outputs": [], + "source": [ + "mv.switch_time_summary(adata_result) # switch time statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6814aba", + "metadata": {}, + "outputs": [], + "source": [ + "mv.likelihood_plot(adata_result) # likelihood and model parameter statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "829dd9bf", + "metadata": {}, + "outputs": [], + "source": [ + "mv.velocity_graph(adata_result)\n", + "mv.latent_time(adata_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00d47fa4", + "metadata": {}, + "outputs": [], + "source": [ + "mv.velocity_embedding_stream(adata_result, basis='umap', color='leiden') # velocity streams" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "140c08c3", + "metadata": {}, + "outputs": [], + "source": [ + "scv.pl.scatter(adata_result, color='latent_time', color_map='gnuplot', size=80) # latent time prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee0eca77", + "metadata": {}, + "outputs": [], + "source": [ + "# Some genes of interest\n", + "gene_list = [...]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c5ba7a8", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot accessbility and expression against gene time or global latent time\n", + "mv.dynamic_plot(adata_result, gene_list, color_by='state', gene_time=True, axis_on=False, frame_on=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2ce7023", + "metadata": {}, + "outputs": [], + "source": [ + "# Phase portraits on the u-s, c-u, or 3-dimensional planes can be plotted\n", + "mv.scatter_plot(adata_result, gene_list, color_by='leiden', by='us', axis_on=False, frame_on=False) " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": "48" + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}