[0ad989]: / Examples / MultiVelo_Template.ipynb

Download this file

477 lines (476 with data), 12.4 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "60d222bd",
   "metadata": {},
   "source": [
    "# MultiVelo Template\n",
    "\n",
    "This is an example of basic workflow for 10X Cell Ranger ARC 2.0 output.\n",
    "```\n",
    ".\n",
    "|-- MultiVelo_Template.ipynb\n",
    "|-- outs\n",
    "|   |-- analysis\n",
    "|   |   `-- feature_linkage\n",
    "|   |       `-- feature_linkage.bedpe\n",
    "|   |-- filtered_feature_bc_matrix\n",
    "|   |   |-- barcodes.tsv.gz\n",
    "|   |   |-- features.tsv.gz\n",
    "|   |   `-- matrix.mtx.gz\n",
    "|   `-- atac_peak_annotation.tsv\n",
    "|-- seurat_wnn\n",
    "|   |-- nn_cells.txt\n",
    "|   |-- nn_dist.txt\n",
    "|   `-- nn_idx.txt\n",
    "`-- velocyto\n",
    "    `-- gex_possorted_bam_XXXXX.loom\n",
    "```\n",
    "Please replace ... with appropriate values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53937c83",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import scipy\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "import scvelo as scv\n",
    "import multivelo as mv\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b412d727",
   "metadata": {},
   "outputs": [],
   "source": [
    "scv.settings.verbosity = 3\n",
    "scv.settings.presenter_view = True\n",
    "scv.set_figure_params('scvelo')\n",
    "pd.set_option('display.max_columns', 100)\n",
    "pd.set_option('display.max_rows', 200)\n",
    "np.set_printoptions(suppress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32217761",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in RNA and filter\n",
    "adata_rna = scv.read('velocyto/gex_possorted_bam_XXXXX.loom', cache=True)\n",
    "adata_rna.obs_names = [x.split(':')[1][:-1] + '-1' for x in adata_rna.obs_names]\n",
    "adata_rna.var_names_make_unique()\n",
    "sc.pp.filter_cells(adata_rna, min_counts=...)\n",
    "sc.pp.filter_cells(adata_rna, max_counts=...)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a114e43",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in ATAC, gene aggregate, and filter\n",
    "adata_atac = sc.read_10x_mtx('outs/filtered_feature_bc_matrix/', var_names='gene_symbols', cache=True, gex_only=False)\n",
    "adata_atac = adata_atac[:,adata_atac.var['feature_types'] == \"Peaks\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9104058",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Aggregate peaks around each gene as well as those that have high correlations with promoter peak or gene expression\n",
    "adata_atac = mv.aggregate_peaks_10x(adata_atac, \n",
    "                                    'outs/atac_peak_annotation.tsv', \n",
    "                                    'outs/analysis/feature_linkage/feature_linkage.bedpe', \n",
    "                                    verbose=True) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e0dd29f",
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.pp.filter_cells(adata_atac, min_counts=...)\n",
    "sc.pp.filter_cells(adata_atac, max_counts=...)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6d6fb09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Find shared cells and genes between RNA and ATAC\n",
    "shared_cells = pd.Index(np.intersect1d(adata_rna.obs_names, adata_atac.obs_names))\n",
    "shared_genes = pd.Index(np.intersect1d(adata_rna.var_names, adata_atac.var_names))\n",
    "len(shared_cells), len(shared_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73b3bf76",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_rna = adata_rna[shared_cells, shared_genes]\n",
    "adata_atac = adata_atac[shared_cells, shared_genes]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9471abe5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalize RNA\n",
    "scv.pp.filter_and_normalize(adata_rna, min_shared_counts=..., n_top_genes=...)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9702c6f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optionally, regress out the effects of cell cycle and/or scale RNA matrix if it gives better clustering results\n",
    "# scv.tl.score_genes_cell_cycle(adata_rna)\n",
    "# sc.pp.regress_out(adata_rna, ['S_score', 'G2M_score’])\n",
    "# sc.pp.scale(adata_rna)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15b6ad99",
   "metadata": {},
   "outputs": [],
   "source": [
    "scv.pp.moments(adata_rna, n_pcs=..., n_neighbors=...)\n",
    "scv.tl.umap(adata_rna) # compute UMAP embedding\n",
    "sc.tl.leiden(adata_rna) # compute clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9832db8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Identify cell types\n",
    "new_cluster_names = [...]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5723c019",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_rna.rename_categories('leiden', new_cluster_names) # annotate clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d64eb4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "scv.pl.umap(adata_rna, color='leiden')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abcd1405",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalize ATAC and subset for the same set of cells and genes\n",
    "mv.tfidf_norm(adata_atac)\n",
    "adata_atac = adata_atac[adata_rna.obs_names, adata_rna.var_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f7344e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write out filtered cells and prepare to run Seurat WNN\n",
    "adata_rna.obs_names.to_frame().to_csv('filtered_cells.txt', header=False, index=False) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16b729ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run Seurat WNN (R script can be found on GitHub)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e404bc3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Back in python, load the neighbors\n",
    "nn_idx = np.loadtxt(\"seurat_wnn/nn_idx.txt\", delimiter=',')\n",
    "nn_dist = np.loadtxt(\"seurat_wnn/nn_dist.txt\", delimiter=',')\n",
    "nn_cells = pd.Index(pd.read_csv(\"seurat_wnn/nn_cells.txt\", header=None)[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45a754ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.all(nn_cells == adata_atac.obs_names) # make sure cell names match"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2eb4ee30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# WNN smooth the gene aggregated ATAC matrix, resulting in a new Mc matrix in adata_atac.layers\n",
    "mv.knn_smooth_chrom(adata_atac, nn_idx, nn_dist) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55a6024c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run MultiVelo main function\n",
    "adata_result = mv.recover_dynamics_chrom(adata_rna,\n",
    "                                         adata_atac,\n",
    "                                         max_iter=5, # coordinate-descent like optimization\n",
    "                                         init_mode=\"invert\", # simple, invert, or grid\n",
    "                                         verbose=False,\n",
    "                                         parallel=True,\n",
    "                                         save_plot=False,\n",
    "                                         rna_only=False,\n",
    "                                         fit=True,\n",
    "                                         n_anchors=500,\n",
    "                                         extra_color_key='leiden' # used if save_plot=True\n",
    "                                        )\n",
    "# Full argument list can be shown with help(mv.recover_dynamics_chrom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bbec0729",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_result.write(\"multivelo_result.h5ad\") # save the result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50c18538",
   "metadata": {},
   "outputs": [],
   "source": [
    "# adata_result = sc.read_h5ad('multivelo_result.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2e7dc3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "mv.pie_summary(adata_result) # gene type chart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39b05cb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "mv.switch_time_summary(adata_result) # switch time statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6814aba",
   "metadata": {},
   "outputs": [],
   "source": [
    "mv.likelihood_plot(adata_result) # likelihood and model parameter statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "829dd9bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "mv.velocity_graph(adata_result)\n",
    "mv.latent_time(adata_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00d47fa4",
   "metadata": {},
   "outputs": [],
   "source": [
    "mv.velocity_embedding_stream(adata_result, basis='umap', color='leiden') # velocity streams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "140c08c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "scv.pl.scatter(adata_result, color='latent_time', color_map='gnuplot', size=80) # latent time prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee0eca77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Some genes of interest\n",
    "gene_list = [...]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c5ba7a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot accessbility and expression against gene time or global latent time\n",
    "mv.dynamic_plot(adata_result, gene_list, color_by='state', gene_time=True, axis_on=False, frame_on=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2ce7023",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Phase portraits on the u-s, c-u, or 3-dimensional planes can be plotted\n",
    "mv.scatter_plot(adata_result, gene_list, color_by='leiden', by='us', axis_on=False, frame_on=False) "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": "48"
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}