MultiVelo / Git / Diff of /Examples/MultiVelo

Models:

AlyssaS/

MultiVelo

Downloads: 1

Diff of /Examples/MultiVelo_Template.ipynb [000000] .. [5d6472]

Switch to unified view

 b/Examples/MultiVelo_Template.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "60d222bd",
+   "metadata": {},
+   "source": [
+    "# MultiVelo Template\n",
+    "\n",
+    "This is an example of basic workflow for 10X Cell Ranger ARC 2.0 output.\n",
+    "```\n",
+    ".\n",
+    "|-- MultiVelo_Template.ipynb\n",
+    "|-- outs\n",
+    "|   |-- analysis\n",
+    "|   |   `-- feature_linkage\n",
+    "|   |       `-- feature_linkage.bedpe\n",
+    "|   |-- filtered_feature_bc_matrix\n",
+    "|   |   |-- barcodes.tsv.gz\n",
+    "|   |   |-- features.tsv.gz\n",
+    "|   |   `-- matrix.mtx.gz\n",
+    "|   `-- atac_peak_annotation.tsv\n",
+    "|-- seurat_wnn\n",
+    "|   |-- nn_cells.txt\n",
+    "|   |-- nn_dist.txt\n",
+    "|   `-- nn_idx.txt\n",
+    "`-- velocyto\n",
+    "    `-- gex_possorted_bam_XXXXX.loom\n",
+    "```\n",
+    "Please replace ... with appropriate values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53937c83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import scipy\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import scanpy as sc\n",
+    "import scvelo as scv\n",
+    "import multivelo as mv\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b412d727",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scv.settings.verbosity = 3\n",
+    "scv.settings.presenter_view = True\n",
+    "scv.set_figure_params('scvelo')\n",
+    "pd.set_option('display.max_columns', 100)\n",
+    "pd.set_option('display.max_rows', 200)\n",
+    "np.set_printoptions(suppress=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32217761",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read in RNA and filter\n",
+    "adata_rna = scv.read('velocyto/gex_possorted_bam_XXXXX.loom', cache=True)\n",
+    "adata_rna.obs_names = [x.split(':')[1][:-1] + '-1' for x in adata_rna.obs_names]\n",
+    "adata_rna.var_names_make_unique()\n",
+    "sc.pp.filter_cells(adata_rna, min_counts=...)\n",
+    "sc.pp.filter_cells(adata_rna, max_counts=...)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a114e43",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read in ATAC, gene aggregate, and filter\n",
+    "adata_atac = sc.read_10x_mtx('outs/filtered_feature_bc_matrix/', var_names='gene_symbols', cache=True, gex_only=False)\n",
+    "adata_atac = adata_atac[:,adata_atac.var['feature_types'] == \"Peaks\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9104058",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Aggregate peaks around each gene as well as those that have high correlations with promoter peak or gene expression\n",
+    "adata_atac = mv.aggregate_peaks_10x(adata_atac, \n",
+    "                                    'outs/atac_peak_annotation.tsv', \n",
+    "                                    'outs/analysis/feature_linkage/feature_linkage.bedpe', \n",
+    "                                    verbose=True) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e0dd29f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sc.pp.filter_cells(adata_atac, min_counts=...)\n",
+    "sc.pp.filter_cells(adata_atac, max_counts=...)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6d6fb09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Find shared cells and genes between RNA and ATAC\n",
+    "shared_cells = pd.Index(np.intersect1d(adata_rna.obs_names, adata_atac.obs_names))\n",
+    "shared_genes = pd.Index(np.intersect1d(adata_rna.var_names, adata_atac.var_names))\n",
+    "len(shared_cells), len(shared_genes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "73b3bf76",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata_rna = adata_rna[shared_cells, shared_genes]\n",
+    "adata_atac = adata_atac[shared_cells, shared_genes]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9471abe5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize RNA\n",
+    "scv.pp.filter_and_normalize(adata_rna, min_shared_counts=..., n_top_genes=...)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9702c6f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optionally, regress out the effects of cell cycle and/or scale RNA matrix if it gives better clustering results\n",
+    "# scv.tl.score_genes_cell_cycle(adata_rna)\n",
+    "# sc.pp.regress_out(adata_rna, ['S_score', 'G2M_score’])\n",
+    "# sc.pp.scale(adata_rna)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15b6ad99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scv.pp.moments(adata_rna, n_pcs=..., n_neighbors=...)\n",
+    "scv.tl.umap(adata_rna) # compute UMAP embedding\n",
+    "sc.tl.leiden(adata_rna) # compute clusters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9832db8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Identify cell types\n",
+    "new_cluster_names = [...]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5723c019",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata_rna.rename_categories('leiden', new_cluster_names) # annotate clusters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1d64eb4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scv.pl.umap(adata_rna, color='leiden')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abcd1405",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize ATAC and subset for the same set of cells and genes\n",
+    "mv.tfidf_norm(adata_atac)\n",
+    "adata_atac = adata_atac[adata_rna.obs_names, adata_rna.var_names]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f7344e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write out filtered cells and prepare to run Seurat WNN\n",
+    "adata_rna.obs_names.to_frame().to_csv('filtered_cells.txt', header=False, index=False) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16b729ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run Seurat WNN (R script can be found on GitHub)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e404bc3b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Back in python, load the neighbors\n",
+    "nn_idx = np.loadtxt(\"seurat_wnn/nn_idx.txt\", delimiter=',')\n",
+    "nn_dist = np.loadtxt(\"seurat_wnn/nn_dist.txt\", delimiter=',')\n",
+    "nn_cells = pd.Index(pd.read_csv(\"seurat_wnn/nn_cells.txt\", header=None)[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45a754ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.all(nn_cells == adata_atac.obs_names) # make sure cell names match"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2eb4ee30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# WNN smooth the gene aggregated ATAC matrix, resulting in a new Mc matrix in adata_atac.layers\n",
+    "mv.knn_smooth_chrom(adata_atac, nn_idx, nn_dist) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55a6024c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run MultiVelo main function\n",
+    "adata_result = mv.recover_dynamics_chrom(adata_rna,\n",
+    "                                         adata_atac,\n",
+    "                                         max_iter=5, # coordinate-descent like optimization\n",
+    "                                         init_mode=\"invert\", # simple, invert, or grid\n",
+    "                                         verbose=False,\n",
+    "                                         parallel=True,\n",
+    "                                         save_plot=False,\n",
+    "                                         rna_only=False,\n",
+    "                                         fit=True,\n",
+    "                                         n_anchors=500,\n",
+    "                                         extra_color_key='leiden' # used if save_plot=True\n",
+    "                                        )\n",
+    "# Full argument list can be shown with help(mv.recover_dynamics_chrom)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bbec0729",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata_result.write(\"multivelo_result.h5ad\") # save the result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "50c18538",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# adata_result = sc.read_h5ad('multivelo_result.h5ad')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2e7dc3d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mv.pie_summary(adata_result) # gene type chart"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "39b05cb9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mv.switch_time_summary(adata_result) # switch time statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6814aba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mv.likelihood_plot(adata_result) # likelihood and model parameter statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "829dd9bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mv.velocity_graph(adata_result)\n",
+    "mv.latent_time(adata_result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "00d47fa4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mv.velocity_embedding_stream(adata_result, basis='umap', color='leiden') # velocity streams"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "140c08c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scv.pl.scatter(adata_result, color='latent_time', color_map='gnuplot', size=80) # latent time prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee0eca77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Some genes of interest\n",
+    "gene_list = [...]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c5ba7a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot accessbility and expression against gene time or global latent time\n",
+    "mv.dynamic_plot(adata_result, gene_list, color_by='state', gene_time=True, axis_on=False, frame_on=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2ce7023",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Phase portraits on the u-s, c-u, or 3-dimensional planes can be plotted\n",
+    "mv.scatter_plot(adata_result, gene_list, color_by='leiden', by='us', axis_on=False, frame_on=False) "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": "48"
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}