Switch to unified view

a b/Examples/MultiVelo_Template.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "id": "60d222bd",
6
   "metadata": {},
7
   "source": [
8
    "# MultiVelo Template\n",
9
    "\n",
10
    "This is an example of basic workflow for 10X Cell Ranger ARC 2.0 output.\n",
11
    "```\n",
12
    ".\n",
13
    "|-- MultiVelo_Template.ipynb\n",
14
    "|-- outs\n",
15
    "|   |-- analysis\n",
16
    "|   |   `-- feature_linkage\n",
17
    "|   |       `-- feature_linkage.bedpe\n",
18
    "|   |-- filtered_feature_bc_matrix\n",
19
    "|   |   |-- barcodes.tsv.gz\n",
20
    "|   |   |-- features.tsv.gz\n",
21
    "|   |   `-- matrix.mtx.gz\n",
22
    "|   `-- atac_peak_annotation.tsv\n",
23
    "|-- seurat_wnn\n",
24
    "|   |-- nn_cells.txt\n",
25
    "|   |-- nn_dist.txt\n",
26
    "|   `-- nn_idx.txt\n",
27
    "`-- velocyto\n",
28
    "    `-- gex_possorted_bam_XXXXX.loom\n",
29
    "```\n",
30
    "Please replace ... with appropriate values."
31
   ]
32
  },
33
  {
34
   "cell_type": "code",
35
   "execution_count": null,
36
   "id": "53937c83",
37
   "metadata": {},
38
   "outputs": [],
39
   "source": [
40
    "import os\n",
41
    "import scipy\n",
42
    "import numpy as np\n",
43
    "import pandas as pd\n",
44
    "import scanpy as sc\n",
45
    "import scvelo as scv\n",
46
    "import multivelo as mv\n",
47
    "import matplotlib.pyplot as plt"
48
   ]
49
  },
50
  {
51
   "cell_type": "code",
52
   "execution_count": null,
53
   "id": "b412d727",
54
   "metadata": {},
55
   "outputs": [],
56
   "source": [
57
    "scv.settings.verbosity = 3\n",
58
    "scv.settings.presenter_view = True\n",
59
    "scv.set_figure_params('scvelo')\n",
60
    "pd.set_option('display.max_columns', 100)\n",
61
    "pd.set_option('display.max_rows', 200)\n",
62
    "np.set_printoptions(suppress=True)"
63
   ]
64
  },
65
  {
66
   "cell_type": "code",
67
   "execution_count": null,
68
   "id": "32217761",
69
   "metadata": {},
70
   "outputs": [],
71
   "source": [
72
    "# Read in RNA and filter\n",
73
    "adata_rna = scv.read('velocyto/gex_possorted_bam_XXXXX.loom', cache=True)\n",
74
    "adata_rna.obs_names = [x.split(':')[1][:-1] + '-1' for x in adata_rna.obs_names]\n",
75
    "adata_rna.var_names_make_unique()\n",
76
    "sc.pp.filter_cells(adata_rna, min_counts=...)\n",
77
    "sc.pp.filter_cells(adata_rna, max_counts=...)"
78
   ]
79
  },
80
  {
81
   "cell_type": "code",
82
   "execution_count": null,
83
   "id": "0a114e43",
84
   "metadata": {},
85
   "outputs": [],
86
   "source": [
87
    "# Read in ATAC, gene aggregate, and filter\n",
88
    "adata_atac = sc.read_10x_mtx('outs/filtered_feature_bc_matrix/', var_names='gene_symbols', cache=True, gex_only=False)\n",
89
    "adata_atac = adata_atac[:,adata_atac.var['feature_types'] == \"Peaks\"]"
90
   ]
91
  },
92
  {
93
   "cell_type": "code",
94
   "execution_count": null,
95
   "id": "e9104058",
96
   "metadata": {},
97
   "outputs": [],
98
   "source": [
99
    "# Aggregate peaks around each gene as well as those that have high correlations with promoter peak or gene expression\n",
100
    "adata_atac = mv.aggregate_peaks_10x(adata_atac, \n",
101
    "                                    'outs/atac_peak_annotation.tsv', \n",
102
    "                                    'outs/analysis/feature_linkage/feature_linkage.bedpe', \n",
103
    "                                    verbose=True) "
104
   ]
105
  },
106
  {
107
   "cell_type": "code",
108
   "execution_count": null,
109
   "id": "6e0dd29f",
110
   "metadata": {},
111
   "outputs": [],
112
   "source": [
113
    "sc.pp.filter_cells(adata_atac, min_counts=...)\n",
114
    "sc.pp.filter_cells(adata_atac, max_counts=...)"
115
   ]
116
  },
117
  {
118
   "cell_type": "code",
119
   "execution_count": null,
120
   "id": "a6d6fb09",
121
   "metadata": {},
122
   "outputs": [],
123
   "source": [
124
    "# Find shared cells and genes between RNA and ATAC\n",
125
    "shared_cells = pd.Index(np.intersect1d(adata_rna.obs_names, adata_atac.obs_names))\n",
126
    "shared_genes = pd.Index(np.intersect1d(adata_rna.var_names, adata_atac.var_names))\n",
127
    "len(shared_cells), len(shared_genes)"
128
   ]
129
  },
130
  {
131
   "cell_type": "code",
132
   "execution_count": null,
133
   "id": "73b3bf76",
134
   "metadata": {},
135
   "outputs": [],
136
   "source": [
137
    "adata_rna = adata_rna[shared_cells, shared_genes]\n",
138
    "adata_atac = adata_atac[shared_cells, shared_genes]"
139
   ]
140
  },
141
  {
142
   "cell_type": "code",
143
   "execution_count": null,
144
   "id": "9471abe5",
145
   "metadata": {},
146
   "outputs": [],
147
   "source": [
148
    "# Normalize RNA\n",
149
    "scv.pp.filter_and_normalize(adata_rna, min_shared_counts=..., n_top_genes=...)"
150
   ]
151
  },
152
  {
153
   "cell_type": "code",
154
   "execution_count": null,
155
   "id": "9702c6f7",
156
   "metadata": {},
157
   "outputs": [],
158
   "source": [
159
    "# Optionally, regress out the effects of cell cycle and/or scale RNA matrix if it gives better clustering results\n",
160
    "# scv.tl.score_genes_cell_cycle(adata_rna)\n",
161
    "# sc.pp.regress_out(adata_rna, ['S_score', 'G2M_score’])\n",
162
    "# sc.pp.scale(adata_rna)"
163
   ]
164
  },
165
  {
166
   "cell_type": "code",
167
   "execution_count": null,
168
   "id": "15b6ad99",
169
   "metadata": {},
170
   "outputs": [],
171
   "source": [
172
    "scv.pp.moments(adata_rna, n_pcs=..., n_neighbors=...)\n",
173
    "scv.tl.umap(adata_rna) # compute UMAP embedding\n",
174
    "sc.tl.leiden(adata_rna) # compute clusters"
175
   ]
176
  },
177
  {
178
   "cell_type": "code",
179
   "execution_count": null,
180
   "id": "9832db8c",
181
   "metadata": {},
182
   "outputs": [],
183
   "source": [
184
    "# Identify cell types\n",
185
    "new_cluster_names = [...]"
186
   ]
187
  },
188
  {
189
   "cell_type": "code",
190
   "execution_count": null,
191
   "id": "5723c019",
192
   "metadata": {},
193
   "outputs": [],
194
   "source": [
195
    "adata_rna.rename_categories('leiden', new_cluster_names) # annotate clusters"
196
   ]
197
  },
198
  {
199
   "cell_type": "code",
200
   "execution_count": null,
201
   "id": "1d64eb4c",
202
   "metadata": {},
203
   "outputs": [],
204
   "source": [
205
    "scv.pl.umap(adata_rna, color='leiden')"
206
   ]
207
  },
208
  {
209
   "cell_type": "code",
210
   "execution_count": null,
211
   "id": "abcd1405",
212
   "metadata": {},
213
   "outputs": [],
214
   "source": [
215
    "# Normalize ATAC and subset for the same set of cells and genes\n",
216
    "mv.tfidf_norm(adata_atac)\n",
217
    "adata_atac = adata_atac[adata_rna.obs_names, adata_rna.var_names]"
218
   ]
219
  },
220
  {
221
   "cell_type": "code",
222
   "execution_count": null,
223
   "id": "9f7344e0",
224
   "metadata": {},
225
   "outputs": [],
226
   "source": [
227
    "# Write out filtered cells and prepare to run Seurat WNN\n",
228
    "adata_rna.obs_names.to_frame().to_csv('filtered_cells.txt', header=False, index=False) "
229
   ]
230
  },
231
  {
232
   "cell_type": "code",
233
   "execution_count": null,
234
   "id": "16b729ae",
235
   "metadata": {},
236
   "outputs": [],
237
   "source": [
238
    "# Run Seurat WNN (R script can be found on GitHub)"
239
   ]
240
  },
241
  {
242
   "cell_type": "code",
243
   "execution_count": null,
244
   "id": "e404bc3b",
245
   "metadata": {},
246
   "outputs": [],
247
   "source": [
248
    "# Back in python, load the neighbors\n",
249
    "nn_idx = np.loadtxt(\"seurat_wnn/nn_idx.txt\", delimiter=',')\n",
250
    "nn_dist = np.loadtxt(\"seurat_wnn/nn_dist.txt\", delimiter=',')\n",
251
    "nn_cells = pd.Index(pd.read_csv(\"seurat_wnn/nn_cells.txt\", header=None)[0])"
252
   ]
253
  },
254
  {
255
   "cell_type": "code",
256
   "execution_count": null,
257
   "id": "45a754ae",
258
   "metadata": {},
259
   "outputs": [],
260
   "source": [
261
    "np.all(nn_cells == adata_atac.obs_names) # make sure cell names match"
262
   ]
263
  },
264
  {
265
   "cell_type": "code",
266
   "execution_count": null,
267
   "id": "2eb4ee30",
268
   "metadata": {},
269
   "outputs": [],
270
   "source": [
271
    "# WNN smooth the gene aggregated ATAC matrix, resulting in a new Mc matrix in adata_atac.layers\n",
272
    "mv.knn_smooth_chrom(adata_atac, nn_idx, nn_dist) "
273
   ]
274
  },
275
  {
276
   "cell_type": "code",
277
   "execution_count": null,
278
   "id": "55a6024c",
279
   "metadata": {},
280
   "outputs": [],
281
   "source": [
282
    "# Run MultiVelo main function\n",
283
    "adata_result = mv.recover_dynamics_chrom(adata_rna,\n",
284
    "                                         adata_atac,\n",
285
    "                                         max_iter=5, # coordinate-descent like optimization\n",
286
    "                                         init_mode=\"invert\", # simple, invert, or grid\n",
287
    "                                         verbose=False,\n",
288
    "                                         parallel=True,\n",
289
    "                                         save_plot=False,\n",
290
    "                                         rna_only=False,\n",
291
    "                                         fit=True,\n",
292
    "                                         n_anchors=500,\n",
293
    "                                         extra_color_key='leiden' # used if save_plot=True\n",
294
    "                                        )\n",
295
    "# Full argument list can be shown with help(mv.recover_dynamics_chrom)"
296
   ]
297
  },
298
  {
299
   "cell_type": "code",
300
   "execution_count": null,
301
   "id": "bbec0729",
302
   "metadata": {},
303
   "outputs": [],
304
   "source": [
305
    "adata_result.write(\"multivelo_result.h5ad\") # save the result"
306
   ]
307
  },
308
  {
309
   "cell_type": "code",
310
   "execution_count": null,
311
   "id": "50c18538",
312
   "metadata": {},
313
   "outputs": [],
314
   "source": [
315
    "# adata_result = sc.read_h5ad('multivelo_result.h5ad')"
316
   ]
317
  },
318
  {
319
   "cell_type": "code",
320
   "execution_count": null,
321
   "id": "f2e7dc3d",
322
   "metadata": {},
323
   "outputs": [],
324
   "source": [
325
    "mv.pie_summary(adata_result) # gene type chart"
326
   ]
327
  },
328
  {
329
   "cell_type": "code",
330
   "execution_count": null,
331
   "id": "39b05cb9",
332
   "metadata": {},
333
   "outputs": [],
334
   "source": [
335
    "mv.switch_time_summary(adata_result) # switch time statistics"
336
   ]
337
  },
338
  {
339
   "cell_type": "code",
340
   "execution_count": null,
341
   "id": "f6814aba",
342
   "metadata": {},
343
   "outputs": [],
344
   "source": [
345
    "mv.likelihood_plot(adata_result) # likelihood and model parameter statistics"
346
   ]
347
  },
348
  {
349
   "cell_type": "code",
350
   "execution_count": null,
351
   "id": "829dd9bf",
352
   "metadata": {},
353
   "outputs": [],
354
   "source": [
355
    "mv.velocity_graph(adata_result)\n",
356
    "mv.latent_time(adata_result)"
357
   ]
358
  },
359
  {
360
   "cell_type": "code",
361
   "execution_count": null,
362
   "id": "00d47fa4",
363
   "metadata": {},
364
   "outputs": [],
365
   "source": [
366
    "mv.velocity_embedding_stream(adata_result, basis='umap', color='leiden') # velocity streams"
367
   ]
368
  },
369
  {
370
   "cell_type": "code",
371
   "execution_count": null,
372
   "id": "140c08c3",
373
   "metadata": {},
374
   "outputs": [],
375
   "source": [
376
    "scv.pl.scatter(adata_result, color='latent_time', color_map='gnuplot', size=80) # latent time prediction"
377
   ]
378
  },
379
  {
380
   "cell_type": "code",
381
   "execution_count": null,
382
   "id": "ee0eca77",
383
   "metadata": {},
384
   "outputs": [],
385
   "source": [
386
    "# Some genes of interest\n",
387
    "gene_list = [...]"
388
   ]
389
  },
390
  {
391
   "cell_type": "code",
392
   "execution_count": null,
393
   "id": "6c5ba7a8",
394
   "metadata": {},
395
   "outputs": [],
396
   "source": [
397
    "# Plot accessbility and expression against gene time or global latent time\n",
398
    "mv.dynamic_plot(adata_result, gene_list, color_by='state', gene_time=True, axis_on=False, frame_on=False)"
399
   ]
400
  },
401
  {
402
   "cell_type": "code",
403
   "execution_count": null,
404
   "id": "e2ce7023",
405
   "metadata": {},
406
   "outputs": [],
407
   "source": [
408
    "# Phase portraits on the u-s, c-u, or 3-dimensional planes can be plotted\n",
409
    "mv.scatter_plot(adata_result, gene_list, color_by='leiden', by='us', axis_on=False, frame_on=False) "
410
   ]
411
  }
412
 ],
413
 "metadata": {
414
  "kernelspec": {
415
   "display_name": "Python 3 (ipykernel)",
416
   "language": "python",
417
   "name": "python3"
418
  },
419
  "language_info": {
420
   "codemirror_mode": {
421
    "name": "ipython",
422
    "version": 3
423
   },
424
   "file_extension": ".py",
425
   "mimetype": "text/x-python",
426
   "name": "python",
427
   "nbconvert_exporter": "python",
428
   "pygments_lexer": "ipython3",
429
   "version": "3.9.13"
430
  },
431
  "toc": {
432
   "base_numbering": 1,
433
   "nav_menu": {},
434
   "number_sections": true,
435
   "sideBar": true,
436
   "skip_h1_title": false,
437
   "title_cell": "Table of Contents",
438
   "title_sidebar": "Contents",
439
   "toc_cell": false,
440
   "toc_position": {},
441
   "toc_section_display": true,
442
   "toc_window_display": false
443
  },
444
  "varInspector": {
445
   "cols": {
446
    "lenName": 16,
447
    "lenType": 16,
448
    "lenVar": "48"
449
   },
450
   "kernels_config": {
451
    "python": {
452
     "delete_cmd_postfix": "",
453
     "delete_cmd_prefix": "del ",
454
     "library": "var_list.py",
455
     "varRefreshCmd": "print(var_dic_list())"
456
    },
457
    "r": {
458
     "delete_cmd_postfix": ") ",
459
     "delete_cmd_prefix": "rm(",
460
     "library": "var_list.r",
461
     "varRefreshCmd": "cat(var_dic_list()) "
462
    }
463
   },
464
   "types_to_exclude": [
465
    "module",
466
    "function",
467
    "builtin_function_or_method",
468
    "instance",
469
    "_Feature"
470
   ],
471
   "window_display": false
472
  }
473
 },
474
 "nbformat": 4,
475
 "nbformat_minor": 5
476
}