|
a |
|
b/singlecellmultiomics/bamProcessing/scmoConvert.py |
|
|
1 |
#!/usr/bin/env python |
|
|
2 |
# -*- coding: utf-8 -*- |
|
|
3 |
import pysam |
|
|
4 |
import argparse |
|
|
5 |
|
|
|
6 |
def convert_scmo_to_cellranger(scmo_bam, cellranger_bam, n_threads=4):
    """Write a copy of a SCMO bam file carrying cellranger-style tags.

    Every read of ``scmo_bam`` is written to ``cellranger_bam``. When a read
    has the source tag, its value is additionally stored under the
    cellranger tag name:

        BC -> CB
        RX -> UB

    The original BC/RX tags are left in place. The output file is indexed
    once it has been written.

    Args:
        scmo_bam: path of the SCMO bam file to read.
        cellranger_bam: path of the bam file to create.
        n_threads: forwarded as ``threads`` to pysam for both files.
    """
    # (source tag, destination tag) pairs, applied in this order.
    tag_pairs = (('BC', 'CB'), ('RX', 'UB'))

    with pysam.AlignmentFile(scmo_bam, threads=n_threads) as source, \
            pysam.AlignmentFile(cellranger_bam, 'wb', header=source.header,
                                threads=n_threads) as target:
        for record in source:
            for src_tag, dst_tag in tag_pairs:
                if record.has_tag(src_tag):
                    record.set_tag(dst_tag, record.get_tag(src_tag))
            target.write(record)

    # Index only after the with-block has closed the output file.
    # NOTE(review): read order is copied unchanged from the input; indexing
    # presumably expects coordinate-sorted data -- confirm with callers.
    pysam.index(cellranger_bam)
|
|
25 |
|
|
|
26 |
|
|
|
27 |
def convert_scmo_to_dropseq(scmo_bam, dropseq_bam, n_threads=4):
    """Write a copy of a SCMO bam file carrying dropseq-style tags.

    Every read of ``scmo_bam`` is written to ``dropseq_bam``. When a read
    has the source tag, its value is additionally stored under the dropseq
    tag name:

        BC -> XC
        RX -> XM

    The original BC/RX tags are left in place. The output file is indexed
    once it has been written.

    Args:
        scmo_bam: path of the SCMO bam file to read.
        dropseq_bam: path of the bam file to create.
        n_threads: forwarded as ``threads`` to pysam for both files.
    """
    # (source tag, destination tag) pairs, applied in this order.
    tag_pairs = (('BC', 'XC'), ('RX', 'XM'))

    with pysam.AlignmentFile(scmo_bam, threads=n_threads) as source, \
            pysam.AlignmentFile(dropseq_bam, 'wb', header=source.header,
                                threads=n_threads) as target:
        for record in source:
            for src_tag, dst_tag in tag_pairs:
                if record.has_tag(src_tag):
                    record.set_tag(dst_tag, record.get_tag(src_tag))
            target.write(record)

    # Index only after the with-block has closed the output file.
    # NOTE(review): read order is copied unchanged from the input; indexing
    # presumably expects coordinate-sorted data -- confirm with callers.
    pysam.index(dropseq_bam)
|
|
44 |
|
|
|
45 |
if __name__ == '__main__':
    # Command-line entry point: convert the input bam file to the format
    # requested with -fmt and write it to the given output path.

    # Maps each accepted -fmt value to the function that performs it.
    converters = {
        'cellranger': convert_scmo_to_cellranger,
        'dropseq': convert_scmo_to_dropseq,
    }

    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Convert SCMO bam file to cellranger or dropseq bam file')
    argparser.add_argument('inputbamfile', type=str)
    argparser.add_argument('convertedbamfile', type=str)

    og = argparser.add_argument_group("Output")
    # Restricting the choices makes argparse reject an unknown format with a
    # usage error, instead of the script exiting without writing anything.
    og.add_argument(
        '-fmt',
        type=str,
        choices=sorted(converters),
        help='Format to convert to cellranger/dropseq',
        required=True)

    args = argparser.parse_args()

    converters[args.fmt](args.inputbamfile, args.convertedbamfile)