
singlecellmultiomics/bamProcessing/bamToCountTable.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import singlecellmultiomics.modularDemultiplexer
import os
import sys
import pysam
import collections
import argparse
import pandas as pd
import numpy as np
import itertools
import singlecellmultiomics.modularDemultiplexer.baseDemultiplexMethods
import gzip  # for loading gzip-compressed blacklist bed files
TagDefinitions = singlecellmultiomics.modularDemultiplexer.baseDemultiplexMethods.TagDefinitions


def coordinate_to_sliding_bin_locations(dp, bin_size, sliding_increment):
    """
    Obtain the boundaries and indices of the sliding bins overlapping a coordinate

    Parameters
    ----------
    dp : int
        coordinate to look up

    bin_size : int
        bin size

    sliding_increment : int
        sliding window offset, this is the increment between bins

    Returns
    -------
    start : int
        the start coordinate of the first overlapping bin
    end : int
        the end of the last overlapping bin

    start_id : int
        the index of the first overlapping bin
    end_id : int
        the index of the last overlapping bin

    """
    start_id = int(np.ceil(((dp - bin_size) / sliding_increment)))
    start = start_id * sliding_increment
    end_id = int(np.floor(dp / sliding_increment))
    end = end_id * sliding_increment + bin_size
    return start, end, start_id, end_id
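
# Worked example (illustration, not part of the original module): for
# dp=1200, bin_size=1000, sliding_increment=500:
#   start_id = ceil((1200 - 1000) / 500) = 1 -> start = 500
#   end_id = floor(1200 / 500) = 2 -> end = 2 * 500 + 1000 = 2000
# so coordinate_to_sliding_bin_locations(1200, 1000, 500) == (500, 2000, 1, 2)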


def coordinate_to_bins(point, bin_size, sliding_increment):
    """
    Convert a single value to a list of overlapping bins

    Parameters
    ----------
    point : int
        coordinate to look up

    bin_size : int
        bin size

    sliding_increment : int
        sliding window offset, this is the increment between bins

    Returns
    -------
    list: [(bin_start, bin_end), .. ]

    """
    start, end, start_id, end_id = coordinate_to_sliding_bin_locations(
        point, bin_size, sliding_increment)
    return [(i * sliding_increment, i * sliding_increment + bin_size)
            for i in range(start_id, end_id + 1)]
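
# Usage sketch for the same example coordinate, now expanded to bin tuples:
#   >>> coordinate_to_bins(1200, 1000, 500)
#   [(500, 1500), (1000, 2000)]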


def read_has_alternative_hits_to_non_alts(read):
    """Return True when the XA tag of the read lists a hit on a non '_alt' contig"""
    if read.has_tag('XA'):
        for alt_align in read.get_tag('XA').split(';'):
            if len(alt_align) == 0:  # Sometimes this tag is empty for some reason
                continue

            hchrom, hpos, hcigar, hflag = alt_align.split(',')
            if not hchrom.endswith('_alt'):
                return True
    return False
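
# The XA tag parsed above follows the BWA convention of semicolon-separated
# alternative hits, e.g. (hypothetical values):
#   XA:Z:chr1,+1000,50M,0;chr2_alt,-2000,50M,1;
# Here only the chr1 entry counts as a true alternative hit, because
# chr2_alt ends with '_alt'.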


def readTag(read, tag, defective='None'):
    """Obtain the value of a tag from a read, returning `defective` when the tag is missing"""
    try:
        value = singlecellmultiomics.modularDemultiplexer.metaFromRead(
            read, tag)
    except Exception:
        value = defective
    return value


# define if a read should be used
def read_should_be_counted(read, args, blacklist_dic=None):
    """
    Check if a read should be counted given the filter arguments

    Parameters
    ----------
    read : pysam.AlignedSegment or None
        read to check if it should be counted

    args : argparse.Namespace
        parsed arguments containing the filter settings

    blacklist_dic : dict or None
        {chromosome: [(start, end), ...]} regions in which reads are not counted

    Returns
    -------
    bool
    """
    # Reject missing, QC-failed and unmapped reads first, so that the checks
    # below can safely access attributes such as the cigar string
    if read is None or read.is_qcfail or read.is_unmapped:
        return False

    if args.r1only and read.is_read2:
        return False
    if args.r2only and read.is_read1:
        return False

    if args.filterMP:
        if not read.has_tag('mp'):
            return False

        if read.get_tag('mp') != 'unique':
            return False

    # Mapping quality below threshold
    if read.mapping_quality < args.minMQ:
        return False

    if args.proper_pairs_only and not read.is_proper_pair:
        return False

    if args.no_indels and ('I' in read.cigarstring or 'D' in read.cigarstring):
        return False

    if args.max_base_edits is not None and read.has_tag('NM') and int(
            read.get_tag('NM')) > args.max_base_edits:
        return False

    if args.no_softclips and 'S' in read.cigarstring:
        return False

    # Read has alternative hits
    if args.filterXA:
        if read_has_alternative_hits_to_non_alts(read):
            return False

    # Read is a duplicate
    if args.dedup and (read.has_tag("RR") or read.is_duplicate):
        return False

    # Read overlaps a blacklisted region
    if blacklist_dic is not None:
        if read.reference_name in blacklist_dic:
            # iterate through (start, end) tuples to check
            for start, end in blacklist_dic[read.reference_name]:
                # start is 0-based inclusive, end is 0-based exclusive
                start_bad = start <= read.reference_start < end
                end_bad = start <= read.reference_end < end
                if start_bad or end_bad:
                    return False

    return True
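
# Minimal sketch of calling the filter outside of the command line interface
# (hypothetical values; a real run obtains `args` from the argparser below):
#
#   from types import SimpleNamespace
#   args = SimpleNamespace(r1only=False, r2only=False, filterMP=False,
#                          minMQ=20, proper_pairs_only=True, no_indels=False,
#                          max_base_edits=None, no_softclips=False,
#                          filterXA=False, dedup=True)
#   keep = read_should_be_counted(read, args)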


def tagToHumanName(tag, TagDefinitions):
    if tag not in TagDefinitions:
        return tag
    return TagDefinitions[tag].humanName


def assignReads(
        read,
        countTable,
        args,
        joinFeatures,
        featureTags,
        sampleTags,
        more_args=(),
        blacklist_dic=None):

    assigned = 0
    if not read_should_be_counted(read, args, blacklist_dic):
        return assigned

    # Get the sample to which this read belongs
    sample = tuple(readTag(read, tag) for tag in sampleTags)

    # Decide how many counts this read yields
    countToAdd = 1

    if args.r1only or args.r2only:
        countToAdd = 1
    elif not args.doNotDivideFragments:
        # If the read is paired and the mate mapped, count 0.5 per mate,
        # so the fragment ends up with 1 in total
        countToAdd = (0.5 if (read.is_paired and not read.mate_is_unmapped) else 1)

    assigned += 1

    if args.divideMultimapping:
        # Divide the count over all reported alignment locations
        if read.has_tag('XA'):
            countToAdd = countToAdd / len(read.get_tag('XA').split(';'))
        elif read.has_tag('NH'):
            countToAdd = countToAdd / int(read.get_tag('NH'))

    # Define what counts to add to what samples
    # [ {key, features, samples, increment}, .. ]
    count_increment = []

    feature_dict = {}
    joined_feature = []
    used_features = []
    for tag in featureTags:
        feat = str(readTag(read, tag))
        feature_dict[tag] = feat
        if args.bin is not None and args.binTag == tag:
            continue
        if args.byValue is not None and tag == args.byValue:
            continue
        joined_feature.append(feat)
        used_features.append(tag)

    if joinFeatures:
        if args.splitFeatures:
            # Obtain all key states:
            key_states = []
            for tag in featureTags:
                value = feature_dict.get(tag)
                key_states.append(value.split(args.featureDelimiter))

            for state in itertools.product(*key_states):
                joined_feature = []
                feature_dict = {
                    feature: value
                    for feature, value in zip(featureTags, state)
                }
                for feature, value in zip(featureTags, state):
                    if args.bin is not None and args.binTag == feature:
                        continue

                    if len(value) > 0:
                        joined_feature.append(value)
                    else:
                        joined_feature.append('None')

                if args.byValue is not None:
                    raise NotImplementedError(
                        'By value is not implemented for --splitFeatures')

                count_increment.append({
                    'key': tuple(joined_feature),
                    'features': feature_dict,
                    'samples': [sample],
                    'increment': countToAdd})
        else:
            if args.byValue is not None:

                try:
                    add = float(feature_dict.get(args.byValue, 0))
                except ValueError:
                    add = 0

                count_increment.append({
                    'key': tuple(joined_feature),
                    'features': feature_dict,
                    'samples': [sample],
                    'increment': add})

            else:
                count_increment.append({
                    'key': tuple(joined_feature),
                    'features': feature_dict,
                    'samples': [sample],
                    'increment': countToAdd})
    else:
        if args.bin is not None:
            raise NotImplementedError('Try using -joinedFeatureTags')

        for feature, value in feature_dict.items():
            if args.byValue is not None and feature == args.byValue:

                try:
                    add = float(feature_dict.get(args.byValue, 0))
                except ValueError:
                    add = 0

                count_increment.append({
                    'key': tuple(joined_feature),
                    'features': feature_dict,
                    'samples': [sample],
                    'increment': add})

            elif args.splitFeatures:
                for f in value.split(args.featureDelimiter):
                    count_increment.append({
                        'key': (f, ),
                        'features': {feature: f},
                        'samples': [sample],
                        'increment': countToAdd
                    })

            else:
                count_increment.append({
                    'key': (value, ),
                    'features': {feature: value},
                    'samples': [sample],
                    'increment': countToAdd})

    """
    Now we have a list of dicts:
    {
    'key': feature,
    'features': {feature: value},
    'samples': [sample],
    'increment': countToAdd
    }
    """
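    # For illustration (hypothetical tag values): a read from cell 'A1'
    # hitting gene 'Gstm1' with -joinedFeatureTags GN would yield one entry:
    #   {'key': ('Gstm1',), 'features': {'GN': 'Gstm1'},
    #    'samples': [('A1',)], 'increment': 0.5}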

    # increment the count table accordingly:
    if args.bin is not None:
        for dtable in count_increment:
            key = dtable['key']
            countToAdd = dtable['increment']
            samples = dtable['samples']
            value_to_be_binned = dtable['features'].get(args.binTag, None)

            if value_to_be_binned is None or value_to_be_binned == 'None':
                continue

            for start, end in coordinate_to_bins(
                    int(value_to_be_binned), args.bin, args.sliding):
                # Reject bins outside the chromosome boundary
                if not args.keepOverBounds and (
                        start < 0 or end > args.ref_lengths[read.reference_name]):
                    continue
                for sample in samples:
                    countTable[sample][tuple(
                        list(key) + [start, end])] += countToAdd

    elif args.bedfile is not None:

        for dtable in count_increment:
            if args.byValue:
                key = [args.byValue]
            else:
                key = dtable['key']
            countToAdd = dtable['increment']
            samples = dtable['samples']

            # Get features from the bed file
            start, end, bname = more_args[0], more_args[1], more_args[2]
            jfeat = tuple(list(key) + [start, end, bname])
            if len(key):
                for sample in samples:
                    countTable[sample][jfeat] += countToAdd
            # else: this will also emit non assigned reads
            #    countTable[sample][ 'None' ] += countToAdd

    else:
        for dtable in count_increment:
            key = dtable['key']
            countToAdd = dtable['increment']
            samples = dtable['samples']

            for sample in samples:
                if len(key) == 1:
                    countTable[sample][key[0]] += countToAdd
                else:
                    countTable[sample][key] += countToAdd

    return assigned
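
# Hedged sketch of driving assignReads by hand, outside create_count_table
# (file name and tags hypothetical):
#
#   countTable = collections.defaultdict(collections.Counter)
#   with pysam.AlignmentFile('tagged.bam') as f:
#       for read in f:
#           assignReads(read, countTable, args, joinFeatures=True,
#                       featureTags=['chrom'], sampleTags=['SM'])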


def create_count_table(args, return_df=False):

    if len(args.alignmentfiles) == 0:
        raise ValueError("Supply at least one bam file")
    if args.bedfile is not None:
        assert os.path.isfile(args.bedfile), f'Bed file not found: {args.bedfile}'

    if args.sliding is None:
        args.sliding = args.bin

    if not return_df and args.o is None and args.alignmentfiles is not None:
        args.showtags = True

    if args.showtags:
        # Find which tags are available in the file:
        head = 1000
        tagObs = collections.Counter()
        for bamFile in args.alignmentfiles:
            with pysam.AlignmentFile(bamFile) as f:
                for i, read in enumerate(f):
                    tagObs += collections.Counter([k for k,
                                                   v in read.get_tags(with_value_type=False)])
                    if i == (head - 1):
                        break
        import colorama

        print(
            f'{colorama.Style.BRIGHT}Tags seen in the supplied bam file(s):{colorama.Style.RESET_ALL}')
        for observedTag in tagObs:
            tag = observedTag
            if observedTag in TagDefinitions:
                t = TagDefinitions[observedTag]
                humanName = t.humanName
                isPhred = t.isPhred
            else:
                t = observedTag
                isPhred = False
                humanName = f'{colorama.Style.RESET_ALL}<No information available>'

            allReadsHaveTag = (tagObs[tag] == head)
            color = colorama.Fore.GREEN if allReadsHaveTag else colorama.Fore.YELLOW
            print(
                f'{color}{colorama.Style.BRIGHT}{tag}{colorama.Style.RESET_ALL}\t{color + humanName}\t{colorama.Style.DIM}{"PHRED" if isPhred else ""}{colorama.Style.RESET_ALL}' +
                f'\t{"" if allReadsHaveTag else "Not all reads have this tag"}')

        print(f'{colorama.Style.BRIGHT}\nAVO lab tag list:{colorama.Style.RESET_ALL}')
        for tag, t in TagDefinitions.items():
            print(f'{colorama.Style.BRIGHT}{tag}{colorama.Style.RESET_ALL}\t{t.humanName}\t{colorama.Style.DIM}{"PHRED" if t.isPhred else ""}{colorama.Style.RESET_ALL}')
        sys.exit()

    if args.o is None and not return_df:
        raise ValueError('Supply an output file')

    if args.alignmentfiles is None:
        raise ValueError('Supply alignment (BAM) files')

    joinFeatures = False
    featureTags = []
    if args.featureTags is not None:
        featureTags = args.featureTags.split(',')

    if args.joinedFeatureTags is not None:
        featureTags = args.joinedFeatureTags.split(',')
        joinFeatures = True

        # When -byValue is used and joined feature tags are supplied,
        # automatically append the args.byValue tag, otherwise it will get lost
        if args.byValue is not None and len(featureTags) > 0 and args.byValue not in featureTags:
            featureTags.append(args.byValue)

    if args.bin is not None and args.binTag not in featureTags:
        print("The bin tag was not supplied as feature, automatically appending the bin feature.")
        featureTags.append(args.binTag)

    if len(featureTags) == 0:
        raise ValueError(
            "No features supplied! Please supply -featureTags, -joinedFeatureTags and/or -binTag")

    sampleTags = args.sampleTags.split(',')
    countTable = collections.defaultdict(
        collections.Counter)  # cell -> feature -> count

    if args.blacklist is not None:
        # create a blacklist dictionary {chromosome : [ (start1, end1), ..., (startN, endN) ]}
        # used to check each read and exclude it when it falls within any of these regions
        blacklist_dic = {}
        print("Creating blacklist dictionary:")
        # use the gzip module for compressed blacklist bed files
        opener = gzip.open if args.blacklist.endswith('.gz') else open
        with opener(args.blacklist, mode='rt') as blfile:
            for row in blfile:
                parts = row.strip().split()
                chromo, start, end = parts[0], int(parts[1]), int(parts[2])
                if chromo not in blacklist_dic:
                    # init chromo
                    blacklist_dic[chromo] = []  # list of tuples
                blacklist_dic[chromo].append((start, end))
        print(blacklist_dic)
    else:
        blacklist_dic = None
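
    # Example: a (hypothetical) blacklist bed file containing
    #   chr1  100  200
    #   chr1  500  800
    # yields blacklist_dic == {'chr1': [(100, 200), (500, 800)]}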

    assigned = 0
    for bamFile in args.alignmentfiles:

        with pysam.AlignmentFile(bamFile) as f:
            i = 0  # make sure i is defined
            if args.bin:
                # Obtain the reference sequence lengths
                ref_lengths = {
                    r: f.get_reference_length(r) for r in f.references}
                args.ref_lengths = ref_lengths
            if args.bedfile is None:
                # for adding counts associated with a tag OR with binning
                if args.contig is not None:
                    pysam_iterator = f.fetch(args.contig)
                else:
                    pysam_iterator = f

                for i, read in enumerate(pysam_iterator):
                    if i % 1_000_000 == 0:
                        print(
                            f"{bamFile} Processed {i} reads, assigned {assigned}, completion:{100 * (i / (0.001 + f.mapped + f.unmapped + f.nocoordinate))}%")

                    if args.head is not None and i > args.head:
                        break

                    assigned += assignReads(read,
                                            countTable,
                                            args,
                                            joinFeatures,
                                            featureTags,
                                            sampleTags,
                                            blacklist_dic=blacklist_dic)
            else:  # args.bedfile is not None
                # for adding counts associated with a bed file
                with open(args.bedfile, "r") as bfile:
                    for row in bfile:

                        parts = row.strip().split()
                        chromo, start, end, bname = parts[0], int(float(
                            parts[1])), int(float(parts[2])), parts[3]
                        if args.contig is not None and chromo != args.contig:
                            continue
                        for i, read in enumerate(f.fetch(chromo, start, end)):
                            if i % 1_000_000 == 0:
                                print(
                                    f"{bamFile} Processed {i} reads, assigned {assigned}, completion:{100 * (i / (0.001 + f.mapped + f.unmapped + f.nocoordinate))}%")
                            assigned += assignReads(read,
                                                    countTable,
                                                    args,
                                                    joinFeatures,
                                                    featureTags,
                                                    sampleTags,
                                                    more_args=[start,
                                                               end,
                                                               bname],
                                                    blacklist_dic=blacklist_dic)

                            if args.head is not None and i > args.head:
                                break

            print(
                f"Finished: {bamFile} Processed {i} reads, assigned {assigned}")
    print(f"Finished counting, now exporting to {args.o}")
    df = pd.DataFrame.from_dict(countTable)

    # Set names of indices
    if not args.noNames:
        df.columns.set_names([tagToHumanName(t, TagDefinitions)
                              for t in sampleTags], inplace=True)

        try:
            if args.bin is not None:
                index_names = [tagToHumanName(
                    t, TagDefinitions) for t in featureTags if t != args.binTag] + ['start', 'end']
                df.index.set_names(index_names, inplace=True)
            elif args.bedfile is not None:
                index_names = [tagToHumanName(
                    t, TagDefinitions) for t in featureTags if t != args.binTag] + ['start', 'end', 'bname']
                df.index.set_names(index_names, inplace=True)
            elif joinFeatures:
                index_names = [
                    tagToHumanName(
                        t, TagDefinitions) for t in featureTags]
                df.index.set_names(index_names, inplace=True)
            else:
                index_names = ','.join(
                    [tagToHumanName(t, TagDefinitions) for t in featureTags])
                df.index.set_names(index_names, inplace=True)
            # print inside the try block: index_names is undefined when
            # setting the index names failed
            print(index_names)
        except Exception:
            pass

    if return_df:
        return df

    if args.bulk:
        sum_row = df.sum(axis=1)
        df = pd.DataFrame(sum_row)
        df.columns = ["Bulkseq"]

    if args.o.endswith('.pickle') or args.o.endswith('.pickle.gz'):
        df.to_pickle(args.o)
    else:
        df.to_csv(args.o)
    print("Finished export.")
    return args.o


if __name__ == '__main__':
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Convert BAM file(s) which have been annotated using featureCounts or otherwise to a count matrix')
    argparser.add_argument(
        '-o',
        type=str,
        help="output csv path, or a pickled pandas DataFrame if the path ends with .pickle or .pickle.gz",
        required=False)
    argparser.add_argument(
        '-featureTags',
        type=str,
        default=None,
        help='Tag(s) used for defining the ROWS of the matrix. Single dimension.')
    argparser.add_argument(
        '-joinedFeatureTags',
        type=str,
        default=None,
        help='These define the ROWS of your matrix. For example if you want allele (DA) and restriction site (DS) use DA,DS. If you want a row containing the mapped chromosome use "chrom" as feature. Use this argument if you want a multidimensional index')
    argparser.add_argument(
        '-sampleTags',
        type=str,
        default='SM',
        help='Comma-separated tags defining the names of the COLUMNS in the output matrix')
    argparser.add_argument('alignmentfiles', type=str, nargs='*')
    argparser.add_argument(
        '-head',
        type=int,
        help='Run the algorithm only on the first N reads to check if the result looks like what you expect.')

    argparser.add_argument(
        '--bulk',
        action='store_true',
        help='Include this when making a count table for bulk sequencing data. This operation sums the counts of all sampleTags into a single column.')
    argparser.add_argument(
        '--r1only',
        action='store_true',
        help='Only count R1')

    argparser.add_argument(
        '--r2only',
        action='store_true',
        help='Only count R2')

    argparser.add_argument(
        '--splitFeatures',
        action='store_true',
        help='Split features by the feature delimiter. For example if a read has the feature Foo,Bar increase the counts for both Foo and Bar')
    argparser.add_argument('-featureDelimiter', type=str, default=',')
    argparser.add_argument(
        '-contig',
        type=str,
        help='Run only on this chromosome')

    multimapping_args = argparser.add_argument_group('Multimapping', '')
    multimapping_args.add_argument(
        '--divideMultimapping',
        action='store_true',
        help='Divide multimapping reads over all targets. Requires the XA or NH tag to be set.')
    multimapping_args.add_argument(
        '--doNotDivideFragments',
        action='store_true',
        help='When used, every read is counted once and a fragment counts as two reads. Otherwise each mate of a mapped pair counts as 0.5.')

    binning_args = argparser.add_argument_group('Binning', '')
    # binning_args.add_argument('-offset', type=int, default=0, help="Add offset to bin. If bin=1000, offset=200, f=1999 -> 1200. f=4199 -> 3200")
    binning_args.add_argument(
        '-sliding',
        type=int,
        help="Make the bins overlapping; the step size is equal to the supplied value. When nothing is supplied this value equals the bin size")
    binning_args.add_argument(
        '--keepOverBounds',
        action='store_true',
        help="Keep bins which go over chromosome bounds (start < 0 or end > chromosome length)")

    binning_args.add_argument(
        '-bin',
        type=int,
        help="Divide and floor to bin features. If bin=1000, f=1999 -> 1000.")
    # binning_args.add_argument('--showBinEnd', action='store_true',
    #     help="If True, then show the DS column as 120000-220000, otherwise 120000 only.
    #     This specifies the bin range in which the read was counted")
    # this is now always on!
    binning_args.add_argument('-binTag', default='DS')
    binning_args.add_argument(
        '-byValue',
        type=str,
        help='Extract the value from the supplied tag and use this as the count to add')
    binning_args.add_argument(
        '-blacklist',
        metavar='BED FILE BLACKLIST TO FILTER OUT',
        help='Bed file of blacklist regions to exclude',
        default=None)
    bed_args = argparser.add_argument_group('Bedfiles', '')
    bed_args.add_argument(
        '-bedfile',
        type=str,
        help="Bed file containing 4 columns: chromosome, start, end and name, used for fetching counts per region")

    filters = argparser.add_argument_group('Filters', '')

    filters.add_argument(
        '--proper_pairs_only',
        action='store_true',
        help='Only count reads mapped in a proper pair (within the expected insert size range)')

    filters.add_argument(
        '--no_softclips',
        action='store_true',
        help='Only count reads without softclips')

    filters.add_argument(
        '-max_base_edits',
        type=int,
        help='Count only reads with at most this amount of bases differing from the reference')

    filters.add_argument(
        '--no_indels',
        action='store_true',
        help='Only count reads without indels')

    filters.add_argument(
        '--dedup',
        action='store_true',
        help='Count only the first occurrence of a molecule. Requires the RC tag to be set. Reads without the RC tag will be ignored!')

    filters.add_argument(
        '-minMQ',
        type=int,
        default=0,
        help="minimum mapping quality")

    filters.add_argument(
        '--filterMP',
        action='store_true',
        help="Filter reads which are not uniquely mappable; this is based on the presence of the `mp` tag, which can be generated by bamtagmultiome.py")

    filters.add_argument(
        '--filterXA',
        action='store_true',
        help="Do not count reads where the XA (alternative hits) tag has been set for a non-alternative locus.")

    argparser.add_argument(
        '--noNames',
        action='store_true',
        help='Do not set the names of the index and columns; this simplifies the resulting CSV/pickle file')
    argparser.add_argument(
        '--showtags',
        action='store_true',
        help='Show a list of commonly used tags')

    args = argparser.parse_args()
    create_count_table(args)
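
# Hedged usage example (file and output names hypothetical):
#   python bamToCountTable.py tagged.bam -o counts.csv \
#       -sampleTags SM -joinedFeatureTags chrom,DS -bin 100000 --dedup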