Switch to unified view

a b/singlecellmultiomics/bamProcessing/pileup.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
from pysam.libcalignmentfile import IteratorColumnRegion
4
from collections import Counter
5
6
def pileup_truncated(bam,contig, start, stop,**kwargs):
7
    """
8
    Obtain Pysam columns only at selected region
9
10
    contig(str) : contig to perform pileup
11
    start(int) : coordinate of first colummn (zero based)
12
    stop(int) : coordinate of last column (exclusive)
13
    **kwargs : arguments to pass to pysam.AlignmentFile.IteratorColumn
14
    """
15
    has_coord, rtid, rstart, rstop = bam.parse_region(contig, start, stop )
16
    yield from IteratorColumnRegion(bam,
17
                                    tid=rtid,
18
                                    start=rstart,
19
                                    stop=rstop,truncate=True,**kwargs)
20
21
def has_variant_reads(pysam_alignment_file, chrom, pos, alt, min_reads=2, stepper='nofilter'):
22
    """
23
    Check if the alignment file contains evidence for the supplied base
24
25
    Args:
26
        pysam_alignment_file(pysam.AlignmentFile) : file to check location
27
28
        chrom(str): name of contig
29
30
        pos(int) : position to check (zero based)
31
32
        alt(str): base to check
33
    """
34
35
    obs = Counter()
36
    for pile in pileup_truncated(pysam_alignment_file,chrom,pos,pos+1,stepper=stepper):
37
        if pos!=pile.reference_pos:
38
            continue
39
        for read in pile.pileups:
40
            if not read.is_del and not read.is_refskip:
41
                obs[read.alignment.query_sequence[read.query_position]]+=1
42
    return obs[alt]>=min_reads
43
44
def mate_pileup(alignments, contig, position,**kwargs):
45
    """
46
    Extract all fragments (R1, and R2) which overlap with the supplied position
47
48
    Example:
49
        >>> alignments = pysam.AlignmentFile('example.bam')
50
        >>> piled_reads = dict()
51
        >>> obs = collections.Counter()
52
        >>> pile_pos = 8774
53
        >>> pile_contig = '1'
54
        >>> add_pile_mates(alignments, piled_reads, pile_contig, pile_pos, obs )
55
56
        [[<pysam.libcalignedsegment.AlignedSegment at 0x7f8a7eca4ac8>,
57
          <pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecc83a8>],
58
         [<pysam.libcalignedsegment.AlignedSegment at 0x7f8a7d10ce28>,
59
          <pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecc8468>],
60
         [<pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecc8348>,
61
          <pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecae4c8>],
62
         [<pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecc8948>,
63
          <pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecae588>],
64
         [<pysam.libcalignedsegment.AlignedSegment at 0x7f8a7ecc4ee8>, ...
65
66
    Args:
67
        alignments(pysam.AlignmentFile) : Alignment file to extract reads from
68
69
        contig(str) : contig to perform pileup
70
71
        position(int) : coordinate to perform pileup
72
73
    Returns:
74
        list
75
    """
76
77
    piled_reads = dict()
78
    _mate_pileup(alignments=alignments,
79
                 piled_reads=piled_reads,
80
                 contig=contig,
81
                 position=position, obs=None, add_missing_only=False,**kwargs)
82
    return list(piled_reads.values())
83
84
def _mate_pileup(alignments, piled_reads, contig, position, obs=None, add_missing_only=False, max_depth=50000, stepper='nofilter',ignore_overlaps=False,ignore_orphans=False):
85
    """
86
    Extract all fragments (R1, and R2) which overlap with the supplied position
87
88
    Args:
89
        alignments(pysam.AlignmentFile) : Alignment file to extract reads from
90
91
        piled_reads(dict) : (Empty) Dictionary to which the reads are written
92
93
        contig(str) : contig to perform pileup
94
95
        position(int) : coordinate to perform pileup
96
97
        obs(collections.Counter) : Store base observation frequencies at the location of the pileup in this dictionary
98
99
        add_missing_only(bool): Only add missing mates to the existing dictionary (piled_reads)
100
101
    """
102
    for pile in alignments.pileup(contig,position,position+1,stepper=stepper,ignore_overlaps=ignore_overlaps,ignore_orphans=ignore_orphans,max_depth=max_depth):
103
        if position!=pile.reference_pos:
104
            continue
105
106
        for read in pile.pileups:
107
108
            if not add_missing_only and (read.is_del or read.is_refskip):
109
                continue
110
111
            if read.alignment.is_supplementary:
112
                continue
113
114
            if add_missing_only and not read.alignment.qname in piled_reads:
115
                continue
116
117
            if not read.alignment.qname in piled_reads:
118
                piled_reads[read.alignment.qname] = [None, None]
119
120
            piled_reads[read.alignment.qname][read.alignment.is_read2] = read.alignment
121
122
            if obs is not None:
123
                obs[read.alignment.query_sequence[read.query_position]]+=1
124
125
    if not add_missing_only:
126
        find_missing_mates(alignments, piled_reads)
127
128
129
def find_missing_mates(alignments, piled_reads):
130
    """
131
    Obtain missing mates
132
133
    Args:
134
135
        piled_reads(dict) : { query_name:[R1, R2], ... }
136
137
    """
138
    added=True
139
    piled_locations = set()
140
    while added:
141
        added=False
142
        for qname, reads in piled_reads.items():
143
            R1, R2 = reads
144
145
            if R1 is None and not R2.mate_is_unmapped:
146
                # Find R1 ..
147
                if look_for_mate(alignments, R2, piled_reads, piled_locations):
148
                    added=True
149
                    break
150
151
            if R2 is None and not R1.mate_is_unmapped:
152
                # Find R2 ..
153
                if look_for_mate(alignments, R1, piled_reads, piled_locations):
154
                    added=True
155
                    break
156
157
158
def look_for_mate(alignments, read, piled_reads, piled_locations):
159
    """
160
161
    Given a read, find the mate and possibly other missing reads in the piled_reads dictionary.
162
         Skips coordinates in piled_locations set.
163
164
     Args:
165
        alignments(pysam.AlignmentFile) : Alignment file to extract reads from
166
167
        read(pysam.AlignedSegment) : read to find mate for
168
169
        piled_reads(dict) : Query dictionary  { query_name:[R1, R2], ... }
170
171
        piled_locations(set) : Locations which have already been searched
172
173
    Returns:
174
        has_searched(bool) : Boolean which indicates if the location of the mate was already present in piled_locations and the location was not checked again.
175
    """
176
    location_descriptor = (read.next_reference_name, read.next_reference_start)
177
    if location_descriptor in piled_locations:
178
        return False
179
180
    piled_locations.add( location_descriptor )
181
    _mate_pileup(
182
                    alignments,
183
                    piled_reads,
184
                    contig = location_descriptor[0],
185
                    position = location_descriptor[1],
186
                    add_missing_only=True
187
                )
188
189
    if piled_reads[read.qname ][ read.is_read1] is None:
190
        raise(ValueError( f'Failed to find {read.qname} at {location_descriptor}' ))
191
    return True