Switch to unified view

a b/singlecellmultiomics/molecule/chic.py
1
from singlecellmultiomics.molecule.molecule import Molecule
2
from singlecellmultiomics.molecule.featureannotatedmolecule import FeatureAnnotatedMolecule
3
import collections
4
5
class CHICMolecule(Molecule):
6
    """CHIC Molecule class
7
8
    Args:
9
        fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
10
        **kwargs: extra args
11
12
    """
13
14
    def __init__(self, fragment,
15
                 **kwargs):
16
17
        self.site_location = None
18
        Molecule.__init__(self, fragment, **kwargs)
19
        self.ligation_motif = None
20
21
        # Extracted from fragments:
22
        self.assignment_radius=None
23
24
25
26
    def _add_fragment(self,fragment):
27
        # Update the cut coordinate tho the (left most extreme value)
28
        self.assignment_radius =fragment.assignment_radius
29
30
        if fragment.site_location is not None:
31
32
            if self.site_location is None:
33
                self.site_location = [fragment.site_location[0],fragment.site_location[1]]
34
35
            elif fragment.strand: # Reverse:
36
                self.site_location[1] = max(fragment.site_location[1], self.site_location[1]) # this is the coordinate
37
            else:
38
                self.site_location[1] = min(fragment.site_location[1], self.site_location[1]) # this is the coordinate
39
40
        # else : writing a fragment which has no cut location associated
41
42
        Molecule._add_fragment(self, fragment)
43
44
45
    def get_cut_site(self):
46
        """For restriction based protocol data, obtain genomic location of cut site
47
48
        Returns:
49
            None if site is not available
50
51
            chromosome (str)
52
            position (int)
53
            strand (bool)
54
        """
55
        return (*self.site_location, self.strand)
56
57
    def write_tags(self):
58
59
        # Write DS tag when it could have been updated
60
        if self.assignment_radius is not None and self.assignment_radius>0 and len(self)>1:
61
            for frag in self:
62
                frag.set_meta('DS', self.site_location[1])
63
64
        Molecule.write_tags(self)
65
66
    def is_valid(self, set_rejection_reasons=False):
67
68
        try:
69
            chrom, start, strand = self.get_cut_site()
70
        except Exception as e:
71
            if set_rejection_reasons:
72
                self.set_rejection_reason(f'no_cut_site_found')
73
            return False
74
75
        return True
76
77
    def get_fragment_span_sequence(self, reference=None):
78
        """Obtain the sequence between the start and end of the molecule
79
80
        Args:
81
            reference(pysam.FastaFile) : reference  to use.
82
                If not specified `self.reference` is used
83
84
        Returns:
85
            sequence (str)
86
        """
87
        if reference is None:
88
            if self.reference is None:
89
                raise ValueError('Please supply a reference (PySAM.FastaFile)')
90
        reference = self.reference
91
        return reference.fetch(
92
            self.chromosome,
93
            self.spanStart,
94
            self.spanEnd).upper()
95
96
    def __finalise__(self):
97
        # Obtain ligation motif(s)
98
        self.update_ligation_motif()
99
        Molecule.__finalise__(self)
100
101
    def update_ligation_motif(self):
102
        """
103
        Extract lh tag from associated reads and set most common one to the RZ tag
104
        """
105
        ligation_motif_counts = collections.Counter()
106
        for fragment in self:
107
            if fragment.ligation_motif is not None:
108
                ligation_motif_counts[fragment.ligation_motif]+=1
109
        if len(ligation_motif_counts)>0:
110
            self.ligation_motif = ligation_motif_counts.most_common(1)[0][0]
111
            self.set_meta('RZ', self.ligation_motif)
112
113
114
class CHICNLAMolecule(Molecule):
115
    """CHIC NLA Molecule class
116
117
    Args:
118
        fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
119
        **kwargs: extra args
120
121
    """
122
    def get_upstream_site(self, scan_extra_bp=3):
123
        read = None
124
        for fragment in self:
125
            if fragment[0] is not None:
126
                read = fragment[0]
127
                break
128
129
        if read is None:
130
            raise ValueError() # Read 1 is not mapped
131
132
        if self.strand==0:
133
            start = read.reference_start
134
            return self.reference.fetch( self.chromosome, start-4-scan_extra_bp, start)
135
        else:
136
            start = read.reference_end
137
            return self.reference.fetch( self.chromosome, start, start+4+scan_extra_bp)
138
139
140
    def __init__(self, fragment, reference, **kwargs):
141
        self.reference = reference
142
        CHICMolecule.__init__(self, fragment, reference=reference, **kwargs)
143
144
    def write_tags(self):
145
        try:
146
            site = self.get_upstream_site()
147
            self.set_meta('RZ',site)
148
            if 'N' in site:
149
                self.set_meta('dt','UNK')
150
            elif 'CATG' in site:
151
                self.set_meta('dt','NLA')
152
            else:
153
                self.set_meta('dt','CHIC')
154
        except ValueError:
155
            self.set_meta('dt','UNK')
156
            self.set_meta('RR','No_R1')
157
158
159
160
161
class AnnotatedCHICMolecule(CHICMolecule, FeatureAnnotatedMolecule):
162
    """Chic Molecule which is annotated with features (genes/exons/introns, .. )
163
164
    Args:
165
        fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
166
        features (singlecellmultiomics.features.FeatureContainer) : container to use to obtain features from
167
        **kwargs: extra args
168
169
    """
170
171
    def write_tags(self):
172
        CHICMolecule.write_tags(self)
173
        FeatureAnnotatedMolecule.write_tags(self)
174
175
    def __init__(self, fragment, features, **kwargs):
176
        CHICMolecule.__init__(self, fragment, **kwargs)
177
        FeatureAnnotatedMolecule.__init__(self, fragment, features, **kwargs)