a b/singlecellmultiomics/molecule/fourthiouridine.py
1
from singlecellmultiomics.molecule import Molecule
2
3
4
class FourThiouridine(Molecule):
5
    def __init__(self, fragments=None, classifier=None, **kwargs):
6
        """ FourThiouridine Molecule class
7
8
        Args:
9
            fragments(list) :  list of fragments to associate with the Molecule
10
            classifier : fitted sklearn classifier, when supplied this classifier is used to obtain a consensus from which the methylation calls are generated.
11
12
        """
13
        Molecule.__init__(self, fragments=fragments, **kwargs)
14
        self.classifier = classifier
15
        self.gene = None
16
17
    def __finalise__(self):
18
        super().__finalise__()
19
        self.obtain_conversions(self.classifier)
20
21
        for frag in self:
22
            if frag.gene is not None:
23
                self.gene = frag.gene
24
25
    def is_valid(self, set_rejection_reasons=False):
26
        if not super().is_valid(set_rejection_reasons=set_rejection_reasons):
27
            return False
28
29
        try:
30
            consensus = self.get_consensus()
31
        except ValueError:
32
            if set_rejection_reasons:
33
                self.set_rejection_reason('no_consensus')
34
            return False
35
        except TypeError:
36
            if set_rejection_reasons:
37
                self.set_rejection_reason('getPairGenomicLocations_failed')
38
            return False
39
40
        return True
41
42
    def obtain_conversions(self, classifier=None):
43
        """ This methods obtains the amount of converted bases and stores them to self.converted_bases and the 4U tag
44
            Args:
45
                classifier : classifier used for consensus determination
46
            returns:
47
                None
48
        """
49
50
        # Find all aligned positions and corresponding reference bases:
51
        aligned_reference_positions = {}  # (chrom,pos)->base
52
        for read in self.iter_reads():
53
            for read_pos, ref_pos, ref_base in read.get_aligned_pairs(
54
                    with_seq=True, matches_only=True):
55
                aligned_reference_positions[(
56
                    read.reference_name, ref_pos)] = ref_base.upper()
57
58
        # Obtain consensus:
59
        try:
60
            consensus = self.get_consensus(classifier=classifier)
61
        except ValueError:
62
            raise ValueError(
63
                'Cannot obtain a safe consensus for this molecule')
64
65
        # look for T > C conversions
66
        self.converted_bases = 0
67
        conversions = {}
68
69
        for location, reference_base in aligned_reference_positions.items():
70
            if location not in consensus:
71
                continue
72
            if (not self.strand and reference_base == 'T' and consensus[location] == 'C') or \
73
                    self.strand and reference_base == 'A' and consensus[location] in 'G':
74
                conversions[location] = {
75
                    'ref': reference_base, 'obs': consensus[location]}
76
                self.converted_bases += 1
77
        self.set_meta('4U', self.converted_bases)