a b/singlecellmultiomics/molecule/scartrace.py
1
from singlecellmultiomics.molecule.molecule import Molecule
2
from singlecellmultiomics.utils.sequtils import phred_to_prob
3
import numpy as np
4
5
class ScarTraceMolecule(Molecule):
6
    """ScarTrace Molecule class
7
8
    Args:
9
        fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
10
11
        **kwargs: extra args
12
13
    """
14
15
    def __init__(self, fragment,
16
                 **kwargs):
17
        Molecule.__init__(self, fragment, **kwargs)
18
19
        # Run code to obtain scar description
20
        assert len(self) == 1, 'only implemented for molecules with a single associated fragment'
21
22
        scarDescription = set()
23
        qualities = []
24
        for read in self.iter_reads():
25
            if read is None:
26
                continue
27
            if read.is_unmapped:
28
                continue
29
            firstCigarOperation, firstCigarLen = read.cigartuples[0]
30
            insertPos = 0
31
            lastNonNoneRefPos = read.reference_start if firstCigarOperation != 4 else read.reference_start - firstCigarLen
32
33
            expandedCigar = []
34
            for cigarOperation, cigarLen in read.cigartuples:
35
                expandedCigar += [cigarOperation] * cigarLen
36
37
            for queryPos, referencePos in read.get_aligned_pairs(
38
                    matches_only=False):
39
                if queryPos is not None:
40
                    qualities.append(phred_to_prob(read.qual[queryPos]))
41
42
                if queryPos is None and referencePos is None:
43
                    continue
44
45
                if referencePos is not None:
46
                    lastNonNoneRefPos = referencePos
47
                    insertPos = 0
48
                    if queryPos is not None:  # If both the reference and query match, there is not scar information
49
                        continue
50
51
                if queryPos is None:
52
                    scarDescription.add(f'{referencePos}.D')
53
                elif referencePos is None:  # insert or clip:
54
                    operation = expandedCigar[queryPos]
55
                    if operation == 1:
56
                        if lastNonNoneRefPos is None:
57
                            raise ValueError('Unsolvable :(')
58
                        queryBase = read.seq[queryPos]
59
                        scarDescription.add(
60
                            f'{queryBase}.{insertPos+lastNonNoneRefPos}.I')
61
                        insertPos += 1
62
63
        scarDescription = ','.join(sorted(list(scarDescription)))
64
65
        # Add average base calling quality excluding primers:
66
        meanQ = np.mean(qualities) if len(qualities)>0 else 0
67
        for read in self.iter_reads():
68
            read.set_tag('SQ', 1 - meanQ)
69
            if len(scarDescription) == 0:
70
                scarDescription = 'WT'
71
            #@todo when collapsing into molecule:
72
            read.set_tag('SD', scarDescription)
73
            #@todo when collapsing into molecule:
74
        if self[0].get_R1() is not None:
75
            self.set_meta('DS', self[0].get_R1().reference_start)
76
            self.set_meta('RZ', 'SCAR')
77
            self.set_meta('DT', 'SCAR')
78
79
80
    def write_tags(self):
81
        Molecule.write_tags(self)
82
83
    def is_valid(self, set_rejection_reasons=False, reference=None):
84
        if not any( fragment.is_valid() for fragment in self ):
85
            self.set_rejection_reason('all_fragments_rejected')
86
            return False
87
88
        return True