|
a |
|
b/singlecellmultiomics/molecule/fourthiouridine.py |
|
|
1 |
from singlecellmultiomics.molecule import Molecule |
|
|
2 |
|
|
|
3 |
|
|
|
4 |
class FourThiouridine(Molecule): |
|
|
5 |
def __init__(self, fragments=None, classifier=None, **kwargs): |
|
|
6 |
""" FourThiouridine Molecule class |
|
|
7 |
|
|
|
8 |
Args: |
|
|
9 |
fragments(list) : list of fragments to associate with the Molecule |
|
|
10 |
classifier : fitted sklearn classifier, when supplied this classifier is used to obtain a consensus from which the methylation calls are generated. |
|
|
11 |
|
|
|
12 |
""" |
|
|
13 |
Molecule.__init__(self, fragments=fragments, **kwargs) |
|
|
14 |
self.classifier = classifier |
|
|
15 |
self.gene = None |
|
|
16 |
|
|
|
17 |
def __finalise__(self): |
|
|
18 |
super().__finalise__() |
|
|
19 |
self.obtain_conversions(self.classifier) |
|
|
20 |
|
|
|
21 |
for frag in self: |
|
|
22 |
if frag.gene is not None: |
|
|
23 |
self.gene = frag.gene |
|
|
24 |
|
|
|
25 |
def is_valid(self, set_rejection_reasons=False): |
|
|
26 |
if not super().is_valid(set_rejection_reasons=set_rejection_reasons): |
|
|
27 |
return False |
|
|
28 |
|
|
|
29 |
try: |
|
|
30 |
consensus = self.get_consensus() |
|
|
31 |
except ValueError: |
|
|
32 |
if set_rejection_reasons: |
|
|
33 |
self.set_rejection_reason('no_consensus') |
|
|
34 |
return False |
|
|
35 |
except TypeError: |
|
|
36 |
if set_rejection_reasons: |
|
|
37 |
self.set_rejection_reason('getPairGenomicLocations_failed') |
|
|
38 |
return False |
|
|
39 |
|
|
|
40 |
return True |
|
|
41 |
|
|
|
42 |
def obtain_conversions(self, classifier=None): |
|
|
43 |
""" This methods obtains the amount of converted bases and stores them to self.converted_bases and the 4U tag |
|
|
44 |
Args: |
|
|
45 |
classifier : classifier used for consensus determination |
|
|
46 |
returns: |
|
|
47 |
None |
|
|
48 |
""" |
|
|
49 |
|
|
|
50 |
# Find all aligned positions and corresponding reference bases: |
|
|
51 |
aligned_reference_positions = {} # (chrom,pos)->base |
|
|
52 |
for read in self.iter_reads(): |
|
|
53 |
for read_pos, ref_pos, ref_base in read.get_aligned_pairs( |
|
|
54 |
with_seq=True, matches_only=True): |
|
|
55 |
aligned_reference_positions[( |
|
|
56 |
read.reference_name, ref_pos)] = ref_base.upper() |
|
|
57 |
|
|
|
58 |
# Obtain consensus: |
|
|
59 |
try: |
|
|
60 |
consensus = self.get_consensus(classifier=classifier) |
|
|
61 |
except ValueError: |
|
|
62 |
raise ValueError( |
|
|
63 |
'Cannot obtain a safe consensus for this molecule') |
|
|
64 |
|
|
|
65 |
# look for T > C conversions |
|
|
66 |
self.converted_bases = 0 |
|
|
67 |
conversions = {} |
|
|
68 |
|
|
|
69 |
for location, reference_base in aligned_reference_positions.items(): |
|
|
70 |
if location not in consensus: |
|
|
71 |
continue |
|
|
72 |
if (not self.strand and reference_base == 'T' and consensus[location] == 'C') or \ |
|
|
73 |
self.strand and reference_base == 'A' and consensus[location] in 'G': |
|
|
74 |
conversions[location] = { |
|
|
75 |
'ref': reference_base, 'obs': consensus[location]} |
|
|
76 |
self.converted_bases += 1 |
|
|
77 |
self.set_meta('4U', self.converted_bases) |