Switch to side-by-side view

--- a
+++ b/singlecellmultiomics/molecule/fourthiouridine.py
@@ -0,0 +1,77 @@
+from singlecellmultiomics.molecule import Molecule
+
+
+class FourThiouridine(Molecule):
+    def __init__(self, fragments=None, classifier=None, **kwargs):
+        """ FourThiouridine Molecule class
+
+        Args:
+            fragments(list) :  list of fragments to associate with the Molecule
+            classifier : fitted sklearn classifier, when supplied this classifier is used to obtain a consensus from which the methylation calls are generated.
+
+        """
+        Molecule.__init__(self, fragments=fragments, **kwargs)
+        self.classifier = classifier
+        self.gene = None
+
+    def __finalise__(self):
+        super().__finalise__()
+        self.obtain_conversions(self.classifier)
+
+        for frag in self:
+            if frag.gene is not None:
+                self.gene = frag.gene
+
+    def is_valid(self, set_rejection_reasons=False):
+        if not super().is_valid(set_rejection_reasons=set_rejection_reasons):
+            return False
+
+        try:
+            consensus = self.get_consensus()
+        except ValueError:
+            if set_rejection_reasons:
+                self.set_rejection_reason('no_consensus')
+            return False
+        except TypeError:
+            if set_rejection_reasons:
+                self.set_rejection_reason('getPairGenomicLocations_failed')
+            return False
+
+        return True
+
+    def obtain_conversions(self, classifier=None):
+        """ This methods obtains the amount of converted bases and stores them to self.converted_bases and the 4U tag
+            Args:
+                classifier : classifier used for consensus determination
+            returns:
+                None
+        """
+
+        # Find all aligned positions and corresponding reference bases:
+        aligned_reference_positions = {}  # (chrom,pos)->base
+        for read in self.iter_reads():
+            for read_pos, ref_pos, ref_base in read.get_aligned_pairs(
+                    with_seq=True, matches_only=True):
+                aligned_reference_positions[(
+                    read.reference_name, ref_pos)] = ref_base.upper()
+
+        # Obtain consensus:
+        try:
+            consensus = self.get_consensus(classifier=classifier)
+        except ValueError:
+            raise ValueError(
+                'Cannot obtain a safe consensus for this molecule')
+
+        # look for T > C conversions
+        self.converted_bases = 0
+        conversions = {}
+
+        for location, reference_base in aligned_reference_positions.items():
+            if location not in consensus:
+                continue
+            if (not self.strand and reference_base == 'T' and consensus[location] == 'C') or \
+                    self.strand and reference_base == 'A' and consensus[location] in 'G':
+                conversions[location] = {
+                    'ref': reference_base, 'obs': consensus[location]}
+                self.converted_bases += 1
+        self.set_meta('4U', self.converted_bases)