Diff of /tests/test_fragment.py [000000] .. [2c420a]

Switch to side-by-side view

--- a
+++ b/tests/test_fragment.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import unittest
+import singlecellmultiomics.molecule
+import singlecellmultiomics.fragment
+import pysam
+import pysamiterators.iterators
+import os
+from singlecellmultiomics.fragment import Fragment, CHICFragment
+from singlecellmultiomics.utils import create_MD_tag
+from singlecellmultiomics.utils import complement
+"""
+These tests check if the Molecule module is working correctly
+"""
+class TestTAPs(unittest.TestCase):
+
+
+    def test_random_priming(self):
+        temp_folder = 'data'
+
+        enable_ref_write=False
+        ref_path = f'{temp_folder}/chic_ref.fa'
+        alignments_path = f'{temp_folder}/chic_test_alignments.bam'
+
+        if not os.path.exists(temp_folder):
+            os.makedirs(temp_folder)
+        # Create reference bam file
+
+        refseq = 'TTAATCATGAAACCGTGGAGGCAAATCGGAGTGTAAGGCTTGACTGGATTCCTACGTTGCGTAGGTTCATGGGGGG'
+        if enable_ref_write:
+            with open(ref_path, 'w') as f:
+                f.write(f">chr1\n{refseq}\n>chr2\n{complement(refseq)}\n""")
+
+            # This command needs to finish, which is not working properly during testing
+            pysam.faidx(ref_path)
+
+        # CATG at base 5
+        # Create BAM file with NLA fragment:
+
+        def get_reads():
+            with pysam.AlignmentFile(alignments_path_unsorted,'wb',reference_names=['chr1'],reference_lengths=[len(refseq)]) as bam:
+
+
+                ### Nla III mate pair example, containing 2 CpGs and 1 call on the wrong strand
+                read_A = pysam.AlignedSegment(bam.header)
+                read_A.reference_name = 'chr1'
+                read_A.reference_start = 5
+                # Before last A is a bogus G>A conversion to test strandness:
+                read_A.query_sequence = 'CATGAAACCGTGGAGGCAAATTGGAGTAT'
+                read_A.cigarstring = f'{len(read_A.query_sequence)}M'
+                read_A.qual = 'A'*len(read_A.query_sequence)
+                read_A.mapping_quality = 60
+                read_A.query_name = 'EX1_GA_CONV_2x_CpG_TAPS'
+                read_A.set_tag('SM', 'Cell_A')
+                read_A.is_read1 = True
+                read_A.is_read2 = False
+                read_A.set_tag('lh','TG')
+                # Set substitution tag:
+                read_A.set_tag('MD',
+                               create_MD_tag(
+                                       refseq[read_A.reference_start:read_A.reference_end], read_A.query_sequence))
+                read_A.is_paired = True
+                read_A.is_proper_pair = True
+
+                # Create a second read which is a mate of the previous
+                read_B = pysam.AlignedSegment(bam.header)
+                read_B.reference_name = 'chr1'
+                read_B.reference_start = 25
+                read_B.query_sequence = refseq[25:60].replace('TGT','TAT').replace('CG', 'TG')
+                read_B.cigarstring = f'{len(read_B.query_sequence)}M'
+                read_B.qual = 'A'*len(read_B.query_sequence)
+                read_B.mapping_quality = 60
+                read_B.is_read2 = True
+                read_B.is_read1 = False
+                read_B.is_reverse = True
+                read_B.query_name = 'EX1_GA_CONV_2x_CpG_TAPS'
+                read_B.set_tag('SM', 'Cell_A')
+                read_B.set_tag('lh','TG')
+                read_B.set_tag('MD',
+                           create_MD_tag(refseq[read_B.reference_start:read_B.reference_end],
+                                         read_B.query_sequence,
+                                   ))
+                read_B.is_paired = True
+                read_B.is_proper_pair = True
+
+                read_A.next_reference_id = read_B.reference_id
+                read_A.next_reference_start = read_B.reference_start
+                read_B.next_reference_id = read_A.reference_id
+                read_B.next_reference_start = read_A.reference_start
+
+                read_A.mate_is_reverse = read_B.is_reverse
+                read_B.mate_is_reverse = read_A.is_reverse
+
+                return read_A, read_B
+
+
+
+        alignments_path_unsorted = f'{alignments_path}.unsorted.bam'
+
+        read_A, read_B = get_reads()
+        read_B.set_tag('rS','AATTAA') # Set random primer sequence
+        ## Act on reads with random primer set:
+        frag = Fragment([read_A, read_B])
+        self.assertTrue(frag.R2_primer_length==0)
+        self.assertTrue(frag.unsafe_trimmed)
+
+        ## Act on reads without random primer
+        read_A, read_B = get_reads()
+        frag = Fragment([read_A, read_B], R2_primer_length=10)
+        self.assertEqual(frag.R2_primer_length, 10)
+        self.assertFalse(frag.unsafe_trimmed)
+
+        read_A, read_B = get_reads()
+        frag = CHICFragment([read_A, read_B], R2_primer_length=0)
+        self.assertEqual(frag.R2_primer_length, 0)
+        self.assertFalse(frag.unsafe_trimmed)
+
+        read_A, read_B = get_reads()
+        read_B.set_tag('rS','AATTAA') # Set random primer sequence
+        frag = CHICFragment([read_A, read_B], R2_primer_length=0)
+        self.assertEqual(frag.R2_primer_length, 0)
+        self.assertTrue(frag.unsafe_trimmed)
+
+        read_A, read_B = get_reads()
+        read_B.set_tag('MX','scCHIC384C8U3l')
+        frag = CHICFragment([read_A, read_B])
+        self.assertEqual(frag.R2_primer_length, 0)
+        self.assertFalse(frag.unsafe_trimmed)
+
+        read_A, read_B = get_reads()
+        read_B.set_tag('MX','scCHIC384C8U3')
+        frag = CHICFragment([read_A, read_B])
+        self.assertEqual(frag.R2_primer_length,6)
+        self.assertFalse(frag.unsafe_trimmed)
+
+    def test_init(self):
+        """Test if the fragment can be initialised"""
+        with pysam.AlignmentFile('./data/mini_nla_test.bam') as f:
+            for i,(R1,R2) in enumerate(pysamiterators.iterators.MatePairIterator(f)):
+                frag = singlecellmultiomics.fragment.Fragment([R1,R2])
+                self.assertIsNotNone(frag)
+
+    def test_get(self):
+        """Test if the fragment can be initialised"""
+        with pysam.AlignmentFile('./data/mini_nla_test.bam') as f:
+            for i,(R1,R2) in enumerate(pysamiterators.iterators.MatePairIterator(f)):
+                frag = singlecellmultiomics.fragment.Fragment([R1,R2])
+                if R1 is not None:
+                    self.assertEqual(frag.get_R1(), R1 )
+                    self.assertEqual(frag[0], R1 )
+                if R2 is not None:
+                    self.assertEqual(frag.get_R2(), R2 )
+                    self.assertEqual(frag[1], R2 )
+
+    def test_get_sample(self):
+        """Test if the sample name of the fragment can be obtained"""
+        with pysam.AlignmentFile('./data/mini_nla_test.bam') as f:
+            for i,(R1,R2) in enumerate(pysamiterators.iterators.MatePairIterator(f)):
+                frag = singlecellmultiomics.fragment.Fragment([R1,R2])
+                self.assertTrue( frag.get_sample().startswith('A') )
+
+    def test_set_sample(self):
+        """Test if the sample name of the fragment can be changed"""
+        with pysam.AlignmentFile('./data/mini_nla_test.bam') as f:
+            for i,(R1,R2) in enumerate(pysamiterators.iterators.MatePairIterator(f)):
+                frag = singlecellmultiomics.fragment.Fragment([R1,R2])
+                frag.set_sample(f'TEST{i}')
+                self.assertEqual(frag.get_sample(), f'TEST{i}' )
+
+    def test_strand(self):
+        """Test if the strand can be obtained (doesn't check validity)"""
+        with pysam.AlignmentFile('./data/mini_nla_test.bam') as f:
+            for i,(R1,R2) in enumerate(pysamiterators.iterators.MatePairIterator(f)):
+                frag = singlecellmultiomics.fragment.Fragment([R1,R2])
+                self.assertIn( frag.get_strand(), [None,0,1])
+                if frag.is_mapped:
+                    self.assertIn( frag.get_strand(), [0,1])
+
+
+if __name__ == '__main__':
+    unittest.main()