Diff of /tests/test_countTable.py [000000] .. [2c420a]

Switch to side-by-side view

--- a
+++ b/tests/test_countTable.py
@@ -0,0 +1,341 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import unittest
+from types import SimpleNamespace
+import singlecellmultiomics.bamProcessing.bamToCountTable
+
+from singlecellmultiomics.bamProcessing.bamBinCounts import range_contains_overlap,blacklisted_binning
+
+class TestIterables(unittest.TestCase):
+    # Checks the bin generator imported from bamBinCounts against a fixed blacklist.
+    def test_blacklisted_binning(self):
+        bin_size = 250
+        blacklist = [(450,1001),(1007,1019),(1550,1600),(2300,2510)]
+        blacklist = sorted(blacklist)
+        # Bins generated for 0..2000 are concatenated with the blacklist itself; no overlap may be reported (presumably all entries are (start, end) tuples - TODO confirm)
+        self.assertFalse(
+            range_contains_overlap( list( blacklisted_binning(0,2000,bin_size,blacklist) ) + blacklist)
+        )
+
+
+
+class TestCountTable(unittest.TestCase):
+    # NOTE: every test builds its options as a SimpleNamespace and reads fixtures through relative './data/...' paths, so results depend on the working directory.
+    def test_total_read_counting(self):
+        """ Test if the amount of raw reads in a bam file is counted properly """
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                head=None,
+                o=None,
+                bin=None,
+                binTag='DS',
+                sliding=None,
+                bedfile=None,
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name',
+                byValue=None,
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=False,
+                divideMultimapping=False,
+                doNotDivideFragments=True,
+                contig=None,
+                blacklist=None,
+                r1only=False,
+                r2only=False,
+                filterMP=False,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # !samtools idxstats ./data/mini_nla_test.bam | head -n 1 | cut -f 3
+        self.assertEqual(df.loc['chr1'].sum(),563)
+
+
+    def test_total_read1_counting(self):
+        """ Test if the amount of valid deduped R1 reads in a bam file is counted properly
+            samtools view ./data/mini_nla_test.bam -f 64 -F 3840 | grep DS | wc -l : 210
+        """
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                head=None,
+                o=None,
+                bin=None,
+                binTag='DS',
+                sliding=None,
+                bedfile=None,
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name',
+                byValue=None,
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=True,
+                divideMultimapping=False,
+                doNotDivideFragments=True,
+                contig=None,
+                blacklist=None,
+                r1only=True,
+                r2only=False,
+                filterMP=False,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # Expected count (210) comes from the samtools command quoted in the docstring above (r1only=True, dedup=True)
+        self.assertEqual(df.loc['chr1'].sum(),210)
+
+
+
+    def test_contig_selection(self):
+        """ Test if a contig is selected properly"""
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                head=None,
+                o=None,
+                bin=None,
+                binTag='DS',
+                sliding=None,
+                bedfile=None,
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name',
+                byValue=None,
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                contig='chr5',
+                minMQ=0,
+                filterXA=False,
+                dedup=False,
+                r1only=False,
+                r2only=False,
+                divideMultimapping=False,
+                doNotDivideFragments=True,
+                splitFeatures=False,
+                blacklist=None,
+                filterMP=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # Only contig 'chr5' is requested; the table must sum to zero (presumably the test bam holds no chr5 reads - TODO confirm)
+        self.assertEqual(df.sum().sum(),0)
+
+    def test_total_molecule_counting(self):
+        """ Test if the amount of molecules in a bam file is counted properly """
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                o=None,
+                head=None,
+                bin=None,
+                binTag='DS',
+                byValue=None,
+                sliding=None,
+                bedfile=None,
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name',
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=True,
+                divideMultimapping=False,
+                doNotDivideFragments=True,
+                contig=None,
+                r1only=False,
+                r2only=False,
+                blacklist=None,
+                filterMP=False,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
+        self.assertEqual(df.loc['chr1'].sum(),383)
+
+    def test_singleFeatureTags_molecule_counting(self):
+        """ Test if the single feature counting feature works """
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                o=None,
+                head=None,
+                bin=None,
+                sliding=None,
+                binTag=None,
+                byValue=None,
+                bedfile=None,
+                showtags=False,
+                featureTags='reference_name,RC',
+                joinedFeatureTags=None,
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=False,
+                divideMultimapping=False,
+                contig=None,
+                r1only=False,
+                r2only=False,
+                keepOverBounds=False,
+                doNotDivideFragments=True,
+                blacklist=None,
+                filterMP=False,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # featureTags='reference_name,RC': rows are looked up per value, 'chr1' (presumably reference_name) and '1'/'2' (presumably RC)
+        self.assertEqual(df.loc['chr1'].sum(),563)  # same total as test_total_read_counting
+        self.assertEqual(df.loc['1'].sum(),383)  # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
+
+        # Amount of RC:2 obs:
+        self.assertEqual(df.loc['2'].sum(),97)
+
+
+    def test_singleFeatureTags_molecule_counting_contig(self):
+        """ Test if the single feature counting feature works with -contig """
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                o=None,
+                head=None,
+                bin=None,
+                sliding=None,
+                binTag=None,
+                byValue=None,
+                bedfile=None,
+                showtags=False,
+                featureTags='reference_name,RC',
+                joinedFeatureTags=None,
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=False,
+                divideMultimapping=False,
+                contig='chr1',
+                r1only=False,
+                r2only=False,
+                keepOverBounds=False,
+                doNotDivideFragments=True,
+                blacklist=None,
+                filterMP=False,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # Same expectations as test_singleFeatureTags_molecule_counting, but with contig='chr1' selected
+        self.assertEqual(df.loc['chr1'].sum(),563)  # same total as test_total_read_counting
+        self.assertEqual(df.loc['1'].sum(),383)  # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
+
+        # Amount of RC:2 obs:
+        self.assertEqual(df.loc['2'].sum(),97)
+
+
+
+
+    def test_bed_counting(self):
+        """ Test if the bed feature counting feature works """
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                o=None,
+                head=None,
+                bin=None,
+                binTag='DS',
+                byValue=None,
+                sliding=None,
+                bedfile='./data/mini_test.bed',
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name',
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=True,
+                divideMultimapping=False,
+                doNotDivideFragments=True,
+                contig=None,
+                r1only=False,
+                r2only=False,
+                blacklist=None,
+                filterMP=False,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # Rows are selected on the 'bname' index level; 'test3'/'test4' are presumably region names from ./data/mini_test.bed - TODO confirm
+        self.assertEqual( df.xs( 'test4',level='bname', drop_level=False).iloc[0].sum() , 1)
+        self.assertEqual( df.xs( 'test3',level='bname', drop_level=False).iloc[0].sum() , 383)  # same total as test_total_molecule_counting
+
+    def test_byValue(self):
+        """ Test if the by value counting feature works, this counts the value of a feature instead of its presence"""
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                o=None,
+                head=None,
+                bin=30,
+                sliding=None,
+                binTag='DS',
+                byValue='RC',
+                bedfile=None,
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name,RC',
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=False,
+                divideMultimapping=False,
+                contig=None,
+                blacklist=None,
+                r1only=False,
+                r2only=False,
+                filterMP=False,
+                keepOverBounds=False,
+                doNotDivideFragments=True,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # Grand total of the whole table, then the total of the single column 'A3-P15-1-1_25'
+        self.assertEqual( df.sum(1).sum(), 765 )
+        self.assertEqual( df.loc[:,['A3-P15-1-1_25']].sum(skipna=True).sum(skipna=True), 12.0 )
+
+
+    def test_byValue_binned_autofill_joined(self):
+        """ Test the by value counting feature with bin=30 and joined feature tags; NOTE(review): arguments and assertions are currently identical to test_byValue, confirm which option was meant to differ"""
+        df = singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
+            SimpleNamespace(
+                alignmentfiles=['./data/mini_nla_test.bam'],
+                o=None,
+                head=None,
+                bin=30,
+                sliding=None,
+                binTag='DS',
+                byValue='RC',
+                bedfile=None,
+                showtags=False,
+                featureTags=None,
+                joinedFeatureTags='reference_name,RC',
+                sampleTags='SM', proper_pairs_only=False, no_indels=False, max_base_edits=None, no_softclips=False,
+                minMQ=0,
+                filterXA=False,
+                dedup=False,
+                divideMultimapping=False,
+                contig=None,
+                blacklist=None,
+                r1only=False,
+                r2only=False,
+                filterMP=False,
+                keepOverBounds=False,
+                doNotDivideFragments=True,
+                splitFeatures=False,
+                feature_delimiter=',',
+                 noNames=False) , return_df=True)
+        # Grand total of the whole table, then the total of the single column 'A3-P15-1-1_25'
+        self.assertEqual( df.sum(1).sum(), 765 )
+        self.assertEqual( df.loc[:,['A3-P15-1-1_25']].sum(skipna=True).sum(skipna=True), 12.0 )
+
+
+
+
+if __name__ == '__main__':  # run the unittest runner when this file is executed directly
+    unittest.main()