#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Tests for blacklisted binning and for bamToCountTable.create_count_table."""
import unittest
from types import SimpleNamespace
import singlecellmultiomics.bamProcessing.bamToCountTable

from singlecellmultiomics.bamProcessing.bamBinCounts import range_contains_overlap,blacklisted_binning

# Fixture paths are relative to the working directory the tests are run from.
TEST_BAM_PATH = './data/mini_nla_test.bam'
TEST_BED_PATH = './data/mini_test.bed'


def _count_table(**overrides):
    """Call create_count_table on the test BAM file with shared settings.

    Every keyword in ``overrides`` replaces (or adds to) the baseline
    attribute set before it is wrapped in a SimpleNamespace, which stands in
    for the parsed command line arguments.

    ``keepOverBounds`` is intentionally absent from the baseline: only some
    tests supplied it, so those tests pass it explicitly and every test keeps
    handing over exactly the attribute set it always did.

    Returns:
        The value create_count_table returns when called with
        ``return_df=True``; the tests below index it like a pandas DataFrame.
    """
    settings = {
        # A fresh list per call so no mutable state is shared between tests
        'alignmentfiles': [TEST_BAM_PATH],
        'o': None,
        'head': None,
        'bin': None,
        'binTag': 'DS',
        'sliding': None,
        'bedfile': None,
        'showtags': False,
        'featureTags': None,
        'joinedFeatureTags': 'reference_name',
        'byValue': None,
        'sampleTags': 'SM',
        'proper_pairs_only': False,
        'no_indels': False,
        'max_base_edits': None,
        'no_softclips': False,
        'minMQ': 0,
        'filterXA': False,
        'dedup': False,
        'divideMultimapping': False,
        'doNotDivideFragments': True,
        'contig': None,
        'blacklist': None,
        'r1only': False,
        'r2only': False,
        'filterMP': False,
        'splitFeatures': False,
        'feature_delimiter': ',',
        'noNames': False,
    }
    settings.update(overrides)
    return singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
        SimpleNamespace(**settings), return_df=True)


class TestIterables(unittest.TestCase):

    def test_blacklisted_binning(self):
        """ Test that the bins yielded by blacklisted_binning do not overlap the blacklist """
        bin_size = 250
        blacklist = sorted([(450,1001),(1007,1019),(1550,1600),(2300,2510)])

        bins = list(blacklisted_binning(0, 2000, bin_size, blacklist))
        # Bins and blacklisted regions pooled together should be overlap free
        self.assertFalse(range_contains_overlap(bins + blacklist))


class TestCountTable(unittest.TestCase):

    def test_total_read_counting(self):
        """ Test if the amount of raw reads in a bam file is counted properly """
        df = _count_table()
        # !samtools idxstats ./data/mini_nla_test.bam | head -n 1 | cut -f 3
        self.assertEqual(df.loc['chr1'].sum(), 563)

    def test_total_read1_counting(self):
        """ Test if the amount of valid deduped R1 reads in a bam file is counted properly
        samtools view ./data/mini_nla_test.bam -f 64 -F 3840 | grep DS | wc -l : 210
        """
        df = _count_table(dedup=True, r1only=True)
        # Expected value comes from the samtools command in the docstring
        self.assertEqual(df.loc['chr1'].sum(), 210)

    def test_contig_selection(self):
        """ Test if a contig is selected properly"""
        df = _count_table(contig='chr5')
        # No counts are expected when restricting to chr5
        self.assertEqual(df.sum().sum(), 0)

    def test_total_molecule_counting(self):
        """ Test if the amount of molecules in a bam file is counted properly """
        df = _count_table(dedup=True)
        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(df.loc['chr1'].sum(), 383)

    def test_singleFeatureTags_molecule_counting(self):
        """ Test if the single feature counting feature works """
        df = _count_table(
            binTag=None,
            featureTags='reference_name,RC',
            joinedFeatureTags=None,
            keepOverBounds=False)
        # Total amount of reads on chr1
        self.assertEqual(df.loc['chr1'].sum(), 563)
        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(df.loc['1'].sum(), 383)

        # Amount of RC:2 obs:
        self.assertEqual(df.loc['2'].sum(), 97)

    def test_singleFeatureTags_molecule_counting_contig(self):
        """ Test if the single feature counting feature works with -contig """
        df = _count_table(
            binTag=None,
            featureTags='reference_name,RC',
            joinedFeatureTags=None,
            contig='chr1',
            keepOverBounds=False)
        # Total amount of reads on chr1
        self.assertEqual(df.loc['chr1'].sum(), 563)
        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(df.loc['1'].sum(), 383)

        # Amount of RC:2 obs:
        self.assertEqual(df.loc['2'].sum(), 97)

    def test_bed_counting(self):
        """ Test if the bed feature counting feature works """
        df = _count_table(bedfile=TEST_BED_PATH, dedup=True)
        # The first row matching each bed feature name is summed over all samples.
        # NOTE(review): 383 equals the deduplicated chr1 molecule count asserted in
        # test_total_molecule_counting; presumably 'test3' spans all of those
        # molecules - confirm against ./data/mini_test.bed
        self.assertEqual(
            df.xs('test4', level='bname', drop_level=False).iloc[0].sum(), 1)
        self.assertEqual(
            df.xs('test3', level='bname', drop_level=False).iloc[0].sum(), 383)

    def test_byValue(self):
        """ Test if the by value counting feature works, this counts the value of a feature instead of its presence"""
        df = _count_table(
            bin=30,
            byValue='RC',
            joinedFeatureTags='reference_name,RC',
            keepOverBounds=False)

        self.assertEqual(df.sum(axis=1).sum(), 765)
        self.assertEqual(
            df.loc[:, ['A3-P15-1-1_25']].sum(skipna=True).sum(skipna=True), 12.0)

    def test_byValue_binned_autofill_joined(self):
        """ Test if the by value counting feature works, this counts the value of a feature instead of its presence"""
        # NOTE(review): the settings and assertions are identical to test_byValue;
        # confirm whether this test was meant to cover a different configuration.
        df = _count_table(
            bin=30,
            byValue='RC',
            joinedFeatureTags='reference_name,RC',
            keepOverBounds=False)

        self.assertEqual(df.sum(axis=1).sum(), 765)
        self.assertEqual(
            df.loc[:, ['A3-P15-1-1_25']].sum(skipna=True).sum(skipna=True), 12.0)


if __name__ == '__main__':
    unittest.main()