a b/tests/test_countTable.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
import unittest
4
from types import SimpleNamespace
5
import singlecellmultiomics.bamProcessing.bamToCountTable
6
7
from singlecellmultiomics.bamProcessing.bamBinCounts import range_contains_overlap,blacklisted_binning
8
9
class TestIterables(unittest.TestCase):
    """Checks for the binning helpers imported from bamBinCounts."""

    def test_blacklisted_binning(self):
        """Bins produced around a blacklist must not be reported as overlapping it.

        Bins of width 250 are generated over 0-2000 with the blacklist applied;
        the generated bins are then concatenated with the blacklisted ranges
        and ``range_contains_overlap`` is expected to return a falsy value.
        """
        width = 250
        excluded = sorted([(450, 1001), (1007, 1019), (1550, 1600), (2300, 2510)])

        generated_bins = list(blacklisted_binning(0, 2000, width, excluded))
        combined = generated_bins + excluded

        self.assertFalse(range_contains_overlap(combined))
19
20
21
22
class TestCountTable(unittest.TestCase):
    """Exercise create_count_table on ./data/mini_nla_test.bam.

    Every test builds its argument namespace through ``_make_args`` so that
    only the options differing from the shared baseline are spelled out in
    the test itself.
    """

    @staticmethod
    def _make_args(**overrides):
        """Return the argument namespace handed to create_count_table.

        The baseline holds the option values shared by the tests in this
        class. ``overrides`` replace baseline values or add extra attributes
        (for example ``keepOverBounds``, which only some tests supply and is
        therefore deliberately not part of the baseline).

        A fresh dict, including a fresh ``alignmentfiles`` list, is built on
        every call so tests cannot affect each other through shared state.
        """
        options = dict(
            alignmentfiles=['./data/mini_nla_test.bam'],
            o=None,
            head=None,
            bin=None,
            sliding=None,
            binTag='DS',
            byValue=None,
            bedfile=None,
            showtags=False,
            featureTags=None,
            joinedFeatureTags='reference_name',
            sampleTags='SM',
            proper_pairs_only=False,
            no_indels=False,
            max_base_edits=None,
            no_softclips=False,
            minMQ=0,
            filterXA=False,
            filterMP=False,
            dedup=False,
            divideMultimapping=False,
            doNotDivideFragments=True,
            contig=None,
            blacklist=None,
            r1only=False,
            r2only=False,
            splitFeatures=False,
            feature_delimiter=',',
            noNames=False,
        )
        options.update(overrides)
        return SimpleNamespace(**options)

    def _count_table(self, **overrides):
        """Call create_count_table (return_df=True) with baseline options plus ``overrides``."""
        return singlecellmultiomics.bamProcessing.bamToCountTable.create_count_table(
            self._make_args(**overrides), return_df=True)

    def test_total_read_counting(self):
        """ Test if the amount of raw reads in a bam file is counted properly """
        df = self._count_table()
        # !samtools idxstats ./data/mini_nla_test.bam | head -n 1 | cut -f 3
        self.assertEqual(df.loc['chr1'].sum(), 563)

    def test_total_read1_counting(self):
        """ Test if the amount of valid deduped R1 reads in a bam file is counted properly
            samtools view ./data/mini_nla_test.bam -f 64 -F 3840 | grep DS | wc -l : 210
        """
        df = self._count_table(dedup=True, r1only=True)
        self.assertEqual(df.loc['chr1'].sum(), 210)

    def test_contig_selection(self):
        """ Test if a contig is selected properly"""
        # Restricting the count to chr5 is expected to yield an all-zero table.
        df = self._count_table(contig='chr5')
        self.assertEqual(df.sum().sum(), 0)

    def test_total_molecule_counting(self):
        """ Test if the amount of molecules in a bam file is counted properly """
        df = self._count_table(dedup=True)
        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(df.loc['chr1'].sum(), 383)

    def test_singleFeatureTags_molecule_counting(self):
        """ Test if the single feature counting feature works """
        df = self._count_table(
            binTag=None,
            featureTags='reference_name,RC',
            joinedFeatureTags=None,
            keepOverBounds=False,
        )
        self.assertEqual(df.loc['chr1'].sum(), 563)

        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(df.loc['1'].sum(), 383)

        # Amount of RC:2 obs:
        self.assertEqual(df.loc['2'].sum(), 97)

    def test_singleFeatureTags_molecule_counting_contig(self):
        """ Test if the single feature counting feature works with -contig """
        df = self._count_table(
            binTag=None,
            featureTags='reference_name,RC',
            joinedFeatureTags=None,
            contig='chr1',
            keepOverBounds=False,
        )
        self.assertEqual(df.loc['chr1'].sum(), 563)

        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(df.loc['1'].sum(), 383)

        # Amount of RC:2 obs:
        self.assertEqual(df.loc['2'].sum(), 97)

    def test_bed_counting(self):
        """ Test if the bed feature counting feature works """
        df = self._count_table(bedfile='./data/mini_test.bed', dedup=True)

        # First row labelled 'test4' on the 'bname' index level.
        self.assertEqual(
            df.xs('test4', level='bname', drop_level=False).iloc[0].sum(), 1)

        # First row labelled 'test3'; expected value matches the RC:i:1 count:
        # !samtools view ./singlecellmultiomics/data/mini_nla_test.bam | grep 'RC:i:1' | wc -l
        self.assertEqual(
            df.xs('test3', level='bname', drop_level=False).iloc[0].sum(), 383)

    def test_byValue(self):
        """ Test if the by value counting feature works, this counts the value of a feature instead of its presence"""
        df = self._count_table(
            bin=30,
            byValue='RC',
            joinedFeatureTags='reference_name,RC',
            keepOverBounds=False,
        )

        self.assertEqual(df.sum(1).sum(), 765)
        self.assertEqual(
            df.loc[:, ['A3-P15-1-1_25']].sum(skipna=True).sum(skipna=True), 12.0)

    def test_byValue_binned_autofill_joined(self):
        """ Test if the by value counting feature works, this counts the value of a feature instead of its presence"""
        # NOTE(review): the options below are identical to test_byValue;
        # confirm whether a different configuration was intended here.
        df = self._count_table(
            bin=30,
            byValue='RC',
            joinedFeatureTags='reference_name,RC',
            keepOverBounds=False,
        )

        self.assertEqual(df.sum(1).sum(), 765)
        self.assertEqual(
            df.loc[:, ['A3-P15-1-1_25']].sum(skipna=True).sum(skipna=True), 12.0)
336
337
338
339
340
# Run the test cases defined above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()