Switch to unified view

a b/tests/test_demultiplexing.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
import unittest
4
from singlecellmultiomics.fastqProcessing.fastqIterator import FastqRecord
5
from singlecellmultiomics.barcodeFileParser.barcodeFileParser import BarcodeParser
6
from singlecellmultiomics.modularDemultiplexer.demultiplexModules.CELSeq2 import CELSeq2_c8_u6_NH
7
from singlecellmultiomics.modularDemultiplexer.demultiplexModules.scCHIC import SCCHIC_384w_c8_u3_cs2
8
from singlecellmultiomics.modularDemultiplexer.demultiplexModules.DamID import DamID2andT_SCA,DamID2_SCA
9
from singlecellmultiomics.modularDemultiplexer.baseDemultiplexMethods import UmiBarcodeDemuxMethod
10
import pkg_resources
11
from singlecellmultiomics.utils import reverse_complement
12
13
class TestUmiBarcodeDemux(unittest.TestCase):
14
15
    def test_UmiBarcodeDemuxMethod_matching_barcode(self):
16
17
        barcode_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/barcodes/')
18
        index_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/indices/')
19
        barcode_parser = BarcodeParser(barcode_folder, lazyLoad='*')
20
        index_parser = BarcodeParser(index_folder, lazyLoad='*')
21
22
        r1 = FastqRecord(
23
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 1:N:0:GTGAAA',
24
          'ATCACACACTATAGTCATTCAGGAGCAGGTTCTTCAGGTTCCCTGTAGTTGTGTGGTTTTGAGTGAGTTTTTTAAT',
25
          '+',
26
          'AAAAA#EEEEEEEEEEEAEEEEEEEAEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEE/EEEEEEEEEEEEEEEEEE'
27
        )
28
        r2 = FastqRecord(
29
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 2:N:0:GTGAAA',
30
          'ACCCCAGATCAACGTTGGACNTCNNCNTTNTNCTCNGCACCNNNNCNNNCTTATNCNNNANNNNNNNNNNTNNGN',
31
          '+',
32
          '6AAAAEEAEE/AEEEEEEEE#EE##<#6E#A#EEE#EAEEA####A###EE6EE#E###E##########E##A#'
33
        )
34
        demux = UmiBarcodeDemuxMethod(umiRead=0,
35
            umiStart=0,
36
            umiLength=3,
37
            barcodeRead=0,
38
            barcodeStart=3,
39
            barcodeLength=8,
40
            barcodeFileParser=barcode_parser,
41
            barcodeFileAlias='maya_384NLA',
42
            indexFileParser=index_parser,
43
            indexFileAlias='illumina_merged_ThruPlex48S_RP',
44
            random_primer_read=None,
45
            random_primer_length=6)
46
47
        demultiplexed_record = demux.demultiplex([r1,r2])
48
        # The barcode sequence is ACACACTA (first barcode)
49
        self.assertEqual( demultiplexed_record[0].tags['BC'], 'ACACACTA')
50
        self.assertEqual( demultiplexed_record[0].tags['bi'], 1) # 1 from version 0.1.12
51
52
53
54
    def test_CS2_NH_matching_barcode(self):
55
56
        barcode_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/barcodes/')
57
        index_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/indices/')
58
        barcode_parser = BarcodeParser(barcode_folder, lazyLoad='*')
59
        index_parser = BarcodeParser(index_folder, lazyLoad='*')
60
61
        seq = 'TATGAGCAATCACACACTATAGTCATTCAGGAGCAGGTTCTTCAGGTTCCCTGTAGTTGTGT'
62
        r1 = FastqRecord(
63
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 1:N:0:GTGAAA',
64
          f'ATAATATCTGGGCA{seq}',
65
          '+',
66
          'AAAAA#EEEEEEEEEEEAEEEEEEEAEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEE/EEEEEEEEEEEEEEEEEE'
67
        )
68
        r2 = FastqRecord(
69
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 2:N:0:GTGAAA',
70
          'ACCCCAGATCAACGTTGGACNTCNNCNTTNTNCTCNGCACCNNNNCNNNCTTATNCNNNANNNNNNNNNNTNNGN',
71
          '+',
72
          '6AAAAEEAEE/AEEEEEEEE#EE##<#6E#A#EEE#EAEEA####A###EE6EE#E###E##########E##A#'
73
        )
74
        demux = CELSeq2_c8_u6_NH(
75
            barcodeFileParser=barcode_parser,
76
            indexFileParser=index_parser)
77
78
        demultiplexed_record = demux.demultiplex([r1,r2])
79
        # The barcode sequence is ACACACTA (first barcode)
80
        self.assertEqual( demultiplexed_record[0].tags['BC'], 'TCTGGGCA')
81
        self.assertEqual( demultiplexed_record[0].tags['bi'], 55)
82
        self.assertEqual( demultiplexed_record[0].tags['RX'], 'ATAATA')
83
        self.assertEqual( demultiplexed_record[0].sequence, seq)
84
85
    def construct_tchic_read(self,crx,ccb,trx,tcb,mr,linker):
86
        seq = f'{crx}{ccb}{linker}{trx}{tcb}TTTTTTTTTTTTTTTTTTTTT{mr}'
87
        r1 = FastqRecord(
88
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 1:N:0:GTGAAA',
89
          f'{seq}',
90
          '+',
91
          'A'*len(seq)
92
        )
93
        r2 = FastqRecord(
94
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 2:N:0:GTGAAA',
95
          reverse_complement(seq),
96
          '+',
97
          'A'*len(seq)
98
        )
99
        return r1, r2
100
101
    def test_TCHIC(self):
102
103
        barcode_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/barcodes/')
104
        index_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/indices/')
105
        barcode_parser = BarcodeParser(barcode_folder, lazyLoad='*',)
106
        index_parser = BarcodeParser(index_folder, lazyLoad='*')
107
108
        crx = 'TAT'
109
        ccb = 'TAAGTGCT'
110
        trx = 'CTGTTG'
111
        tcb = 'ACAGAAGC'
112
        mr = 'TGAGAGAGAGAGAGAGAGAGAGAGC'
113
        linker = 'TATC'
114
        r1,r2 = self.construct_tchic_read(crx,ccb,trx,tcb,mr,linker)
115
116
        demux = SCCHIC_384w_c8_u3_cs2(
117
            barcodeFileParser=barcode_parser,
118
            indexFileParser=index_parser)
119
120
        demultiplexed_record = demux.demultiplex([r1,r2])
121
        # The barcode sequence is ACACACTA (first barcode)
122
        self.assertEqual( demultiplexed_record[0].tags['BC'], ccb)
123
        self.assertEqual( demultiplexed_record[0].tags['bi'], 225)
124
        self.assertEqual( demultiplexed_record[0].tags['dt'], 'VASA')
125
        self.assertEqual( demultiplexed_record[0].tags['RX'], crx)
126
        self.assertEqual( demultiplexed_record[0].tags['rx'], trx)
127
        self.assertEqual( demultiplexed_record[1].sequence, reverse_complement(mr)[:len(mr)-4])
128
129
130
        r1 = FastqRecord(
131
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 1:N:0:GTGAAA',
132
          'GGCGACGTCCTTCACTATAGGGAGTTCTACAGTTCGACGATCCTTAAATGGTGAGTTTTTTTTTTTTTTTTTTTTTTTGACCGACGGTCCCCCCGGGACCC',
133
          '+',
134
          'A'*len('GGCGACGTCCTTCACTATAGGGAGTTCTACAGTTCGACGATCCTTAAATGGTGAGTTTTTTTTTTTTTTTTTTTTTTTGACCGACGGTCCCCCCGGGACCC')
135
        )
136
        r2 = FastqRecord(
137
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 2:N:0:GTGAAA',
138
          'CGATCCTTAAATGGTGAGTTTTTTTTTTTTTTTTTTTTTTTGACCGACGGTCCCCCCGGGACCCGACGGCGCGACGACGCCCGGGGCGCACTGGGGACAGT',
139
          '+',
140
          'A'*len('CGATCCTTAAATGGTGAGTTTTTTTTTTTTTTTTTTTTTTTGACCGACGGTCCCCCCGGGACCCGACGGCGCGACGACGCCCGGGGCGCACTGGGGACAGT')
141
        )
142
        demultiplexed_record = demux.demultiplex([r1,r2])
143
        self.assertEqual( demultiplexed_record[0].tags['BC'], 'GACGTCCT')
144
        self.assertEqual( demultiplexed_record[0].tags['bi'], 214)
145
        self.assertEqual( demultiplexed_record[0].tags['dt'], 'VASA')
146
        self.assertEqual( demultiplexed_record[0].tags['RX'], 'GGC')
147
        self.assertEqual( demultiplexed_record[0].tags['rx'], 'CTTAAA')
148
149
150
    def construct_read_pair(self, prefix, content):
151
        seq = f'{prefix}{content}'
152
        r1 = FastqRecord(
153
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 1:N:0:GTGAAA',
154
          f'{seq}',
155
          '+',
156
          'A'*len(seq)
157
        )
158
        r2 = FastqRecord(
159
          '@NS500414:628:H7YVNBGXC:1:11101:15963:1046 2:N:0:GTGAAA',
160
          reverse_complement(seq),
161
          '+',
162
          'A'*len(seq)
163
        )
164
        return r1, r2
165
    
166
    def test_DAMID(self):
167
168
        barcode_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/barcodes/')
169
        index_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/indices/')
170
        barcode_parser = BarcodeParser(barcode_folder, lazyLoad='*',)
171
        index_parser = BarcodeParser(index_folder, lazyLoad='*')
172
173
        # First single cell format without overhang:
174
        # EG: DamID2_BC_001   3-TGCA-3-TATG
175
        
176
        first_barcode, second_barcode = 'TGCA', 'TATG'
177
        first_umi = 'ACT'
178
        second_umi='CTC'
179
        read_contents = 'rrrrrr'
180
        r1,r2 = self.construct_read_pair(f'{first_umi}{first_barcode}{second_umi}{second_barcode}',read_contents)
181
            
182
            
183
        damid_demux = DamID2_SCA(barcodeFileParser=barcode_parser,
184
                                    second_barcode_len=4,
185
                                    indexFileParser=index_parser,
186
                                    barcode_alias='DamID2_scattered_8bp',
187
                                        )
188
189
        demultiplexed_record = damid_demux.demultiplex([r1,r2])
190
        self.assertEqual( demultiplexed_record[0].tags['BC'], first_barcode+second_barcode)
191
        self.assertEqual( demultiplexed_record[0].tags['bi'], 1)
192
        self.assertEqual( demultiplexed_record[0].tags['RX'], first_umi+second_umi)
193
        self.assertEqual( demultiplexed_record[0].sequence, read_contents)
194
        
195
        combined_demux = DamID2andT_SCA(
196
            barcodeFileParser=barcode_parser,
197
            indexFileParser=index_parser)
198
199
        # The internal DamID demux of the combined protocol should return the same result
200
        demultiplexed_record = combined_demux.damid_demux.demultiplex([r1,r2])
201
        self.assertEqual( demultiplexed_record[0].tags['BC'], first_barcode+second_barcode)
202
        self.assertEqual( demultiplexed_record[0].tags['bi'], 1)
203
        self.assertEqual( demultiplexed_record[0].tags['RX'], first_umi+second_umi)
204
        self.assertEqual( demultiplexed_record[0].sequence, read_contents)
205
        
206
        
207
        demultiplexed_record = combined_demux.demultiplex([r1,r2])
208
        self.assertEqual( demultiplexed_record[0].tags['BC'], first_barcode+second_barcode)
209
        self.assertEqual( demultiplexed_record[0].tags['bi'], 1)
210
        self.assertEqual( demultiplexed_record[0].tags['dt'], 'DamID')
211
        self.assertEqual( demultiplexed_record[0].tags['RX'], first_umi+second_umi)
212
        self.assertEqual( demultiplexed_record[0].sequence, read_contents)
213
214
215
    def test_3DEC_UmiBarcodeDemuxMethod_matching_barcode(self):
216
217
        barcode_folder = pkg_resources.resource_filename('singlecellmultiomics','modularDemultiplexer/barcodes/')
218
        barcode_parser = BarcodeParser(barcode_folder,lazyLoad='*')
219
220
        r1 = FastqRecord(
221
          '@Cluster_s_1_1101_1000',
222
          'ATCACACACTATAGTCATTCAGGAGCAGGTTCTTCAGGTTCCCTGTAGTTGTGTGGTTTTGAGTGAGTTTTTTAAT',
223
          '+',
224
          'AAAAA#EEEEEEEEEEEAEEEEEEEAEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEE/EEEEEEEEEEEEEEEEEE'
225
        )
226
        r2 = FastqRecord(
227
          '@Cluster_s_1_1101_1002',
228
          'ACCCCAGATCAACGTTGGACNTCNNCNTTNTNCTCNGCACCNNNNCNNNCTTATNCNNNANNNNNNNNNNTNNGN',
229
          '+',
230
          '6AAAAEEAEE/AEEEEEEEE#EE##<#6E#A#EEE#EAEEA####A###EE6EE#E###E##########E##A#'
231
        )
232
        demux = UmiBarcodeDemuxMethod(umiRead=0,
233
            umiStart=0,
234
            umiLength=3,
235
            barcodeRead=0,
236
            barcodeStart=3,
237
            barcodeLength=8,
238
            barcodeFileParser=barcode_parser,
239
            barcodeFileAlias='maya_384NLA',
240
            indexFileParser=None,
241
            indexFileAlias='illumina_merged_ThruPlex48S_RP',
242
            random_primer_read=None,
243
            random_primer_length=6)
244
245
        demultiplexed_record = demux.demultiplex([r1,r2])
246
        # The barcode sequence is ACACACTA (first barcode)
247
        self.assertEqual( demultiplexed_record[0].tags['BC'], 'ACACACTA')
248
        self.assertEqual( demultiplexed_record[0].tags['bi'], 1)
249
250
251
if __name__ == '__main__':
252
    unittest.main()