a b/singlecellmultiomics/modularDemultiplexer/demultiplexingStrategyLoader.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
import os
4
import collections
5
import glob
6
import sys
7
from colorama import Fore
8
from colorama import Back
9
from colorama import Style
10
import importlib
11
import inspect
12
import traceback
13
import singlecellmultiomics.modularDemultiplexer.demultiplexModules as dm
14
import singlecellmultiomics.fastqProcessing.fastqIterator as fastqIterator
15
from singlecellmultiomics.modularDemultiplexer.baseDemultiplexMethods import NonMultiplexable, IlluminaBaseDemultiplexer
16
import logging
17
18
19
class DemultiplexingStrategyLoader:
20
    def __init__(
21
            self,
22
            barcodeParser,
23
            moduleSearchDir='demultiplexModules',
24
            indexParser=None,
25
            ignoreMethods=None,
26
            only_detect_methods=None, #
27
            indexFileAlias=None):
28
        package = f'singlecellmultiomics.modularDemultiplexer.{moduleSearchDir}'
29
        moduleSearchPath = os.path.join(
30
            os.path.dirname(
31
                os.path.realpath(__file__)),
32
            moduleSearchDir).replace(
33
            '/./',
34
            '/')
35
        self.barcodeParser = barcodeParser
36
        self.indexParser = indexParser
37
        self.only_detect_methods = only_detect_methods
38
        moduleSearchPath = moduleSearchPath
39
40
        #print(f'{Style.DIM}Current script location: {__file__}')
41
        #print(f'Searchdir: {moduleSearchDir}')
42
        #print(f'Looking for modules in {moduleSearchPath}{Style.RESET_ALL}')
43
        self.demultiplexingStrategies = []
44
        self.demux_classes = [
45
            IlluminaBaseDemultiplexer,
46
47
            dm.CELSeq1_c8_u4,
48
            dm.CELSeq2_c8_u6,
49
            dm.CELSeq2_c8_u6_NH,
50
            dm.CELSeq2_c8_u8,
51
            dm.CELSeq2_c8_u8_NNLAIII,
52
53
            dm.CELSeq2_c8_u6_swapped_reads,
54
55
            dm.NLAIII_384w_c8_u3,
56
            dm.NLAIII_96w_c8_u3,
57
            dm.Nla_384w_u8_c8_ad3_is15,
58
            dm.NLAIII_384w_c8_u3_SINGLE_END,
59
            dm.NLAIII_96w_c8_u3_SINGLE_END,
60
61
            dm.SCCHIC_384w_c8_u3,
62
            dm.SCCHIC_384w_c8_u3_cs2,
63
            dm.SCCHIC_384w_c8_u3_pdt,
64
            dm.SCCHIC_384w_c8_u3_direct_ligation,
65
            dm.SCCHIC_384w_c8_u3_direct_ligation_SINGLE_END,
66
67
            dm.MSPJI_c8_u3,
68
            dm.ScartraceR2,
69
            dm.ScartraceR1,
70
            dm.ScartraceR2RP4,
71
72
            dm.chrom10x_c16_u12,
73
74
            dm.DamID2,
75
            dm.DamID2_c8_u3_cs2,
76
            dm.DamID2_SCA,
77
            dm.DamID2andT_SCA,
78
            dm.DamID2andT_SCA6,
79
            dm.DamID2_NO_OVERHANG
80
81
82
        ]
83
        for c in self.demux_classes:
84
85
86
            initialised_demux = c(
87
                barcodeFileParser=barcodeParser,
88
                indexFileParser=indexParser,
89
                indexFileAlias=indexFileAlias)
90
91
            if self.only_detect_methods is not None:
92
93
                if initialised_demux.shortName in self.only_detect_methods:
94
                    print(f'Only loading {initialised_demux.shortName}')
95
                else:
96
                    continue
97
98
            self.demultiplexingStrategies.append(initialised_demux)
99
100
101
    def getSelectedStrategiesFromStringList(self, strList, verbose=True):
102
        selectedStrategies = []
103
104
        resolved = {part: False for part in strList}
105
        for strategy in self.demultiplexingStrategies:
106
            if strategy.shortName in strList:
107
                selectedStrategies.append(strategy)
108
                if verbose:
109
                    print('Selected strategy %s' % strategy)
110
                resolved[strategy.shortName] = True
111
112
        if any([v is False for v in resolved.values()]):
113
            for strat in strList:
114
                if resolved[strat] is False:
115
                    print(f'{Fore.RED}Could not resolve {strat}{Style.RESET_ALL}')
116
                    print('Available:')
117
                    for s in self.demultiplexingStrategies:
118
                        print(s.shortName)
119
                    raise ValueError(f'Strategy {strat} not found')
120
121
            raise ValueError()
122
        return selectedStrategies
123
124
    def list(self):
125
        print(f"{Style.BRIGHT}Available demultiplexing strategies:{Style.RESET_ALL}")
126
        #print('Name, alias, will be auto detected, description')
127
        for strategy in self.demultiplexingStrategies:
128
129
            try:
130
                print(
131
                    f'{Style.BRIGHT}{strategy.shortName}{Style.RESET_ALL}\t{strategy.longName}\t' +
132
                    (
133
                        f'{Fore.GREEN}Will be autodetected' if strategy.autoDetectable else f'{Fore.RED}Will not be autodetected') +
134
                    Style.RESET_ALL +
135
                    Style.DIM +
136
                    f' {strategy.barcodeFileParser.getTargetCount(strategy.barcodeFileAlias) if hasattr(strategy,"barcodeFileParser") else "NA"} targets\n ' +
137
                    Style.DIM +
138
                    strategy.description +
139
                    '\n' +
140
                    strategy.getParserSummary() +
141
                    Style.RESET_ALL +
142
                    '\n')
143
            except Exception as e:
144
                print(
145
                    f"{Fore.RED}{Style.BRIGHT}Error in: {strategy.shortName}\nException: {e}{Style.RESET_ALL}\nTraceback for the error:\n")
146
                import traceback
147
                traceback.print_exc()
148
                from os import stat
149
                from pwd import getpwuid
150
                try:
151
                    modulePath = sys.modules[strategy.__module__].__file__
152
153
                    print(
154
                        f'Contact {Style.BRIGHT}%s{Style.RESET_ALL} for help\n' %
155
                        getpwuid(
156
                            stat(modulePath).st_uid).pw_name)
157
                    print(
158
                        'The error only affects this module.\nProceeding to load more modules...\n')
159
                except Exception as e:
160
                    pass
161
162
    def getAutodetectStrategies(self):
163
        return [
164
            strategy for strategy in self.demultiplexingStrategies if strategy.autoDetectable]
165
166
    def getDemultiplexingSelectedStrategies(self):
167
        if self.selectedStrategies is None:
168
            raise ValueError('No strategies selected')
169
        return self.selectedStrategies
170
171
    def demultiplex(
172
            self,
173
            fastqfiles,
174
            maxReadPairs=None,
175
            strategies=None,
176
            library=None,
177
            targetFile=None,
178
            rejectHandle=None,
179
            log_handle=None,
180
            probe=None
181
            ):
182
183
        useStrategies = strategies if strategies is not None else self.getAutodetectStrategies()
184
        strategyYields = collections.Counter()
185
        processedReadPairs = 0
186
        baseDemux = IlluminaBaseDemultiplexer(
187
            indexFileParser=self.indexParser,
188
            barcodeParser=self.barcodeParser,
189
            probe=probe)
190
191
        for p, reads in enumerate(
192
                fastqIterator.FastqIterator(*fastqfiles)):
193
            processedReadPairs = p+1
194
195
            for strategy in useStrategies:
196
                try:
197
                    recodedRecords = strategy.demultiplex(
198
                        reads, library=library, probe=probe)
199
200
                    if targetFile is not None:
201
                        targetFile.write(recodedRecords)
202
203
                except NonMultiplexable as reason:
204
                    # print('NonMultiplexable')
205
                    if rejectHandle is not None:
206
207
                        try:
208
                            to_write = baseDemux.demultiplex(
209
                                reads, library=library, reason=reason)
210
                            rejectHandle.write(to_write)
211
212
                        except NonMultiplexable as e:
213
                            # we cannot read the header of the read..
214
                            reads = [
215
                                '\n'.join(
216
                                    (read.header +
217
                                     f';RR:{reason};Rr:{e}',
218
                                     read.sequence,
219
                                     read.plus,
220
                                     read.qual)) for read in reads]
221
                            rejectHandle.write(reads)
222
223
                    continue
224
                except Exception as e:
225
                    if probe:
226
                        continue
227
                    traceback_str = str(traceback.format_exc())
228
                    print(traceback_str)
229
                    print(
230
                        f'{Fore.RED}Fatal error. While demultiplexing strategy {strategy.longName} yielded an error, the error message was: {e}')
231
                    print('The read(s) causing the error looked like this:')
232
                    for read in reads:
233
                        print(str(read))
234
                    print(Style.RESET_ALL)
235
                    if log_handle is not None:
236
                        log_handle.write(
237
                            f"Error occured using {strategy.longName}\n{traceback_str}\n")
238
                # print(recodedRecord)
239
                strategyYields[strategy.shortName] += 1
240
            if (maxReadPairs is not None and (
241
                    processedReadPairs) >= maxReadPairs):
242
                break
243
        # write yields to log file if applicable:
244
        if log_handle is not None:
245
            log_handle.write(f'processed {processedReadPairs} read pairs\n')
246
            log_handle.write(f'Reads obtained per protocol\n')
247
            log_handle.write(f'Strategy\tReads\n')
248
            for strategy, used_reads in strategyYields.items():
249
                log_handle.write(f'{strategy}\t{used_reads}\n')
250
        return processedReadPairs, strategyYields
251
252
    def detectLibYields(
253
            self,
254
            libraries,
255
            strategies=None,
256
            testReads=100000,
257
            maxAutoDetectMethods=1,
258
            minAutoDetectPct=5,
259
            verbose=False):
260
        libYields = dict()
261
262
        for lib, lanes in libraries.items():
263
            for lane, readPairs in lanes.items():
264
265
                for readPair in readPairs:
266
                    if len(readPairs) == 1:
267
                        processedReadPairs, strategyYields = self.demultiplex(
268
                            [ readPairs[
269
                                list(readPairs.keys())[0]
270
                                ][0] ], maxReadPairs=testReads, strategies=strategies, probe=True)
271
                    elif len(readPairs) == 2:
272
                        processedReadPairs, strategyYields = self.demultiplex(
273
                            (readPairs['R1'][0], readPairs['R2'][0]), maxReadPairs=testReads, strategies=strategies, probe=True)
274
                    else:
275
                        raise ValueError('Error: %s' % readPairs.keys())
276
277
                if verbose:
278
                    print(f'Report for {lib}:')
279
                    self.strategyYieldsToFormattedReport(
280
                        processedReadPairs,
281
                        strategyYields,
282
                        maxAutoDetectMethods=maxAutoDetectMethods,
283
                        minAutoDetectPct=minAutoDetectPct)
284
                libYields[lib] = {
285
                    'processedReadPairs': processedReadPairs,
286
                    'strategyYields': strategyYields}
287
                break
288
        return processedReadPairs, libYields
289
290
    def strategyYieldsToFormattedReport(
291
            self,
292
            processedReadPairs,
293
            strategyYields,
294
            selectedStrategies=None,
295
            maxAutoDetectMethods=1,
296
            minAutoDetectPct=5):
297
        print(
298
            f'Analysed {Style.BRIGHT}{processedReadPairs}{Style.RESET_ALL} read pairs')
299
300
        if selectedStrategies is None:
301
            selectedStrategies = {}
302
        #selectedStrategies = self.selectedStrategiesBasedOnYield(processedReadPairs, strategyYields)
303
        for i, (strategy, strategyYield) in enumerate(
304
                strategyYields.most_common()):
305
            yieldRatio = strategyYield / (0.001 + processedReadPairs)
306
            print(
307
                (
308
                    Style.BRIGHT +
309
                    Fore.GREEN if (
310
                        (strategy in selectedStrategies) or i < maxAutoDetectMethods) else (
311
                        Fore.YELLOW if yieldRatio *
312
                        100 >= minAutoDetectPct else Style.DIM)) +
313
                f'\t {strategy}:%.2f%%{Style.RESET_ALL}' %
314
                (100.0 *
315
                 yieldRatio))
316
317
    def selectedStrategiesBasedOnYield(
318
            self,
319
            processedReadPairs,
320
            strategyYields,
321
            maxAutoDetectMethods=1,
322
            minAutoDetectPct=0.05):
323
        selectedStrategies = []
324
        for strategy, strategyYield in strategyYields.most_common(
325
                maxAutoDetectMethods):
326
            yieldRatio = strategyYield / (0.001 + processedReadPairs) * 100.0
327
            if yieldRatio >= minAutoDetectPct:
328
                selectedStrategies.append(strategy)
329
        return selectedStrategies