Switch to unified view

a b/singlecellmultiomics/libraryDetection/archivestats.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
import argparse
5
6
import pkg_resources
7
import singlecellmultiomics.barcodeFileParser.barcodeFileParser as barcodeFileParser
8
from singlecellmultiomics.modularDemultiplexer.demultiplexingStrategyLoader import DemultiplexingStrategyLoader
9
import singlecellmultiomics.libraryDetection.sequencingLibraryListing as sequencingLibraryListing
10
import glob
11
import fnmatch
12
import os
13
from types import SimpleNamespace
14
15
def find_fastq_directories(sequencing_dirs):
    """Yield ``(directory, fastq_paths)`` for sub-directories holding fastq.gz files.

    Every directory in ``sequencing_dirs`` is walked recursively with
    ``os.walk``. Only sub-directories are inspected (the walked top-level
    directory itself is never reported), and directories named ``BaseCalls``
    are skipped.

    Args:
        sequencing_dirs: iterable of directory paths to scan.

    Yields:
        Tuples of the sub-directory path and the non-empty list of
        ``*.fastq.gz`` paths found directly inside it (in ``glob`` order).
    """
    for sdir in sequencing_dirs:
        for root, dirnames, _ in os.walk(sdir):
            for d in dirnames:
                if d == 'BaseCalls':
                    continue
                fp = os.path.join(root, d)
                # Glob once and reuse the result for both the emptiness test
                # and the list handed to the caller.
                fastqfiles = glob.glob(fp + '/*.fastq.gz')
                if fastqfiles:
                    yield fp, fastqfiles


def main():
    """Print the top selected demultiplexing strategy for every detected library.

    Scans the comma separated directories given by ``-locations`` for
    sub-directories containing fastq.gz files, detects the sequencing
    libraries in each of them and prints one ``<library> <strategy shortName>``
    line per library to stdout. Libraries for which no strategy is selected
    are reported on stderr and skipped.
    """
    import sys  # only needed for the stderr warning below

    # Parse the command line first so that `-h` and argument errors do not
    # have to wait for the barcode and index resources to be loaded.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Check multiplexability of many fastq files""")
    argparser.add_argument(
        '-locations',
        default='.',
        help='Comma separated list of directories to scan recursively')
    arguments = argparser.parse_args()
    sequencing_dirs = arguments.locations.split(',')

    barcode_dir = pkg_resources.resource_filename(
        'singlecellmultiomics', 'modularDemultiplexer/barcodes/')
    index_dir = pkg_resources.resource_filename(
        'singlecellmultiomics', 'modularDemultiplexer/indices/')

    barcode_parser = barcodeFileParser.BarcodeParser(
        hammingDistanceExpansion=0, barcodeDirectory=barcode_dir)
    index_parser = barcodeFileParser.BarcodeParser(
        hammingDistanceExpansion=1, barcodeDirectory=index_dir)

    dmx = DemultiplexingStrategyLoader(barcodeParser=barcode_parser,
                                       indexParser=index_parser,
                                       indexFileAlias=None)

    for fp, fastqfiles in find_fastq_directories(sequencing_dirs):
        # Settings namespace handed to the library lister; it stands in for
        # parsed command line arguments (which fields the lister actually
        # reads is defined elsewhere).
        args = SimpleNamespace(
            replace=None,
            fastqfiles=fastqfiles,
            slib=None,
            merge='_',
            dsize=10000,
            se=False,
            ignore=True,
            maxAutoDetectMethods=100,
            minAutoDetectPct=1)
        libraries = sequencingLibraryListing.SequencingLibraryLister(
            verbose=False).detect(fastqfiles, args=args)

        # The overall read-pair count returned here is not used; the
        # per-library counts are read from the yields mapping below.
        _, strategyYieldsForAllLibraries = dmx.detectLibYields(
            libraries,
            testReads=args.dsize,
            maxAutoDetectMethods=args.maxAutoDetectMethods,
            minAutoDetectPct=args.minAutoDetectPct,
            verbose=False)

        for library in libraries:
            libraryYields = strategyYieldsForAllLibraries[library]
            selectedStrategies = dmx.selectedStrategiesBasedOnYield(
                libraryYields['processedReadPairs'],
                libraryYields['strategyYields'],
                maxAutoDetectMethods=args.maxAutoDetectMethods,
                minAutoDetectPct=args.minAutoDetectPct)
            selectedStrategies = dmx.getSelectedStrategiesFromStringList(
                selectedStrategies, verbose=False)

            # NOTE(review): assumes the selection can come back empty (e.g.
            # no strategy reaches minAutoDetectPct) - confirm against the
            # strategy loader. Without this guard indexing [0] would abort
            # the whole scan with an IndexError.
            if not selectedStrategies:
                print(
                    'No demultiplexing strategy selected for library '
                    '%s in %s' % (library, fp),
                    file=sys.stderr)
                continue

            print(library, selectedStrategies[0].shortName)


if __name__ == '__main__':
    main()