Download this file

149 lines (121 with data), 6.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import pkg_resources
import singlecellmultiomics.barcodeFileParser.barcodeFileParser as barcodeFileParser
from singlecellmultiomics.modularDemultiplexer.demultiplexingStrategyLoader import DemultiplexingStrategyLoader
import singlecellmultiomics.libraryDetection.sequencingLibraryListing as sequencingLibraryListing
import glob
import fnmatch
import os
from types import SimpleNamespace
def find_fastq_directories(locations):
    """Yield ``(directory, fastq_files)`` for sub-directories holding fastq files.

    Every location is walked recursively with ``os.walk``. A sub-directory is
    reported when it directly contains at least one ``*.fastq.gz`` file and
    is not named ``BaseCalls``. Only sub-directories are inspected: fastq
    files lying directly in a location root are not reported.

    Args:
        locations: Iterable of directory paths to search.

    Yields:
        Tuples of the sub-directory path and the list of fastq paths that
        ``glob.glob`` found in it (in the order glob returned them).
    """
    for location in locations:
        for root, dirnames, _filenames in os.walk(location):
            for dirname in dirnames:
                if dirname == 'BaseCalls':
                    continue
                directory = os.path.join(root, dirname)
                # Glob once and reuse the result for both the emptiness
                # check and the actual file list.
                fastq_files = glob.glob(directory + '/*.fastq.gz')
                if fastq_files:
                    yield directory, fastq_files


def main():
    """Report the detected demultiplexing strategy of every library found.

    For each fastq-holding sub-directory of the comma separated
    ``-locations`` argument, the sequencing libraries are detected, the
    yield of the available demultiplexing strategies is measured, and one
    line ``<library> <strategy short name>`` is printed per library. When no
    strategy is selected for a library, ``?`` is printed in place of the
    strategy name instead of aborting the whole scan.
    """
    # Parse the command line first so that `-h` and argument errors do not
    # have to wait for the barcode and index files to be loaded.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Check multiplexability of many fastq files""")
    argparser.add_argument('-locations', default='.')
    arguments = argparser.parse_args()

    barcode_dir = pkg_resources.resource_filename(
        'singlecellmultiomics', 'modularDemultiplexer/barcodes/')
    index_dir = pkg_resources.resource_filename(
        'singlecellmultiomics', 'modularDemultiplexer/indices/')
    barcode_parser = barcodeFileParser.BarcodeParser(
        hammingDistanceExpansion=0, barcodeDirectory=barcode_dir)
    index_parser = barcodeFileParser.BarcodeParser(
        hammingDistanceExpansion=1, barcodeDirectory=index_dir)
    dmx = DemultiplexingStrategyLoader(barcodeParser=barcode_parser,
                                       indexParser=index_parser,
                                       indexFileAlias=None)

    sequencing_dirs = arguments.locations.split(',')
    for _directory, fastqfiles in find_fastq_directories(sequencing_dirs):
        # Option namespace handed to the library lister; presumably it
        # mirrors the options of the demultiplexer command line -- verify
        # against SequencingLibraryLister.detect.
        args = SimpleNamespace(
            replace=None,
            fastqfiles=fastqfiles,
            slib=None,
            merge='_',
            dsize=10000,
            se=False,
            ignore=True,
            maxAutoDetectMethods=100,
            minAutoDetectPct=1)
        libraries = sequencingLibraryListing.SequencingLibraryLister(
            verbose=False).detect(fastqfiles, args=args)
        # The first return value is not needed here; the per-library read
        # pair counts are taken from the yields dictionary below.
        _, strategyYieldsForAllLibraries = dmx.detectLibYields(
            libraries,
            testReads=args.dsize,
            maxAutoDetectMethods=args.maxAutoDetectMethods,
            minAutoDetectPct=args.minAutoDetectPct,
            verbose=False)

        for library in libraries:
            library_yields = strategyYieldsForAllLibraries[library]
            selectedStrategies = dmx.selectedStrategiesBasedOnYield(
                library_yields['processedReadPairs'],
                library_yields['strategyYields'],
                maxAutoDetectMethods=args.maxAutoDetectMethods,
                minAutoDetectPct=args.minAutoDetectPct)
            selectedStrategies = dmx.getSelectedStrategiesFromStringList(
                selectedStrategies, verbose=False)
            if selectedStrategies:
                print(library, selectedStrategies[0].shortName)
            else:
                # Nothing was selected for this library: report it and
                # carry on with the remaining libraries and directories.
                print(library, '?')


if __name__ == '__main__':
    main()