[9b26b7]: / deepvariant / make_examples.py

Download this file

235 lines (202 with data), 8.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# Copyright 2017 Google LLC.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Step one of DeepVariant: creates tf.Example protos for training/calling."""
import os
from absl import app
from absl import flags
from deepvariant import dv_constants
from deepvariant import logging_level
from deepvariant import make_examples_core
from deepvariant import make_examples_options
from deepvariant.protos import deepvariant_pb2
from third_party.nucleus.io.python import hts_verbose
from third_party.nucleus.util import errors
from third_party.nucleus.util import proto_utils
# Sentinel command line flag value indicating no downsampling should occur.
NO_DOWNSAMPLING = 0.0
# Index of the single sample handled by this binary; it is used to index
# options.sample_options and is passed as main_sample_index to the shared
# make_examples_options helpers.
MAIN_SAMPLE_INDEX = 0 # 0 is the only sample.
# Global absl flag registry; default_options() falls back to it when the caller
# does not supply a flags_obj.
FLAGS = flags.FLAGS
# Adopt more general flags from make_examples_options.
flags.adopt_module_key_flags(make_examples_options)
# Flags related to the sample in DeepVariant:
# --reads defaults to None; it is marked as required only when this module is
# run as a script (see the __main__ guard at the bottom of the file).
READS_ = flags.DEFINE_string(
'reads',
None,
(
'Required. Aligned, sorted, indexed BAM file containing the reads we'
' want to call. Should be aligned to a reference genome compatible with'
' --ref. Can provide multiple BAMs (comma-separated).'
),
)
# --sample_name defaults to the empty string; its value is handed to
# make_examples_core.assign_sample_name together with the --reads value.
SAMPLE_NAME_ = flags.DEFINE_string(
'sample_name',
'',
(
'Sample name to use for our sample_name in the output'
' Variant/DeepVariantCall protos. If not specified, will be inferred'
' from the header information from --reads.'
),
)
# --pileup_image_height: the default of 0 is falsy, so one_sample_from_flags
# keeps dv_constants.PILEUP_DEFAULT_HEIGHT unless a non-zero value is given.
PILEUP_IMAGE_HEIGHT_ = flags.DEFINE_integer(
'pileup_image_height',
0,
'Height for the pileup image. If 0, uses the default height',
)
# --downsample_fraction is only copied into the sample options when it differs
# from the NO_DOWNSAMPLING sentinel.
DOWNSAMPLE_FRACTION_ = flags.DEFINE_float(
'downsample_fraction',
NO_DOWNSAMPLING,
(
f'If not {NO_DOWNSAMPLING} must be a value between 0.0 and 1.0. Reads'
' will be kept (randomly) with a probability of downsample_fraction'
' from the input BAM. This argument makes it easy to create examples'
' as though the input BAM had less coverage.'
),
)
# --proposed_variants is copied into the sample options only when non-empty.
PROPOSED_VARIANTS_ = flags.DEFINE_string(
'proposed_variants',
'',
(
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer`. The GTs will be ignored.'
),
)
# --candidate_positions defaults to None and is copied into the sample options
# only when a non-empty value is given.
CANDIDATE_POSITIONS_ = flags.DEFINE_string(
'candidate_positions',
None,
'Path to the binary file containing candidate positions.',
)
def one_sample_from_flags(add_flags=True, flags_obj=None):
  """Builds the single DeepVariant sample description from command-line flags.

  Args:
    add_flags: bool. When True, the sample-specific flag values (--reads,
      --downsample_fraction, --pileup_image_height, --proposed_variants and
      --candidate_positions) are copied into the sample options whenever they
      carry a non-default value. When False, only the role, name, order,
      variant caller options and default pileup height are populated.
    flags_obj: object. Forwarded unchanged to
      make_examples_core.make_vc_options.

  Returns:
    A (samples_in_order, sample_role_to_train) tuple: a one-element list
    holding the SampleOptions proto, and the role of that sample.
  """
  resolved_name = make_examples_core.assign_sample_name(
      sample_name_flag=SAMPLE_NAME_.value, reads_filenames=READS_.value
  )
  caller_options = make_examples_core.make_vc_options(
      sample_name=resolved_name, flags_obj=flags_obj
  )
  main_sample = deepvariant_pb2.SampleOptions(
      role='main_sample',
      name=resolved_name,
      variant_caller_options=caller_options,
      order=[0],
      pileup_height=dv_constants.PILEUP_DEFAULT_HEIGHT,
  )

  # Without flag propagation the sample keeps only the defaults set above.
  if not add_flags:
    return [main_sample], main_sample.role

  reads = READS_.value
  if reads:
    # --reads may hold several comma-separated BAM paths.
    main_sample.reads_filenames.extend(reads.split(','))

  fraction = DOWNSAMPLE_FRACTION_.value
  if fraction != NO_DOWNSAMPLING:
    main_sample.downsample_fraction = fraction

  height = PILEUP_IMAGE_HEIGHT_.value
  if height:
    # A non-zero flag value overrides the default height set above.
    main_sample.pileup_height = height

  proposed = PROPOSED_VARIANTS_.value
  if proposed:
    main_sample.proposed_variants_filename = proposed

  positions = CANDIDATE_POSITIONS_.value
  if positions:
    main_sample.candidate_positions = positions

  return [main_sample], main_sample.role
def default_options(add_flags=True, flags_obj=None):
  """Creates a MakeExamplesOptions proto populated with reasonable defaults.

  Args:
    add_flags: bool. defaults to True. If True, we will push the value of
      certain FLAGS into our options. If False, those option fields are left
      uninitialized.
    flags_obj: object. If not None, use as the source of flags, else use global
      FLAGS.

  Returns:
    deepvariant_pb2.MakeExamplesOptions protobuf.

  Raises:
    ValueError: If we observe invalid flag values.
  """
  if not flags_obj:
    flags_obj = FLAGS

  samples_in_order, sample_role_to_train = one_sample_from_flags(
      add_flags=add_flags, flags_obj=flags_obj
  )
  options = make_examples_options.shared_flags_to_options(
      add_flags=add_flags,
      flags_obj=flags_obj,
      samples_in_order=samples_in_order,
      sample_role_to_train=sample_role_to_train,
      main_sample_index=MAIN_SAMPLE_INDEX,
  )

  # --reads defaults to None and is only marked as required when the module is
  # run as a script, so library callers can reach this point without a value.
  # os.path.basename(None) raises TypeError; skip the assignment in that case,
  # mirroring the `if READS_.value:` guard in one_sample_from_flags.
  # NOTE: the raw flag value is passed on, so for a comma-separated list of
  # BAMs only the text after the final path separator is kept.
  if add_flags and READS_.value:
    options.bam_fname = os.path.basename(READS_.value)
  return options
def check_options_are_valid(options):
  """Checks that all the options chosen make sense together.

  Runs the shared make_examples_options validation first, then the checks that
  involve the main sample's candidate_positions and proposed_variants_filename
  fields. Each violation is reported through errors.log_and_raise.

  Args:
    options: MakeExamplesOptions proto whose
      sample_options[MAIN_SAMPLE_INDEX] entry is inspected.
  """
  # Check for general flags (shared for DeepVariant and DeepTrio).
  make_examples_options.check_options_are_valid(
      options, main_sample_index=MAIN_SAMPLE_INDEX
  )

  main_sample = options.sample_options[MAIN_SAMPLE_INDEX]
  in_candidate_sweep = (
      options.mode == deepvariant_pb2.MakeExamplesOptions.CANDIDATE_SWEEP
  )

  # An unset protobuf scalar field reads back as its default ('' for strings)
  # and never as None, so an `is None` comparison could never fire. Test for
  # emptiness instead so that a missing value is actually reported.
  if in_candidate_sweep and not main_sample.candidate_positions:
    errors.log_and_raise(
        '--candidate_positions is required when --positions_sweep is set.'
    )

  if in_candidate_sweep and main_sample.proposed_variants_filename:
    errors.log_and_raise(
        '--positions_sweep_mode is incompatible with --proposed_variants'
    )

  if main_sample.candidate_positions and main_sample.proposed_variants_filename:
    errors.log_and_raise(
        '--candidate_positions is incompatible with --proposed_variants'
    )
def main(argv=()):
  """Validates the command line, builds the options and runs make_examples.

  Args:
    argv: sequence of command-line arguments. More than one element means
      positional arguments were supplied, which is reported as a
      errors.CommandLineError via errors.log_and_raise.
  """
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      message = (
          'Command line parsing failure: make_examples does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv))
      )
      errors.log_and_raise(message, errors.CommandLineError)
    del argv  # Unused.

    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()
    hts_level = hts_verbose.htsLogLevel[FLAGS.hts_logging_level]
    hts_verbose.set(hts_level)

    # Set up options; may do I/O.
    run_options = default_options(add_flags=True, flags_obj=FLAGS)
    check_options_are_valid(run_options)

    # Run!
    make_examples_core.make_examples_runner(run_options)
if __name__ == '__main__':
  # These flags must be given on the command line when run as a script.
  flags.mark_flags_as_required(['examples', 'mode', 'reads', 'ref'])
  app.run(main)