--- a
+++ b/deepvariant/make_examples_somatic.py
@@ -0,0 +1,321 @@
+# Copyright 2021 Google LLC.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+"""A prototype to create tumor-normal images (tf.Example protos)."""
+
+import logging
+import os
+
+from absl import app
+from absl import flags
+
+from deepvariant import dv_constants
+from deepvariant import logging_level
+from deepvariant import make_examples_core
+from deepvariant import make_examples_options
+from deepvariant.protos import deepvariant_pb2
+from third_party.nucleus.io.python import hts_verbose
+from third_party.nucleus.util import errors
+from third_party.nucleus.util import proto_utils
+
+# Sentinel command line flag value indicating that reads are not downsampled.
+NO_DOWNSAMPLING = 0.0
+
+# Index of the matched normal in the ordered sample list. When a normal is
+# provided it is listed first (index 0) and the tumor follows at index 1; in a
+# tumor-only run the tumor is at index 0. The tumor is always the "main" sample
+# because the goal here is somatic calling.
+NORMAL_SAMPLE_INDEX = 0
+
+FLAGS = flags.FLAGS
+
+# Adopt the key flags of make_examples_options as key flags of this module.
+flags.adopt_module_key_flags(make_examples_options)
+
+# Flags related to samples in DeepSomatic:
+_READS_TUMOR = flags.DEFINE_string(
+    'reads_tumor',
+    None,
+    (
+        'Required. Reads from the tumor sample. '
+        'Aligned, sorted, indexed BAM file. '
+        'Should be aligned to a reference genome compatible with --ref. '
+        'Can provide multiple BAMs (comma-separated).'
+    ),
+)
+_READS_NORMAL = flags.DEFINE_string(
+    'reads_normal',
+    None,
+    (
+        'Optional. Reads from the normal matched sample. '
+        'Aligned, sorted, indexed BAM file. '
+        'Should be aligned to a reference genome compatible with --ref. '
+        'Can provide multiple BAMs (comma-separated). '
+        'If not set, examples are made from the tumor sample only.'
+    ),
+)
+_SAMPLE_NAME_TUMOR = flags.DEFINE_string(
+    'sample_name_tumor',
+    '',
+    (
+        'Sample name for tumor to use for our sample_name in the output'
+        ' Variant/DeepVariantCall protos. If not specified, will be inferred'
+        ' from the header information from --reads_tumor.'
+    ),
+)
+_SAMPLE_NAME_NORMAL = flags.DEFINE_string(
+    'sample_name_normal',
+    '',
+    (
+        'Sample name for normal match to use for our sample_name in the output'
+        ' Variant/DeepVariantCall protos. If not specified, will be inferred'
+        ' from the header information from --reads_normal.'
+    ),
+)
+# Per-sample downsampling. The default, NO_DOWNSAMPLING, disables it.
+_DOWNSAMPLE_FRACTION_TUMOR = flags.DEFINE_float(
+    'downsample_fraction_tumor',
+    NO_DOWNSAMPLING,
+    f'If not {NO_DOWNSAMPLING} must be a value between 0.0 and 1.0. '
+    'Reads will be kept (randomly) with a probability of downsample_fraction '
+    'from the input tumor sample BAM. This argument makes it easy to create '
+    'examples as though the input BAM had less coverage.',
+)
+_DOWNSAMPLE_FRACTION_NORMAL = flags.DEFINE_float(
+    'downsample_fraction_normal',
+    NO_DOWNSAMPLING,
+    f'If not {NO_DOWNSAMPLING} must be a value between 0.0 and 1.0. '
+    'Reads will be kept (randomly) with a probability of downsample_fraction '
+    'from the input normal matched BAMs. This argument makes it easy to create '
+    'examples as though the input BAMs had less coverage.',
+)
+# Height of each sample's part of the pileup image.
+_PILEUP_IMAGE_HEIGHT_TUMOR = flags.DEFINE_integer(
+    'pileup_image_height_tumor',
+    dv_constants.PILEUP_DEFAULT_HEIGHT,
+    (
+        'Height for the part of the pileup image showing reads from the tumor. '
+        f'Uses {dv_constants.PILEUP_DEFAULT_HEIGHT} by default.'
+    ),
+)
+_PILEUP_IMAGE_HEIGHT_NORMAL = flags.DEFINE_integer(
+    'pileup_image_height_normal',
+    dv_constants.PILEUP_DEFAULT_HEIGHT,
+    (
+        'Height for the part of the pileup image showing reads from the matched'
+        f' normal. Uses {dv_constants.PILEUP_DEFAULT_HEIGHT} by default.'
+    ),
+)
+# Applied to the main (tumor) sample only; see default_options().
+_CANDIDATE_POSITIONS = flags.DEFINE_string(
+    'candidate_positions',
+    '',
+    'Path to the binary file containing candidate positions.',
+)
+
+# Change any flag defaults that differ for DeepSomatic.
+# This is set to float('inf') so that no candidates are included from the
+# non-target (i.e., normal) sample.
+FLAGS.set_default('vsc_min_fraction_multiplier', float('inf'))
+
+
+def _sample_options_for_role(role, flags_obj):
+  """Builds the SampleOptions proto for one sample.
+
+  Args:
+    role: str. 'tumor' selects the *_tumor flags; any other value (in practice
+      'normal') selects the *_normal flags.
+    flags_obj: object. Source of the flag values.
+
+  Returns:
+    deepvariant_pb2.SampleOptions for the requested role.
+  """
+  if role == 'tumor':
+    reads = flags_obj.reads_tumor
+    name_flag = flags_obj.sample_name_tumor
+    height = flags_obj.pileup_image_height_tumor
+    fraction = flags_obj.downsample_fraction_tumor
+  else:
+    reads = flags_obj.reads_normal
+    name_flag = flags_obj.sample_name_normal
+    height = flags_obj.pileup_image_height_normal
+    fraction = flags_obj.downsample_fraction_normal
+
+  reads_list = reads.split(',')
+  # assign_sample_name is given the raw, comma-separated flag value.
+  name = make_examples_core.assign_sample_name(
+      sample_name_flag=name_flag, reads_filenames=reads
+  )
+  sample = deepvariant_pb2.SampleOptions(
+      role=role,
+      name=name,
+      variant_caller_options=make_examples_core.make_vc_options(
+          sample_name=name, flags_obj=flags_obj
+      ),
+      # Only the normal sample is flagged to skip output generation.
+      skip_output_generation=(role == 'normal'),
+      pileup_height=height,
+      reads_filenames=reads_list,
+  )
+  if role == 'tumor':
+    paired = flags_obj.reads_normal and flags_obj.reads_tumor
+    sample.order.extend([0, 1] if paired else [0])
+  if fraction != NO_DOWNSAMPLING:
+    sample.downsample_fraction = fraction
+  return sample
+
+
+def tumor_normal_samples_from_flags(flags_obj):
+  """Collects sample-related options into a list of samples.
+
+  Returns:
+    (samples, role): SampleOptions in order (normal first when --reads_normal
+    is set, then tumor) and the role to train on, which is always 'tumor'.
+  """
+  roles = ['normal', 'tumor'] if flags_obj.reads_normal else ['tumor']
+  samples = [_sample_options_for_role(role, flags_obj) for role in roles]
+  return samples, 'tumor'
+
+
+def default_options(main_sample_index, add_flags=True, flags_obj=None):
+  """Creates a MakeExamplesOptions proto populated with reasonable defaults.
+
+  Args:
+    main_sample_index: int. Indicates the position of the tumor sample.
+    add_flags: bool. defaults to True. If True, we will push the value of
+      certain FLAGS into our options. If False, those option fields are left
+      uninitialized.
+    flags_obj: object.  If not None, use as the source of flags, else use global
+      FLAGS.
+
+  Returns:
+    deepvariant_pb2.MakeExamplesOptions protobuf.
+
+  Raises:
+    ValueError: If we observe invalid flag values.
+  """
+  if not flags_obj:
+    flags_obj = FLAGS
+
+  samples, role_to_train = tumor_normal_samples_from_flags(flags_obj=flags_obj)
+  # Candidate positions only apply to the main (tumor) sample.
+  main_sample = samples[main_sample_index]
+  main_sample.candidate_positions = flags_obj.candidate_positions
+
+  options = make_examples_options.shared_flags_to_options(
+      add_flags=add_flags,
+      flags_obj=flags_obj,
+      samples_in_order=samples,
+      sample_role_to_train=role_to_train,
+      main_sample_index=main_sample_index,
+  )
+
+  # bam_fname is "<tumor>" or "<tumor>|<normal>". Read reads_normal from
+  # flags_obj, not the global flag holder, so a custom flags_obj is honoured.
+  bam_fname = os.path.basename(flags_obj.reads_tumor)
+  if flags_obj.reads_normal:
+    bam_fname += '|' + os.path.basename(flags_obj.reads_normal)
+  options.bam_fname = bam_fname
+
+  return options
+
+
+def check_options_are_valid(options, main_sample_index):
+  """Checks that all the options chosen make sense together.
+
+  May modify `options` in place: if the main sample has candidate_positions and
+  max_reads_per_partition is set, that limit is moved into
+  max_reads_for_dynamic_bases_per_region and max_reads_per_partition becomes 0.
+
+  Args:
+    options: MakeExamplesOptions proto to check.
+    main_sample_index: int. Position of the tumor sample in sample_options.
+  """
+  # Check for general flags (shared for DeepVariant and DeepTrio).
+  make_examples_options.check_options_are_valid(
+      options, main_sample_index=main_sample_index
+  )
+  tumor = options.sample_options[main_sample_index]
+  if _READS_NORMAL.value:
+    normal = options.sample_options[NORMAL_SAMPLE_INDEX]
+    tumor_name = tumor.variant_caller_options.sample_name
+    if tumor_name == normal.variant_caller_options.sample_name:
+      errors.log_and_raise(
+          'Sample names of tumor and normal samples cannot be the same. Use'
+          ' --sample_name_tumor and --sample_name_normal with different'
+          ' names ',
+          errors.CommandLineError,
+      )
+
+  if tumor.candidate_positions and options.max_reads_per_partition:
+    logging.warning(
+        'Since candidate_positions is set, we use '
+        'max_reads_for_dynamic_bases_per_region instead of '
+        'max_reads_per_partition. This is due to the dynamic nature of the '
+        'partition size when candidate_positions is enabled, making a fixed '
+        'max_reads_per_partition unsuitable.'
+    )
+    limit = options.max_reads_per_partition
+    options.max_reads_for_dynamic_bases_per_region = limit
+    options.max_reads_per_partition = 0
+
+
+def main(argv=()):
+  """Builds MakeExamplesOptions from flags and runs make_examples."""
+  with errors.clean_commandline_error_exit():
+    if len(argv) > 1:
+      errors.log_and_raise(
+          'Command line parsing failure: make_examples does not accept '
+          'positional arguments but some are present on the command line: '
+          f'"{argv}".',
+          errors.CommandLineError,
+      )
+    del argv  # Unused.
+
+    proto_utils.uses_fast_cpp_protos_or_die()
+    logging_level.set_from_flag()
+    hts_verbose.set(hts_verbose.htsLogLevel.HTS_LOG_WARNING)
+
+    # Set up options; may do I/O.
+    # The tumor is sample 0 when it is the only sample, otherwise sample 1.
+    tumor_only = _READS_TUMOR.value and not _READS_NORMAL.value
+    main_sample_index = 0 if tumor_only else 1
+    options = default_options(main_sample_index, flags_obj=FLAGS)
+    check_options_are_valid(options, main_sample_index)
+
+    make_examples_core.make_examples_runner(options)
+
+
+if __name__ == '__main__':
+  # Flags that must always be given. --reads_normal is not among them: when it
+  # is left unset the run is tumor-only.
+  flags.mark_flags_as_required(
+      ['examples', 'mode', 'reads_tumor', 'ref']
+  )
+  # absl parses the flags and then calls main with the remaining argv.
+  app.run(main)