[9b26b7]: / deepvariant / realigner / python / ssw.clif

Download this file

91 lines (77 with data), 4.4 kB

# Copyright 2017 Google LLC.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from this
#    software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from third_party.nucleus.io.clif_postproc import ValueErrorOnInaccurate

from "deepvariant/realigner/ssw.h":

  namespace `learning::genomics::deepvariant`:

    # Wraps the C++ `Alignment` type declared in ssw.h.  An instance carries the
    # fields describing the result of a single alignment of a query against a
    # reference, as produced by Aligner.align().
    #
    # NOTE(review): this file does not state whether the begin/end positions
    # are 0- or 1-based, or whether the end positions are inclusive -- confirm
    # against ssw.h before relying on either.
    class Alignment:
      # The best alignment score
      sw_score: int
      sw_score_next_best: int # The next best alignment score
      ref_begin: int          # Reference begin position of the best alignment
      ref_end: int            # Reference end position of the best alignment
      query_begin: int        # Query begin position of the best alignment
      query_end: int          # Query end position of the best alignment
      ref_end_next_best: int  # Reference end position of next best alignment
      mismatches: int         # Number of mismatches of the alignment
      # Exposed to Python as bytes (not str); decode it if text is needed.
      cigar_string: bytes     # Cigar string of the best alignment


    # Wraps the C++ `Filter` type declared in ssw.h.  A Filter is passed to
    # Aligner.align() and selects which optional parts of the Alignment are
    # reported.
    #
    # TODO: make use of score_filter.
    class Filter:
      report_begin_position: bool  # Give ref_begin and query_begin.
                                   #   If it is not set, ref_begin and
                                   #   query_begin are -1.
      report_cigar: bool           # Give cigar_string and cigar.
                                   #   report_begin_position is automatically
                                   #   TRUE.
                                   #   (The Alignment wrapper in this file
                                   #   exposes only cigar_string, not cigar.)

      # When *report_cigar* is true and alignment passes these two filters,
      # cigar_string and cigar will be given.
      score_filter: int         # score >= score_filter
      distance_filter: int      # ((ref_end - ref_begin) < distance_filter) &&
                                # ((query_end - query_begin) < distance_filter)

      # Wrapper for the constructor with arguments.
      # @add__init__ exposes the C++ `ConstructFromValues` function as an
      # additional Python constructor, so Filter(pos, cigar, score, dis) can be
      # called directly.
      # NOTE(review): presumably pos -> report_begin_position,
      # cigar -> report_cigar, score -> score_filter, dis -> distance_filter;
      # the mapping is not visible here -- confirm against ssw.h.
      @add__init__
      def `ConstructFromValues` as construct(self, pos: bool, cigar: bool,
                                             score: int, dis: int)


    # Wrap for a *subset* of the functionality in the Aligner class.  In
    # particular, we haven't wrapped support for custom score matrices or
    # non-ATGCN alphabets, nor the "rebuild" functions.
    #
    # Usage order implied by the declarations below: construct with scoring
    # parameters, call set_reference_sequence() once, then call align() for
    # each query.
    class Aligner:

      # Wrap for the constructor with arguments.
      # @add__init__ exposes the C++ `ConstructWithParameters` function as an
      # additional Python constructor taking the four scoring parameters.
      # NOTE(review): the sign convention of the penalties (positive magnitudes
      # vs. negative scores) is not stated here -- confirm against ssw.h.
      @add__init__
      def `ConstructWithParameters` as construct(self,
                                                 match_score: int,
                                                 mismatch_penalty: int,
                                                 gap_opening_penalty: int,
                                                 gap_extending_penalty: int)

      # Sets the reference sequence that later align() calls run against.
      # The sequence is passed as bytes.
      # NOTE(review): the meaning of the returned int is not documented in this
      # file (possibly the stored sequence length) -- confirm against ssw.h.
      def `SetReferenceSequence` as set_reference_sequence(self,
                                                           seq: bytes) -> int

      # Align query against previously set reference sequence.
      # Prefer this method if aligning many queries to a single reference.
      #
      # The C++ call yields (accuracy, alignment); both values are handed to
      # the ValueErrorOnInaccurate postprocessor imported at the top of this
      # file, and its result is what the Python caller receives.
      # NOTE(review): presumably the postprocessor raises ValueError when
      # `accuracy` signals an inaccurate result and otherwise returns only the
      # Alignment -- confirm against third_party.nucleus.io.clif_postproc.
      def `Align` as align(self, query: bytes, filter: Filter)
          -> (accuracy: int, alignment: Alignment):
          return ValueErrorOnInaccurate(...)