91 lines (77 with data), 4.4 kB
# Copyright 2017 Google LLC.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from third_party.nucleus.io.clif_postproc import ValueErrorOnInaccurate
from "deepvariant/realigner/ssw.h":
namespace `learning::genomics::deepvariant`:
# Python wrapper for the C++ `Alignment` type declared in
# "deepvariant/realigner/ssw.h" (namespace learning::genomics::deepvariant).
# Only data members are wrapped; each is exposed as a Python attribute of the
# type shown in its annotation.
class Alignment:
# The best alignment score
sw_score: int
sw_score_next_best: int # The next best alignment score
ref_begin: int # Reference begin position of the best alignment
ref_end: int # Reference end position of the best alignment
query_begin: int # Query begin position of the best alignment
query_end: int # Query end position of the best alignment
ref_end_next_best: int # Reference end position of next best alignment
mismatches: int # Number of mismatches of the alignment
# Declared as `bytes`, so Python callers receive a bytes object, not str.
cigar_string: bytes # Cigar string of the best alignment
# TODO: make use of score_filter.
# Python wrapper for the C++ `Filter` type declared in
# "deepvariant/realigner/ssw.h". Its fields select which parts of an
# Alignment get reported and which thresholds an alignment must pass.
class Filter:
report_begin_position: bool # Give ref_begin and query_begin.
# If it is not set, ref_begin and
# query_begin are -1.
report_cigar: bool # Give cigar_string and cigar.
# report_begin_position is automatically
# TRUE.
# NOTE: only `cigar_string` is declared on Alignment in this file; the `cigar`
# member mentioned above is not wrapped here.
# When *report_cigar* is true and alignment passes these two filters,
# cigar_string and cigar will be given.
score_filter: int # score >= score_filter
distance_filter: int # ((ref_end - ref_begin) < distance_filter) &&
# ((query_end - query_begin) < distance_filter)
# NOTE(review): this comment previously said "read_begin"; no such field is
# declared in this file, so query_begin is presumably meant — confirm
# against ssw.h.
# Wrapper for the constructor with arguments.
# `@add__init__` registers the C++ function `ConstructFromValues` as an
# additional Python constructor for Filter.
# NOTE(review): pos/cigar/score/dis presumably map to report_begin_position,
# report_cigar, score_filter and distance_filter respectively — confirm
# against ssw.h.
@add__init__
def `ConstructFromValues` as construct(self, pos: bool, cigar: bool,
score: int, dis: int)
# Wraps only a *subset* of the functionality of the Aligner class. In
# particular, we haven't wrapped support for custom score matrices or
# non-ATGCN alphabets, nor the "rebuild" functions.
# Python wrapper for the C++ `Aligner` type declared in
# "deepvariant/realigner/ssw.h". Exposes construction from scoring
# parameters, setting a reference sequence, and aligning a query against it.
class Aligner:
# Wrapper for the constructor with arguments.
# `@add__init__` registers the C++ function `ConstructWithParameters` as an
# additional Python constructor taking the four integer scoring parameters
# named below.
@add__init__
def `ConstructWithParameters` as construct(self,
match_score: int,
mismatch_penalty: int,
gap_opening_penalty: int,
gap_extending_penalty: int)
# Sets the reference sequence (passed as bytes) that later `align` calls
# are run against.
# NOTE(review): the meaning of the returned int is not stated in this file —
# presumably the stored reference length; confirm against ssw.h.
def `SetReferenceSequence` as set_reference_sequence(self,
seq: bytes) -> int
# Align query against previously set reference sequence.
# Prefer this method if aligning many queries to a single reference.
#
# Args:
#   query: the query sequence, as bytes.
#   filter: a Filter selecting what is reported in the result.
# The C++ call yields (accuracy, alignment); that pair is passed through the
# `ValueErrorOnInaccurate` post-processor imported at the top of this file
# before anything is returned to Python.
# NOTE(review): judging by its name, the post-processor presumably raises
# ValueError for some `accuracy` values and otherwise returns the Alignment —
# confirm against third_party.nucleus.io.clif_postproc.
def `Align` as align(self, query: bytes, filter: Filter)
-> (accuracy: int, alignment: Alignment):
return ValueErrorOnInaccurate(...)