Switch to side-by-side view

--- a
+++ b/third_party/nucleus/protos/cigar.proto
@@ -0,0 +1,94 @@
+// Copyright 2018 Google LLC.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+syntax = "proto3";
+
+package nucleus.genomics.v1;
+
+// A single CIGAR operation.
+message CigarUnit {
+  // Describes the different types of CIGAR alignment operations that exist.
+  // Used wherever CIGAR alignments are used.
+  enum Operation {
+    OPERATION_UNSPECIFIED = 0;
+    // An alignment match indicates that a sequence can be aligned to the
+    // reference without evidence of an INDEL. Unlike the
+    // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
+    // the `ALIGNMENT_MATCH` operator does not indicate whether the
+    // reference and read sequences are an exact match. This operator is
+    // equivalent to SAM's `M`.
+    ALIGNMENT_MATCH = 1;
+    // The insert operator indicates that the read contains evidence of bases
+    // being inserted into the reference. This operator is equivalent to SAM's
+    // `I`.
+    INSERT = 2;
+    // The delete operator indicates that the read contains evidence of bases
+    // being deleted from the reference. This operator is equivalent to SAM's
+    // `D`.
+    DELETE = 3;
+    // The skip operator indicates that this read skips a long segment of the
+    // reference, but the bases have not been deleted. This operator is commonly
+    // used when working with RNA-seq data, where reads may skip long segments
+    // of the reference between exons. This operator is equivalent to SAM's
+    // `N`.
+    SKIP = 4;
+    // The soft clip operator indicates that bases at the start/end of a read
+    // have not been considered during alignment. This may occur if the majority
+    // of a read maps, except for low quality bases at the start/end of a read.
+    // This operator is equivalent to SAM's `S`. Bases that are soft
+    // clipped will still be stored in the read.
+    CLIP_SOFT = 5;
+    // The hard clip operator indicates that bases at the start/end of a read
+    // have been omitted from this alignment. This may occur if this linear
+    // alignment is part of a chimeric alignment, or if the read has been
+    // trimmed (for example, during error correction or to trim poly-A tails for
+    // RNA-seq). This operator is equivalent to SAM's `H`.
+    CLIP_HARD = 6;
+    // The pad operator indicates that there is padding in an alignment. This
+    // operator is equivalent to SAM's `P`.
+    PAD = 7;
+    // This operator indicates that this portion of the aligned sequence exactly
+    // matches the reference. This operator is equivalent to SAM's `=`.
+    SEQUENCE_MATCH = 8;
+    // This operator indicates that this portion of the aligned sequence is an
+    // alignment match to the reference, but a sequence mismatch. This can
+    // indicate a SNP or a read error. This operator is equivalent to SAM's
+    // `X`.
+    SEQUENCE_MISMATCH = 9;
+  }
+  Operation operation = 1;
+
+  // The number of genomic bases that the operation runs for. Required.
+  int64 operation_length = 2;
+
+  // `referenceSequence` is only used at mismatches
+  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
+  // Filling this field replaces SAM's MD tag. If the relevant information is
+  // not available, this field is unset.
+  string reference_sequence = 3;
+}