[9b26b7]: / third_party / nucleus / io / vcf_writer.h

Download this file

135 lines (114 with data), 5.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/*
* Copyright 2018 Google LLC.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef THIRD_PARTY_NUCLEUS_IO_VCF_WRITER_H_
#define THIRD_PARTY_NUCLEUS_IO_VCF_WRITER_H_
#include <memory>
#include <string>
#include "htslib/hts.h"
#include "htslib/sam.h"
#include "htslib/vcf.h"
#include "third_party/nucleus/io/vcf_conversion.h"
#include "third_party/nucleus/platform/types.h"
#include "third_party/nucleus/protos/range.pb.h"
#include "third_party/nucleus/protos/variants.pb.h"
#include "third_party/nucleus/util/proto_ptr.h"
#include "third_party/nucleus/core/status.h"
#include "third_party/nucleus/core/statusor.h"
namespace nucleus {
// Writes nucleus.genomics.v1.Variant records to a VCF or BCF file.
class VcfWriter {
 public:
  // Opens the file at variants_path (creating it if necessary) and builds a
  // VcfWriter that writes to it. On success the result holds a unique_ptr to
  // the new writer; on failure it holds a Status explaining what went wrong.
  static StatusOr<std::unique_ptr<VcfWriter>> ToFile(
      const string& variants_path,
      const nucleus::genomics::v1::VcfHeader& header,
      const nucleus::genomics::v1::VcfWriterOptions& options);

  ~VcfWriter();

  // VcfWriter is neither copy-constructible nor copy-assignable.
  VcfWriter(const VcfWriter&) = delete;
  VcfWriter& operator=(const VcfWriter&) = delete;

  // Writes a single variant record to the VCF.
  // The variant's calls must appear in the same order as the samples listed
  // in the options. Yields Status::OK() when the record was written;
  // otherwise the returned status describes the error.
  ::nucleus::Status Write(
      const nucleus::genomics::v1::Variant& variant_message);

  // Same as Write(), but takes the variant wrapped in a ConstProtoPtr and
  // forwards the pointed-to proto.
  ::nucleus::Status WritePython(
      const ConstProtoPtr<const nucleus::genomics::v1::Variant>& wrapped) {
    return Write(*wrapped.p_);
  }

  // Somatic counterpart of Write(): extends the Write() API to enable
  // somatic processing.
  ::nucleus::Status WriteSomatic(
      const nucleus::genomics::v1::Variant& variant_message);

  // Same as WriteSomatic(), but takes the variant wrapped in a ConstProtoPtr
  // and forwards the pointed-to proto.
  ::nucleus::Status WriteSomaticPython(
      const ConstProtoPtr<const nucleus::genomics::v1::Variant>& wrapped) {
    return WriteSomatic(*wrapped.p_);
  }

  // Closes the underlying resource descriptors. Yields Status::OK() when the
  // close succeeded; otherwise the returned status describes the error.
  ::nucleus::Status Close();

  // Intentionally empty. It exists solely so the class can back a Python
  // context manager; do not call it from anywhere else.
  void PythonEnter() const {}

  // Read-only access to the record converter held by this writer.
  const VcfRecordConverter& RecordConverter() const { return record_converter_; }

  // Picks the htsFile open mode implied by file_path:
  //   "wb"  - compressed BCF, when the path ends with .bcf.gz;
  //   "wbu" - uncompressed BCF, when the path ends with .bcf;
  //   "wz"  - compressed VCF, when the path ends with .gz but not .bcf.gz;
  //   "w"   - uncompressed VCF, the default for every other path.
  // Public only so that tests can call it.
  static const char* GetOpenMode(const string& file_path);

 private:
  // Private constructor; instances are created through ToFile().
  VcfWriter(const nucleus::genomics::v1::VcfHeader& header,
            const nucleus::genomics::v1::VcfWriterOptions& options,
            htsFile* fp);

  ::nucleus::Status WriteHeader();

  // htslib file handle that the VCF data is written to.
  htsFile* fp_;

  // Options that control how this VcfWriter behaves.
  const nucleus::genomics::v1::VcfWriterOptions options_;

  // VcfHeader proto form of the VCF header.
  const nucleus::genomics::v1::VcfHeader vcf_header_;

  // Pointer to the VCF header object (htslib bcf_hdr_t).
  bcf_hdr_t* header_;

  // Interconverter for VCF records.
  VcfRecordConverter record_converter_;
};
} // namespace nucleus
#endif // THIRD_PARTY_NUCLEUS_IO_VCF_WRITER_H_