[9b26b7]: / third_party / nucleus / io / gff_reader.h

Download this file

120 lines (98 with data), 4.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
* Copyright 2018 Google LLC.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef THIRD_PARTY_NUCLEUS_IO_GFF_READER_H_
#define THIRD_PARTY_NUCLEUS_IO_GFF_READER_H_
#include <memory>
#include <string>
#include "third_party/nucleus/io/reader_base.h"
#include "third_party/nucleus/io/text_reader.h"
#include "third_party/nucleus/platform/types.h"
#include "third_party/nucleus/protos/gff.pb.h"
#include "third_party/nucleus/core/status.h"
#include "third_party/nucleus/core/statusor.h"
namespace nucleus {
// Value-initialized GffReaderOptions. Used as the default value of the
// `options` argument of GffReader::FromFile.
const nucleus::genomics::v1::GffReaderOptions kDefaultGffReaderOptions{};
// Alias for the abstract base class for GFF record iterables, i.e. an
// Iterable whose element type is nucleus::genomics::v1::GffRecord.
// GffReader::Iterate returns a shared_ptr to a GffIterable (wrapped in a
// StatusOr).
using GffIterable = Iterable<nucleus::genomics::v1::GffRecord>;
// Reader that exposes the contents of a GFF file as
// nucleus.genomics.v1.GffRecord protos.
//
// The constructor is private: instances are created through the FromFile
// factory. A GffReader can be neither copy-constructed nor copy-assigned.
class GffReader : public Reader {
 public:
  // Factory: builds a GffReader over the GFF file located at gff_path.
  //
  // gff_path has to name an existing file in GFF format; the gzipped
  // equivalent is accepted as well. The format itself is specified at:
  // https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
  //
  // options configures the reader; when omitted, kDefaultGffReaderOptions
  // is used.
  //
  // On success the returned StatusOr is OK and holds the new GffReader;
  // otherwise it carries an error code describing what went wrong.
  static StatusOr<std::unique_ptr<GffReader>> FromFile(
      const string& gff_path,
      const nucleus::genomics::v1::GffReaderOptions& options =
          kDefaultGffReaderOptions);

  // Copy construction and copy assignment are disabled.
  GffReader(const GffReader&) = delete;
  GffReader& operator=(const GffReader&) = delete;

  ~GffReader() = default;

  // Provides an iterable over every GFF record of the file, in file order.
  // Each element is a proto of type nucleus.genomics.v1.GffRecord.
  //
  // The result is OK when the iterable could be constructed and not OK
  // otherwise.
  StatusOr<std::shared_ptr<GffIterable>> Iterate() const;

  // Closes the underlying resource descriptors. The returned Status reports
  // whether the close went OK.
  ::nucleus::Status Close();

  // Intentionally does nothing; it exists solely so that the reader can be
  // used as a Python context manager.
  void PythonEnter() const {}

  // Accessor for the options that control this GffReader's behavior.
  const nucleus::genomics::v1::GffReaderOptions& Options() const {
    return options_;
  }

  // Accessor for the GFF header held by this reader.
  const nucleus::genomics::v1::GffHeader& Header() const { return header_; }

 private:
  // Only the FromFile factory is meant to call this constructor.
  GffReader(std::unique_ptr<TextReader> text_reader,
            const nucleus::genomics::v1::GffReaderOptions& options,
            const nucleus::genomics::v1::GffHeader& header);

  // Owning pointer to the TextReader this GffReader reads through.
  std::unique_ptr<TextReader> text_reader_;

  // Behavior-controlling options; immutable after construction.
  const nucleus::genomics::v1::GffReaderOptions options_;

  // GFF header describing how fields are to be interpreted; immutable after
  // construction.
  const nucleus::genomics::v1::GffHeader header_;

  // The iterable implementation needs direct access to the underlying
  // reader.
  friend class GffFullFileIterable;
};
} // namespace nucleus
#endif // THIRD_PARTY_NUCLEUS_IO_GFF_READER_H_