|
a |
|
b/neusomatic/include/targeted_loading.hpp |
|
|
1 |
#ifndef TARGET_LOADING_HPP |
|
|
2 |
#define TARGET_LOADING_HPP |
|
|
3 |
|
|
|
4 |
#include <vector> |
|
|
5 |
#include <map> |
|
|
6 |
#include <unordered_map> |
|
|
7 |
|
|
|
8 |
#include <SeqLib/BamReader.h> |
|
|
9 |
#include "Options.h" |
|
|
10 |
|
|
|
11 |
namespace neusomatic{ |
|
|
12 |
|
|
|
13 |
template<typename GInv, typename BamRecord, typename BamReader> |
|
|
14 |
class CaptureLayout { |
|
|
15 |
size_t region_idx_; |
|
|
16 |
const neusomatic::Options & opts_; |
|
|
17 |
BamReader bam_reader_; |
|
|
18 |
std::vector<GInv> ginvs_; |
|
|
19 |
//std::map <std::string, std::vector<GInv>> contig_ginv_; |
|
|
20 |
std::map <GInv, std::vector<BamRecord>> ginv_records_; |
|
|
21 |
|
|
|
22 |
public: |
|
|
23 |
template<typename GInvString> |
|
|
24 |
CaptureLayout(const std::string& bam_path, const std::vector<GInvString>& regions, const neusomatic::Options& o): |
|
|
25 |
region_idx_(0), opts_(o) { |
|
|
26 |
bam_reader_.Open(bam_path); |
|
|
27 |
LoadRegions_(regions); |
|
|
28 |
} |
|
|
29 |
|
|
|
30 |
decltype(auto) ginv_records() const { |
|
|
31 |
return (ginv_records_); |
|
|
32 |
} |
|
|
33 |
|
|
|
34 |
decltype(auto) Reader() const { |
|
|
35 |
return (bam_reader_); |
|
|
36 |
} |
|
|
37 |
|
|
|
38 |
size_t NumRegion() const { |
|
|
39 |
return ginvs_.size(); |
|
|
40 |
} |
|
|
41 |
|
|
|
42 |
bool NextRegion(const bool full_contained) { |
|
|
43 |
if (region_idx_ == ginvs_.size()) return false; |
|
|
44 |
ginv_records_.clear(); |
|
|
45 |
|
|
|
46 |
const GInv& curr_ginv = ginvs_[region_idx_]; |
|
|
47 |
SeqLib::GenomicRegion gr(curr_ginv.contig(), curr_ginv.left(), curr_ginv.right()); |
|
|
48 |
bam_reader_.SetRegion(gr); |
|
|
49 |
|
|
|
50 |
std::vector<BamRecord> records; |
|
|
51 |
BamRecord rec; |
|
|
52 |
while (bam_reader_.GetNextRecord(rec)) { |
|
|
53 |
bool good = true; |
|
|
54 |
if (full_contained) { |
|
|
55 |
if (rec.Position() > curr_ginv.left() || rec.PositionEnd() + 1 < curr_ginv.right()) { |
|
|
56 |
good = false; |
|
|
57 |
} |
|
|
58 |
} |
|
|
59 |
else { |
|
|
60 |
if (rec.Position() >= curr_ginv.right() || curr_ginv.left() > rec.PositionEnd()) { // overlapped |
|
|
61 |
good = false; |
|
|
62 |
} |
|
|
63 |
} |
|
|
64 |
|
|
|
65 |
if (rec.MapQuality() < opts_.min_mapq()) { // mapq filter |
|
|
66 |
good = false; |
|
|
67 |
} |
|
|
68 |
if (!opts_.include_secondary() && rec.SecondaryFlag()) { // secondary flag |
|
|
69 |
good = false; |
|
|
70 |
} |
|
|
71 |
if (opts_.filter_duplicate() && rec.DuplicateFlag()) { // duplicate flag |
|
|
72 |
good = false; |
|
|
73 |
} |
|
|
74 |
if (opts_.filter_QCfailed() && rec.QCFailFlag()) { // QC failed flag |
|
|
75 |
good = false; |
|
|
76 |
} |
|
|
77 |
if (opts_.filter_improper_pair() && !rec.ProperPair()) { // Proper pair flag. Set by aligner |
|
|
78 |
good = false; |
|
|
79 |
} |
|
|
80 |
if (opts_.filter_mate_unmapped() && !rec.MateMappedFlag()) { // Mate is ummapped |
|
|
81 |
good = false; |
|
|
82 |
} |
|
|
83 |
if (opts_.filter_improper_orientation() && !rec.ProperOrientation()) { // pair read has proper orientation (FR) and on same chrom. |
|
|
84 |
good = false; |
|
|
85 |
} |
|
|
86 |
|
|
|
87 |
if (good) { |
|
|
88 |
if(rec.GetCigar().size() == 0) { |
|
|
89 |
std::cerr << "warning: " << rec.Qname() << " has no cigar\n"; |
|
|
90 |
} else if (rec.Position() == rec.PositionEnd()) { |
|
|
91 |
std::cerr << "warning: " << rec.Qname() << " has no acutall alignment\n"; |
|
|
92 |
} |
|
|
93 |
else { |
|
|
94 |
records.push_back(rec); |
|
|
95 |
} |
|
|
96 |
} |
|
|
97 |
} |
|
|
98 |
ginv_records_[curr_ginv] = records; |
|
|
99 |
|
|
|
100 |
region_idx_ ++; |
|
|
101 |
|
|
|
102 |
return true; |
|
|
103 |
} |
|
|
104 |
|
|
|
105 |
private: |
|
|
106 |
template<typename GInvString> |
|
|
107 |
void LoadRegions_(const std::vector<GInvString>& regions) { |
|
|
108 |
ginvs_.resize(regions.size()); |
|
|
109 |
for (size_t i = 0; i < regions.size(); ++i) { |
|
|
110 |
ginvs_[i] = GInv(bam_reader_.Header().Name2ID(regions[i].contig()), regions[i].left(), regions[i].right()); |
|
|
111 |
} |
|
|
112 |
} |
|
|
113 |
|
|
|
114 |
}; |
|
|
115 |
|
|
|
116 |
}//namespace neusomatic |
|
|
117 |
|
|
|
118 |
#endif /* LIB_INCLUDE_FRAGSTORESEQAN_HPP_ */ |