Switch to unified view

a b/neusomatic/include/targeted_loading.hpp
1
#ifndef TARGET_LOADING_HPP
2
#define TARGET_LOADING_HPP
3
4
#include <vector>
5
#include <map>
6
#include <unordered_map>
7
8
#include <SeqLib/BamReader.h>
9
#include "Options.h"
10
11
namespace neusomatic{
12
13
template<typename GInv, typename BamRecord, typename BamReader>
14
class CaptureLayout {
15
  size_t region_idx_;
16
  const neusomatic::Options & opts_; 
17
  BamReader bam_reader_;
18
  std::vector<GInv> ginvs_;
19
  //std::map <std::string, std::vector<GInv>> contig_ginv_;
20
  std::map <GInv, std::vector<BamRecord>> ginv_records_;
21
22
public:
23
  template<typename GInvString>
24
  CaptureLayout(const std::string& bam_path, const std::vector<GInvString>& regions, const neusomatic::Options& o): 
25
      region_idx_(0), opts_(o) {
26
      bam_reader_.Open(bam_path);
27
      LoadRegions_(regions);
28
  }
29
30
  decltype(auto) ginv_records() const {
31
    return (ginv_records_);
32
  }
33
34
  decltype(auto) Reader() const {
35
    return (bam_reader_);
36
  }
37
38
  size_t NumRegion() const {
39
    return ginvs_.size();
40
  }
41
42
  bool NextRegion(const bool full_contained) {
43
    if (region_idx_  == ginvs_.size()) return false;
44
    ginv_records_.clear();
45
46
    const GInv& curr_ginv = ginvs_[region_idx_];
47
    SeqLib::GenomicRegion gr(curr_ginv.contig(), curr_ginv.left(), curr_ginv.right());
48
    bam_reader_.SetRegion(gr); 
49
50
    std::vector<BamRecord> records;
51
    BamRecord rec;
52
    while (bam_reader_.GetNextRecord(rec)) {
53
      bool good = true;
54
      if (full_contained) {
55
        if (rec.Position() > curr_ginv.left() || rec.PositionEnd() + 1 < curr_ginv.right()) {
56
          good = false;
57
        }
58
      }
59
      else {
60
        if (rec.Position() >= curr_ginv.right() || curr_ginv.left() > rec.PositionEnd()) { // overlapped
61
          good = false;
62
        }
63
      }
64
65
      if (rec.MapQuality() < opts_.min_mapq()) { // mapq filter
66
        good = false; 
67
      }
68
      if (!opts_.include_secondary() && rec.SecondaryFlag()) { // secondary flag
69
        good = false; 
70
      }
71
      if (opts_.filter_duplicate() && rec.DuplicateFlag()) { // duplicate flag
72
        good = false;
73
      }
74
      if (opts_.filter_QCfailed() && rec.QCFailFlag()) { // QC failed flag
75
        good = false;
76
      }
77
      if (opts_.filter_improper_pair() && !rec.ProperPair()) { // Proper pair flag. Set by aligner
78
        good = false;
79
      }
80
      if (opts_.filter_mate_unmapped() && !rec.MateMappedFlag()) { // Mate is ummapped
81
        good = false;
82
      }
83
      if (opts_.filter_improper_orientation() && !rec.ProperOrientation()) { // pair read has proper orientation (FR) and on same chrom.
84
        good = false;
85
      }
86
87
      if (good) {
88
        if(rec.GetCigar().size() == 0) {
89
          std::cerr << "warning: " << rec.Qname() << " has no cigar\n";
90
        } else if (rec.Position() == rec.PositionEnd()) {
91
          std::cerr << "warning: " << rec.Qname() << " has no acutall alignment\n";
92
        }
93
        else {
94
          records.push_back(rec);
95
        }
96
      } 
97
    }
98
    ginv_records_[curr_ginv] = records;
99
100
    region_idx_ ++;
101
102
    return true;
103
  }
104
105
private:
106
  template<typename GInvString>
107
  void LoadRegions_(const std::vector<GInvString>& regions) {
108
    ginvs_.resize(regions.size());    
109
    for (size_t i = 0; i < regions.size(); ++i) {
110
      ginvs_[i] = GInv(bam_reader_.Header().Name2ID(regions[i].contig()), regions[i].left(), regions[i].right());
111
    }
112
  }
113
114
};
115
116
}//namespace neusomatic
117
118
#endif /* LIB_INCLUDE_FRAGSTORESEQAN_HPP_ */