Switch to unified view

a b/neusomatic/include/SeqanUtils.h
1
#ifndef SEQAN_UTILS_HPP
2
#define SEQAN_UTILS_HPP
3
4
#include <vector>
5
#include<seqan/seq_io.h>
6
#include<seqan/misc/interval_tree.h>
7
#include<seqan/find.h>
8
9
namespace neusomatic{ namespace bio{
10
11
template<typename Base>
12
std::string Base2String(const Base& base, const std::string& gap_symbol){
13
14
    //std::cout<<static_cast<unsigned char>(base)<<" :static cast: ";
15
    //std::cout<<(unsigned)seqan::ordValue(base)<<std::endl;
16
    
17
    switch((unsigned)seqan::ordValue(base))
18
    {
19
        case 0: return "A";
20
        case 1: return "C";
21
        case 2: return "G";
22
        case 3: return "T";
23
        default: return gap_symbol;
24
    }
25
    //should never reach here
26
    return "";
27
28
}
29
30
template<>
31
std::string Base2String<char>(const char& base, const std::string& gap_symbol) {
32
  switch(base) {
33
    case 'a':
34
    case 'A': 
35
    case 0:
36
      return "A";
37
38
    case 'c':
39
    case 'C':
40
    case 1:
41
      return "C";
42
43
    case 'g':
44
    case 'G':
45
    case 2:
46
      return "G";
47
    case 't':
48
    case 'T':
49
    case 3:
50
      return "T";
51
    default:
52
      return gap_symbol;
53
  }
54
}
55
56
template<typename Itr>
57
std::string to_string(Itr b, Itr e, const std::string& gap_symbol = "N"){
58
  std::string result;
59
  auto it = b;
60
  for(; it < e; ++it){
61
    result += Base2String(*it, gap_symbol);
62
  }
63
  return result;
64
}
65
66
inline std::string to_string(const seqan::Dna5String& seq, const std::string& gap_symbol = "N"){
67
  std::string result;
68
  for (unsigned int i = 0; i < seqan::length(seq); ++i) {
69
     result += Base2String(seq[i], gap_symbol); 
70
  }
71
  return result;
72
}
73
74
template<typename TInterval>
75
inline decltype(auto) ToSeqanIntervals(const std::vector<TInterval>& invs){ 
76
77
  typedef seqan::IntervalAndCargo<int, TInterval> TIntervalAndCargo;
78
  std::vector<TIntervalAndCargo> seqan_intervals; 
79
  for (auto it = invs.cbegin(); it != invs.cend(); ++it) {
80
    TIntervalAndCargo i(it->left(), it->right(), *it);
81
    seqan_intervals.push_back(i);
82
  }
83
  return seqan_intervals;
84
}
85
86
inline std::vector<seqan::CharString> SeqanSplit(seqan::CharString target, std::string splitter) {
87
  /*
88
    Return a vec of size 1 containing the intact target if splitter is not found in the target. 
89
  */
90
  if (seqan::length(target) == 0 || seqan::length(splitter) == 0) {
91
    throw std::runtime_error("Invalid string and pattern to split.");
92
  }
93
  using TPos = typename seqan::Position<seqan::CharString>::Type;
94
95
  std::vector<seqan::CharString> result;
96
  seqan::Finder<seqan::CharString> finder(target);
97
  seqan::Pattern<seqan::CharString, seqan::Horspool> pattern(splitter);
98
  std::vector<std::pair<TPos, TPos>> searches;
99
  while (seqan::find(finder, pattern)) {
100
    searches.emplace_back(seqan::beginPosition(finder), seqan::endPosition(finder));
101
  }
102
103
  TPos lastp = 0;
104
  for (auto const& p: searches) {
105
    seqan::CharString unit = seqan::infix(target, lastp, p.first);
106
    if (!seqan::empty(unit)) result.push_back(unit);
107
    lastp = p.second;
108
  }
109
  if (lastp < seqan::length(target) - 1) {
110
    seqan::CharString unit = seqan::infix(target, lastp, seqan::length(target));
111
    result.push_back(unit);
112
  }
113
  return result;
114
}
115
116
}// namespace neusomatic
117
}// namespace bio
118
119
#endif /* SEQAN_UTILS_HPP */