Download this file

153 lines (116 with data), 5.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from singlecellmultiomics.molecule.molecule import Molecule
from singlecellmultiomics.molecule.featureannotatedmolecule import FeatureAnnotatedMolecule
class NlaIIIMolecule(Molecule):
"""Nla III based Molecule class
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
**kwargs: extra args
"""
def __init__(self, fragment,
site_has_to_be_mapped=False,
# molecule is invalid when NLA site does not map (requires
# reference)
**kwargs):
self.site_location = None
self.site_has_to_be_mapped = site_has_to_be_mapped
Molecule.__init__(self, fragment, **kwargs)
def _add_fragment(self,fragment):
# Update the cut coordinate tho the (left most extreme value)
self.assignment_radius = fragment.assignment_radius
if fragment.site_location is not None:
if self.site_location is None:
self.site_location = [fragment.site_location[0],fragment.site_location[1]]
elif fragment.strand: # Reverse:
self.site_location[1] = max(fragment.site_location[1], self.site_location[1]) # this is the coordinate
else:
self.site_location[1] = min(fragment.site_location[1], self.site_location[1]) # this is the coordinate
# else : writing a fragment which has no cut location associated
Molecule._add_fragment(self, fragment)
def write_tags(self):
Molecule.write_tags(self)
if self.reference is not None: # Only calculate this statistic when a reference is available
if self.get_cut_site() is not None:
self.set_meta('Us', self.get_undigested_site_count())
def is_valid(self, set_rejection_reasons=False, reference=None):
try:
chrom, start, strand = self.get_cut_site()
except Exception as e:
if set_rejection_reasons:
self.set_rejection_reason('no_cut_site_found')
return False
if self.site_has_to_be_mapped:
if reference is None:
if self.reference is None:
raise ValueError(
'Please supply a reference (PySAM.FastaFile)')
reference = self.reference
if reference.fetch(chrom, start, start + 4).upper() != 'CATG':
if set_rejection_reasons:
self.set_rejection_reason('no_CATG_in_ref')
return False
return True
def get_fragment_span_sequence(self, reference=None):
"""Obtain the sequence between the start and end of the molecule
Args:
reference(pysam.FastaFile) : reference to use.
If not specified `self.reference` is used
Returns:
sequence (str)
"""
if self.chromosome is None:
return ''
if reference is None:
if self.reference is None:
raise ValueError('Please supply a reference (PySAM.FastaFile)')
reference = self.reference
return reference.fetch(
self.chromosome,
self.spanStart,
self.spanEnd).upper()
def get_undigested_site_count(self, reference=None):
"""
Obtain the amount of undigested sites in the span of the molecule
Args:
reference(pysam.FastaFile) : reference handle
Returns:
undigested_site_count : int
amount of undigested cut sites in the mapping span of the molecule
Raises:
ValueError : when the span of the molecule is not properly defined
"""
if reference is None:
if self.reference is None:
raise ValueError('Please supply a reference (PySAM.FastaFile)')
reference = self.reference
seq = self.get_fragment_span_sequence(reference)
total = seq.count('CATG')
if self.strand == 0 and seq.endswith('CATG'):
total -= 1
elif self.strand == 1 and seq.startswith('CATG'):
total -= 1
return total
def write_tags_to_psuedoreads(self, reads, call_super=True):
if call_super:
Molecule.write_tags_to_psuedoreads(self, reads)
if self.reference is not None: # Only calculate this statistic when a reference is available
if self.get_cut_site() is not None:
us = self.get_undigested_site_count()
for read in reads:
read.set_tag('Us', us)
class AnnotatedNLAIIIMolecule(FeatureAnnotatedMolecule, NlaIIIMolecule):
"""Nla III based Molecule which is annotated with features (genes/exons/introns, .. )
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
features (singlecellmultiomics.features.FeatureContainer) : container to use to obtain features from
**kwargs: extra args
"""
def write_tags(self):
NlaIIIMolecule.write_tags(self)
FeatureAnnotatedMolecule.write_tags(self)
def __init__(self, fragment, features, **kwargs):
self.site_location = None
FeatureAnnotatedMolecule.__init__(self, fragment, features, **kwargs)
NlaIIIMolecule.__init__(self, fragment, **kwargs)
def write_tags_to_psuedoreads(self,reads):
NlaIIIMolecule.write_tags_to_psuedoreads(self, reads)
FeatureAnnotatedMolecule.write_tags_to_psuedoreads(self,reads,call_super=False)