Download this file

178 lines (131 with data), 5.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
from singlecellmultiomics.molecule.molecule import Molecule
from singlecellmultiomics.molecule.featureannotatedmolecule import FeatureAnnotatedMolecule
import collections
class CHICMolecule(Molecule):
"""CHIC Molecule class
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
**kwargs: extra args
"""
def __init__(self, fragment,
**kwargs):
self.site_location = None
Molecule.__init__(self, fragment, **kwargs)
self.ligation_motif = None
# Extracted from fragments:
self.assignment_radius=None
def _add_fragment(self,fragment):
# Update the cut coordinate tho the (left most extreme value)
self.assignment_radius =fragment.assignment_radius
if fragment.site_location is not None:
if self.site_location is None:
self.site_location = [fragment.site_location[0],fragment.site_location[1]]
elif fragment.strand: # Reverse:
self.site_location[1] = max(fragment.site_location[1], self.site_location[1]) # this is the coordinate
else:
self.site_location[1] = min(fragment.site_location[1], self.site_location[1]) # this is the coordinate
# else : writing a fragment which has no cut location associated
Molecule._add_fragment(self, fragment)
def get_cut_site(self):
"""For restriction based protocol data, obtain genomic location of cut site
Returns:
None if site is not available
chromosome (str)
position (int)
strand (bool)
"""
return (*self.site_location, self.strand)
def write_tags(self):
# Write DS tag when it could have been updated
if self.assignment_radius is not None and self.assignment_radius>0 and len(self)>1:
for frag in self:
frag.set_meta('DS', self.site_location[1])
Molecule.write_tags(self)
def is_valid(self, set_rejection_reasons=False):
try:
chrom, start, strand = self.get_cut_site()
except Exception as e:
if set_rejection_reasons:
self.set_rejection_reason(f'no_cut_site_found')
return False
return True
def get_fragment_span_sequence(self, reference=None):
"""Obtain the sequence between the start and end of the molecule
Args:
reference(pysam.FastaFile) : reference to use.
If not specified `self.reference` is used
Returns:
sequence (str)
"""
if reference is None:
if self.reference is None:
raise ValueError('Please supply a reference (PySAM.FastaFile)')
reference = self.reference
return reference.fetch(
self.chromosome,
self.spanStart,
self.spanEnd).upper()
def __finalise__(self):
# Obtain ligation motif(s)
self.update_ligation_motif()
Molecule.__finalise__(self)
def update_ligation_motif(self):
"""
Extract lh tag from associated reads and set most common one to the RZ tag
"""
ligation_motif_counts = collections.Counter()
for fragment in self:
if fragment.ligation_motif is not None:
ligation_motif_counts[fragment.ligation_motif]+=1
if len(ligation_motif_counts)>0:
self.ligation_motif = ligation_motif_counts.most_common(1)[0][0]
self.set_meta('RZ', self.ligation_motif)
class CHICNLAMolecule(Molecule):
"""CHIC NLA Molecule class
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
**kwargs: extra args
"""
def get_upstream_site(self, scan_extra_bp=3):
read = None
for fragment in self:
if fragment[0] is not None:
read = fragment[0]
break
if read is None:
raise ValueError() # Read 1 is not mapped
if self.strand==0:
start = read.reference_start
return self.reference.fetch( self.chromosome, start-4-scan_extra_bp, start)
else:
start = read.reference_end
return self.reference.fetch( self.chromosome, start, start+4+scan_extra_bp)
def __init__(self, fragment, reference, **kwargs):
self.reference = reference
CHICMolecule.__init__(self, fragment, reference=reference, **kwargs)
def write_tags(self):
try:
site = self.get_upstream_site()
self.set_meta('RZ',site)
if 'N' in site:
self.set_meta('dt','UNK')
elif 'CATG' in site:
self.set_meta('dt','NLA')
else:
self.set_meta('dt','CHIC')
except ValueError:
self.set_meta('dt','UNK')
self.set_meta('RR','No_R1')
class AnnotatedCHICMolecule(CHICMolecule, FeatureAnnotatedMolecule):
"""Chic Molecule which is annotated with features (genes/exons/introns, .. )
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
features (singlecellmultiomics.features.FeatureContainer) : container to use to obtain features from
**kwargs: extra args
"""
def write_tags(self):
CHICMolecule.write_tags(self)
FeatureAnnotatedMolecule.write_tags(self)
def __init__(self, fragment, features, **kwargs):
CHICMolecule.__init__(self, fragment, **kwargs)
FeatureAnnotatedMolecule.__init__(self, fragment, features, **kwargs)