[45ad7e]: / singlecellmultiomics / bamProcessing / alignment_view.py

Download this file

171 lines (139 with data), 6.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pysamiterators.iterators as pysamiterators
import itertools
import html
def cstr(s, color='black', weight=300):
return f'<text style="color:{color}; font-weight:{weight}" >{s}</text>'
"""
Convert molecule to HTML representation
"""
def visualise_molecule(molecule,
reference=None, # pysam handle to reference
show_reference=False, # Show reference sequence
margin_bases=0,
start_span=None,
end_span=None,
show_quals=True,
R1PrimerLength=4,
R2PrimerLength=6,
highlight=set()
):
html_str = f'<div style="font-family:monospace;line-height: 10px; font-size:12px; white-space: nowrap;">'
reference_sequence = None
molecule_flattened = list(itertools.chain.from_iterable(molecule))
chrom, _start_span, _end_span = pysamiterators.getListSpanningCoordinates(
molecule_flattened)
# Calculate start and end of visualisation if not supplied by user:
if end_span is None:
end_span = _end_span
end_span += margin_bases
if start_span is None:
start_span = _start_span
start_span -= margin_bases
start_span = max(0, start_span)
# todo : end span
end = end_span
start = start_span
# Obtain reference sequence
if reference is not None and show_reference:
reference_sequence = reference.fetch(chrom, start_span, end_span)
html_str += f'ref : {reference_sequence}<br />'
for R1, R2 in molecule:
try:
s, e = pysamiterators.getPairGenomicLocations(
R1, R2,
R1PrimerLength=4, R2PrimerLength=6)
keep = range(s, e + 1)
except Exception as e:
keep = None
for read in [R1, R2]:
visBases = ['.'] * (end - start)
visQuals = ['.'] * (end - start)
emittedRef = ['.'] * (end - start)
it = pysamiterators.ReadCycleIterator(
read, with_seq=True, matches_only=True, reference=reference)
for i, (cycle, qpos, refpos, refbase) in enumerate(it):
if i == 0:
firstCycle = cycle
visPos = refpos - start
if not (visPos >= 0 and visPos < (end - start)):
continue
refbase = refbase.upper()
emittedRef[visPos] = refbase
#print(qpos,refpos,refbase ,visPos,start, end)
qbase = read.query_sequence[qpos]
qqual = read.qual[qpos]
if read.is_read2 and cycle < R2PrimerLength:
weight = 100
visBases[visPos] = cstr(qbase, color='blue', weight=weight)
visQuals[visPos] = cstr(
html.escape(
str(qqual)),
color='black',
weight=weight)
elif keep is not None and refpos not in keep:
weight = 200
c = '#CCC'
visBases[visPos] = cstr(qbase, color=c, weight=weight)
visQuals[visPos] = cstr(
html.escape(
str(qqual)),
color=c,
weight=weight)
elif refpos in highlight:
if qbase != refbase:
weight = 700
visBases[visPos] = cstr(
qbase, color='red', weight=weight)
weight = 400
visQuals[visPos] = cstr(
html.escape(
str(qqual)),
color='black',
weight=weight)
else:
weight = 700
visBases[visPos] = cstr(
qbase, color='green', weight=weight)
weight = 400
visQuals[visPos] = cstr(
html.escape(
str(qqual)),
color='black',
weight=weight)
else:
if qbase != refbase:
weight = 700
# +'/'+cstr(refbase,color='red',weight=weight)
visBases[visPos] = cstr(
f'{qbase}', color='orange', weight=weight)
weight = 400
visQuals[visPos] = cstr(
html.escape(
str(qqual)),
color='black',
weight=weight)
else:
weight = 100
visBases[visPos] = cstr(
qbase, color='grey', weight=weight)
visQuals[visPos] = cstr(
html.escape(
str(qqual)),
color='#CCC',
weight=weight)
html_str += f'{"RD1 " if read.is_read1 else "RD2 "}: ' + \
''.join(visBases) + f' cycle:{firstCycle}:{cycle} {it.start} {it.len}'
html_str += ' ' + cstr(
f'SEQ:{read.get_tag("Is")}:{read.get_tag("Fc")}:{read.get_tag("La")}:{read.get_tag("Ti")}',
'brown') + cstr(
f' MD:{read.get_tag("MD")}',
'black')
html_str += '<br />'
if show_quals:
html_str += 'rdQ : ' + ''.join(visQuals) + '<br />'
# if showEmittedRef:
# html_str+='eref: ' +''.join(emittedRef)+'<br />'
#html_str+=f'<br />pred: {"".join(visPredict)}<br />conf: {"".join(visConf)}<br /><br /><br />'
return html_str