[45ad7e]: / singlecellmultiomics / statistic / mappingquality.py

Download this file

50 lines (36 with data), 1.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from .statistic import StatisticHistogram
import singlecellmultiomics.pyutils as pyutils
import collections
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
class MappingQualityHistogram(StatisticHistogram):
    """Histogram of the mapping qualities of the processed reads.

    Counts are accumulated in ``self.histogram``, a ``collections.Counter``
    keyed by ``read.mapping_quality``.
    """

    def __init__(self, args):
        """Initialise the base statistic with ``args`` and an empty histogram."""
        super().__init__(args)
        self.histogram = collections.Counter()

    def processRead(self, R1, R2):
        """Count the mapping quality of each mate that is present.

        Args:
            R1: First mate, or ``None`` when absent.
            R2: Second mate, or ``None`` when absent.
        """
        for read in (R1, R2):
            if read is None:
                continue
            self.histogram[read.mapping_quality] += 1

    def __repr__(self):
        # NOTE(review): the value labelled "SD" comes from
        # pyutils.varianceOfCounter -- confirm whether that helper returns a
        # variance or a standard deviation before relying on the label.
        return f'The average mapping quality is {pyutils.meanOfCounter(self.histogram)}, SD:{pyutils.varianceOfCounter(self.histogram)}'

    def get_df(self):
        """Return the histogram as a DataFrame with a single ``'mq'`` column.

        The index holds the observed mapping-quality values and the column
        holds the number of reads counted for each of them.
        """
        return pd.DataFrame.from_dict({'mq': self.histogram})

    def to_csv(self, path):
        """Write the table returned by :meth:`get_df` to ``path`` as CSV."""
        self.get_df().to_csv(path)

    @staticmethod
    def _log_scale_path(target_path):
        """Return ``target_path`` with ``.log`` inserted before its extension.

        ``reads.png`` becomes ``reads.log.png``. Only the final extension is
        touched, so a ``.png`` elsewhere in the path (for example in a
        directory name) is left alone, and a non-png target still gets a
        distinct file name instead of overwriting the linear-scale figure.
        """
        import os  # local import: keeps the module's import block untouched

        root, ext = os.path.splitext(target_path)
        # Without an extension matplotlib appends its default format;
        # '.png' is assumed here (the matplotlib default) -- TODO confirm if
        # rcParams['savefig.format'] is overridden in this project.
        return f'{root}.log{ext or ".png"}'

    def plot(self, target_path, title=None):
        """Save the histogram as bar charts with a linear and a log y-axis.

        The linear-scale figure is written to ``target_path``; the log-scale
        figure is written next to it with ``.log`` inserted before the file
        extension (``reads.png`` -> ``reads.log.png``).

        Args:
            target_path: Destination of the linear-scale figure.
            title: Optional title drawn above the plot.
        """
        counts = self.get_df()['mq']
        if counts.empty:
            # pandas cannot draw a bar chart from an empty series; still
            # produce (empty) figures so the expected output files exist.
            fig, ax = plt.subplots(figsize=(10, 4))
        else:
            # Order the bars by mapping quality rather than by the order in
            # which the qualities were first encountered.
            ax = counts.sort_index().plot.bar(figsize=(10, 4))
            fig = ax.get_figure()

        try:
            ax.set_xlabel('Mapping quality')
            ax.set_ylabel('Frequency (reads)')
            if title is not None:
                ax.set_title(title)
            # Style once, up front, so both saved figures look the same.
            sns.despine(ax=ax)

            fig.tight_layout()
            fig.savefig(target_path)

            ax.set_yscale('log')
            fig.tight_layout()  # tick labels change size on a log axis
            fig.savefig(self._log_scale_path(target_path))
        finally:
            # Always release the figure, even when saving fails.
            plt.close(fig)