Switch to unified view

a b/singlecellmultiomics/statistic/mappingquality.py
1
from .statistic import StatisticHistogram
2
import singlecellmultiomics.pyutils as pyutils
3
import collections
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
import seaborn as sns
7
8
9
class MappingQualityHistogram(StatisticHistogram):
    """Histogram of the mapping qualities of the processed reads.

    ``self.histogram`` is a ``collections.Counter`` keyed by the
    ``mapping_quality`` attribute of each read; the value is the number of
    reads seen with that mapping quality.
    """

    def __init__(self, args):
        StatisticHistogram.__init__(self, args)
        self.histogram = collections.Counter()

    def processRead(self, R1, R2):
        """Add the mapping quality of every mate that is present.

        Args:
            R1: first mate, or None when it is absent.
            R2: second mate, or None when it is absent.
        """
        for read in (R1, R2):
            if read is None:
                continue

            self.histogram[read.mapping_quality] += 1

    def __repr__(self):
        # NOTE(review): the figure labelled "SD" is whatever
        # pyutils.varianceOfCounter returns; if that is the variance (as the
        # helper's name suggests) the label is misleading - confirm.
        return f'The average mapping quality is {pyutils.meanOfCounter(self.histogram)}, SD:{pyutils.varianceOfCounter(self.histogram)}'

    def get_df(self):
        """Return the histogram as a DataFrame with a single ``mq`` column."""
        return pd.DataFrame.from_dict({'mq': self.histogram})

    def to_csv(self, path):
        """Write the table returned by ``get_df`` to ``path`` as CSV."""
        self.get_df().to_csv(path)

    @staticmethod
    def _log_scale_path(target_path):
        """Return the path used for the log-scaled variant of a figure.

        ``.log`` is inserted in front of the file extension, so
        ``plot.png`` becomes ``plot.log.png`` and ``plot.pdf`` becomes
        ``plot.log.pdf``. Only the final extension is touched; a ``.png``
        occurring elsewhere in the path (for example in a directory name)
        is left alone.
        """
        import os  # local import: keeps this block self-contained

        root, ext = os.path.splitext(target_path)
        # Without an extension fall back to '.png'; this assumes matplotlib's
        # default savefig format, so that '.log' is not taken for the format.
        return root + '.log' + (ext or '.png')

    def plot(self, target_path, title=None):
        """Save the histogram as a bar plot, once linear and once log-scaled.

        Args:
            target_path: path of the linear-scale figure. The log-scale
                figure is written next to it with ``.log`` inserted before
                the extension (see ``_log_scale_path``).
            title: optional title drawn above the plot.
        """
        # Sort on mapping quality so the bars run from low to high instead of
        # following the order of the underlying counter.
        counts = self.get_df()['mq'].sort_index()

        # Work on an explicit figure rather than pyplot's "current" one, and
        # close it even when plotting or saving fails.
        fig, ax = plt.subplots(figsize=(10, 4))
        try:
            counts.plot.bar(ax=ax)
            ax.set_xlabel('Mapping quality')
            ax.set_ylabel('Frequency (reads)')

            if title is not None:
                ax.set_title(title)
            fig.tight_layout()
            fig.savefig(target_path)

            # Second pass: same figure on a logarithmic y-axis.
            ax.set_yscale('log')
            sns.despine(fig=fig)
            fig.tight_layout()
            fig.savefig(self._log_scale_path(target_path))
        finally:
            plt.close(fig)