Switch to unified view

a b/singlecellmultiomics/statistic/oversequencing.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
from matplotlib.ticker import MaxNLocator
4
import matplotlib.pyplot as plt
5
from .statistic import StatisticHistogram
6
import singlecellmultiomics.pyutils as pyutils
7
import collections
8
9
import matplotlib
10
matplotlib.rcParams['figure.dpi'] = 160
11
matplotlib.use('Agg')
12
13
14
class OversequencingHistogram(StatisticHistogram):
15
    def __init__(self, args):
16
        StatisticHistogram.__init__(self, args)
17
        self.histogram = collections.Counter()
18
19
    def processRead(self, R1,R2=None):
20
21
        for read in [R1, R2]:
22
            if read is None:
23
                continue
24
25
            if read.has_tag('RC'):
26
                overseq = read.get_tag('RC')
27
                self.histogram[overseq] += 1
28
                if overseq > 1:
29
                    self.histogram[overseq - 1] -= 1
30
31
                # Compatibility with picard --TAG_DUPLICATE_SET_MEMBERS
32
                """
33
                java -jar `which picard.jar` MarkDuplicates I=sorted.bam O=marked_duplicates.bam M=marked_dup_metrics.txt TAG_DUPLICATE_SET_MEMBERS=1
34
                """
35
            elif read.has_tag('PG'):
36
                if read.has_tag('DS') and not read.is_duplicate:
37
                    self.histogram[read.get_tag('DS')] += 1
38
                else:
39
                    self.histogram[1] += 1
40
41
            break
42
43
    def __repr__(self):
44
        return f'The average oversequencing is {pyutils.meanOfCounter(self.histogram)}, SD:{pyutils.varianceOfCounter(self.histogram)}'
45
46
    def plot(self, target_path, title=None):
47
        fig, ax = plt.subplots()
48
        overseqRange = list(range(1, 20))
49
50
        ax.scatter(overseqRange, [self.histogram[x] for x in overseqRange])
51
        if title is not None:
52
            ax.set_title(title)
53
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
54
        ax.set_xlabel("Amount of fragments associated with molecule")
55
        ax.set_ylabel("# Molecules")
56
        plt.tight_layout()
57
        plt.savefig(target_path)
58
59
        ax = plt.gca()
60
        ax.set_ylim(1, None)
61
        ax.set_yscale('log')
62
        plt.savefig(target_path.replace('.png', '.log.png'))
63
64
        plt.close()