Switch to unified view

a b/singlecellmultiomics/statistic/methylation.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
import matplotlib.pyplot as plt
4
from .statistic import StatisticHistogram
5
import singlecellmultiomics.pyutils as pyutils
6
import collections
7
import pandas as pd
8
import matplotlib
9
matplotlib.rcParams['figure.dpi'] = 160
10
matplotlib.use('Agg')
11
12
13
class MethylationContextHistogram(StatisticHistogram):
14
    def __init__(self, args):
15
        StatisticHistogram.__init__(self, args)
16
        self.context_obs = collections.Counter()  # (bismark_call_tag)=> observations
17
        self.hasdata = False
18
    def processRead(self, R1,R2):
19
20
        for read in [R2,R2]:
21
            if read is None:
22
                continue
23
24
            if not read.is_paired or not read.has_tag(
25
                    'XM') or not read.is_read1 or read.is_duplicate or read.has_tag('RR'):
26
                return
27
            self.hasdata = True
28
            tags = dict(read.tags)
29
            for tag in 'zhx':
30
                self.context_obs[tag] += tags.get(f's{tag}', 0)
31
                self.context_obs[tag.upper()] += tags.get(f's{tag.upper()}', 0)
32
33
    def __repr__(self):
34
        return f'Methylation status.'
35
36
    def get_df(self):
37
38
        x = self.context_obs
39
40
        return pd.DataFrame(
41
            {'ratio':
42
             {
43
                 'CpG': (x['Z'] / (x['z'] + x['Z'])) if x['z'] + x['Z'] > 0 else 0 ,
44
                 'CHG': (x['X'] / (x['x'] + x['X'])) if x['x'] + x['X'] >0 else 0,
45
                 'CHH': (x['H'] / (x['h'] + x['H'])) if x['h'] + x['H'] > 0 else 0},
46
             'methylated': {
47
                 'CpG': x['Z'],
48
                 'CHG': x['X'],
49
                 'CHH': x['H']
50
             },
51
             'unmethylated': {
52
                 'CpG': x['z'],
53
                 'CHG': x['x'],
54
                 'CHH': x['h']
55
             }
56
             })
57
58
    def plot(self, target_path, title=None):
59
60
        df = self.get_df()
61
        # Methylation ratio plot:
62
        name = 'methylation_pct'
63
64
        (df['ratio'] * 100).plot.bar()
65
        ax = plt.gca()
66
        for p in ax.patches:
67
            ax.annotate(
68
                f'{p.get_height():.1f}%',
69
                (p.get_x() + p.get_width() / 2, p.get_height() * 1.005),
70
                va='bottom', ha='center')
71
        ax.set_ylim(0, 110)
72
        ax.set_xlabel('Methylation context')
73
        ax.set_ylabel('% methylated')
74
        ax.spines['right'].set_visible(False)
75
        ax.spines['top'].set_visible(False)
76
        plt.show()
77
        if title is not None:
78
            plt.title(title)
79
        plt.tight_layout()
80
        plt.savefig(target_path.replace('.png', f'.{name}.png'))
81
82
        ax.set_yscale('log')
83
        ax.set_ylim(0.01, 110)
84
85
        plt.tight_layout()
86
        plt.savefig(target_path.replace('.png', f'.{name}.log.png'))
87
        plt.close()
88
        ########
89
        name = 'methylation_absolute'
90
91
        (df[['methylated', 'unmethylated']]).plot.bar()
92
        ax = plt.gca()
93
        for p in ax.patches:
94
            ax.annotate(
95
                f'{p.get_height()}',
96
                (p.get_x() + p.get_width() / 2, p.get_height() * 1.005),
97
                va='bottom', ha='center')
98
99
        ax.set_xlabel('Methylation context')
100
        ax.set_ylabel('Bases total')
101
        plt.tight_layout()
102
        ax.spines['right'].set_visible(False)
103
        ax.spines['top'].set_visible(False)
104
        if title is not None:
105
            plt.title(title)
106
        plt.tight_layout()
107
        plt.savefig(target_path.replace('.png', f'.{name}.png'))
108
109
        ax.set_yscale('log')
110
        plt.tight_layout()
111
        plt.savefig(target_path.replace('.png', f'.{name}.log.png'))
112
        plt.close()
113
114
    def to_csv(self, path):
115
116
        self.get_df().to_csv(path)