Switch to side-by-side view

--- a
+++ b/singlecellmultiomics/statistic/methylation.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import matplotlib.pyplot as plt
+from .statistic import StatisticHistogram
+import singlecellmultiomics.pyutils as pyutils
+import collections
+import pandas as pd
+import matplotlib
+matplotlib.rcParams['figure.dpi'] = 160
+matplotlib.use('Agg')
+
+
+class MethylationContextHistogram(StatisticHistogram):
+    def __init__(self, args):
+        StatisticHistogram.__init__(self, args)
+        self.context_obs = collections.Counter()  # (bismark_call_tag)=> observations
+        self.hasdata = False
+    def processRead(self, R1,R2):
+
+        for read in [R2,R2]:
+            if read is None:
+                continue
+
+            if not read.is_paired or not read.has_tag(
+                    'XM') or not read.is_read1 or read.is_duplicate or read.has_tag('RR'):
+                return
+            self.hasdata = True
+            tags = dict(read.tags)
+            for tag in 'zhx':
+                self.context_obs[tag] += tags.get(f's{tag}', 0)
+                self.context_obs[tag.upper()] += tags.get(f's{tag.upper()}', 0)
+
+    def __repr__(self):
+        return f'Methylation status.'
+
+    def get_df(self):
+
+        x = self.context_obs
+
+        return pd.DataFrame(
+            {'ratio':
+             {
+                 'CpG': (x['Z'] / (x['z'] + x['Z'])) if x['z'] + x['Z'] > 0 else 0 ,
+                 'CHG': (x['X'] / (x['x'] + x['X'])) if x['x'] + x['X'] >0 else 0,
+                 'CHH': (x['H'] / (x['h'] + x['H'])) if x['h'] + x['H'] > 0 else 0},
+             'methylated': {
+                 'CpG': x['Z'],
+                 'CHG': x['X'],
+                 'CHH': x['H']
+             },
+             'unmethylated': {
+                 'CpG': x['z'],
+                 'CHG': x['x'],
+                 'CHH': x['h']
+             }
+             })
+
+    def plot(self, target_path, title=None):
+
+        df = self.get_df()
+        # Methylation ratio plot:
+        name = 'methylation_pct'
+
+        (df['ratio'] * 100).plot.bar()
+        ax = plt.gca()
+        for p in ax.patches:
+            ax.annotate(
+                f'{p.get_height():.1f}%',
+                (p.get_x() + p.get_width() / 2, p.get_height() * 1.005),
+                va='bottom', ha='center')
+        ax.set_ylim(0, 110)
+        ax.set_xlabel('Methylation context')
+        ax.set_ylabel('% methylated')
+        ax.spines['right'].set_visible(False)
+        ax.spines['top'].set_visible(False)
+        plt.show()
+        if title is not None:
+            plt.title(title)
+        plt.tight_layout()
+        plt.savefig(target_path.replace('.png', f'.{name}.png'))
+
+        ax.set_yscale('log')
+        ax.set_ylim(0.01, 110)
+
+        plt.tight_layout()
+        plt.savefig(target_path.replace('.png', f'.{name}.log.png'))
+        plt.close()
+        ########
+        name = 'methylation_absolute'
+
+        (df[['methylated', 'unmethylated']]).plot.bar()
+        ax = plt.gca()
+        for p in ax.patches:
+            ax.annotate(
+                f'{p.get_height()}',
+                (p.get_x() + p.get_width() / 2, p.get_height() * 1.005),
+                va='bottom', ha='center')
+
+        ax.set_xlabel('Methylation context')
+        ax.set_ylabel('Bases total')
+        plt.tight_layout()
+        ax.spines['right'].set_visible(False)
+        ax.spines['top'].set_visible(False)
+        if title is not None:
+            plt.title(title)
+        plt.tight_layout()
+        plt.savefig(target_path.replace('.png', f'.{name}.png'))
+
+        ax.set_yscale('log')
+        plt.tight_layout()
+        plt.savefig(target_path.replace('.png', f'.{name}.log.png'))
+        plt.close()
+
+    def to_csv(self, path):
+
+        self.get_df().to_csv(path)