Switch to unified view

a b/singlecellmultiomics/statistic/plate2.py
1
import matplotlib.patheffects as path_effects
2
import math
3
import string
4
import numpy as np
5
import pandas as pd
6
import matplotlib.pyplot as plt
7
from .statistic import StatisticHistogram
8
import singlecellmultiomics.pyutils as pyutils
9
from collections import defaultdict, Counter
10
from singlecellmultiomics.utils.plotting import plot_plate
11
import matplotlib
12
matplotlib.rcParams['figure.dpi'] = 160
13
matplotlib.use('Agg')
14
15
16
def counter_defaultdict():
17
    return defaultdict(Counter)
18
19
class PlateStatistic2(object):
20
21
    def __init__(self, args):
22
        self.args = args
23
24
        self.stats = defaultdict(counter_defaultdict)
25
26
    def to_csv(self, path):
27
28
        export = {}
29
        for library, libd in self.stats.items():
30
            for (stat,metric), data in  libd.items():
31
                print(data)
32
                export[library,stat,metric] = data
33
34
35
        pd.DataFrame(
36
            export).to_csv(
37
            path.replace(
38
                '.csv',
39
                f'plate_metrics.csv'))
40
41
    def processRead(self, R1,R2=None, default_lib=None):
42
43
        for read in [R1,R2]:
44
45
            if read is None:
46
                continue
47
48
            cell_index = int(read.get_tag('bi'))
49
50
            if read.has_tag('MX'):
51
                mux= read.get_tag('MX')
52
                format = 384 if ('384' in mux or mux.startswith('CS2')) else 96
53
                if format==384:
54
                    n_rows = 16
55
                    n_cols = 24
56
                else:
57
                    n_rows = 8
58
                    n_cols = 12
59
            else:
60
                n_rows = 16
61
                n_cols = 24
62
63
            if default_lib is None:
64
                if read.has_tag('LY'):
65
                    library = read.get_tag('LY')
66
67
68
                else:
69
                    library = '_'
70
            else:
71
                library = default_lib
72
73
            cell_index = int(cell_index)-1
74
            row = int(cell_index/n_cols)
75
            col = cell_index - row*n_cols
76
77
            cell= read.get_tag('SM')
78
            self.stats[library][('total mapped','# reads')][(row,col, cell)] += 1
79
80
            if read.is_qcfail:
81
                self.stats[library]['qcfail', '# reads'][(row,col,cell)] += 1
82
                continue
83
84
            if read.is_duplicate:
85
                continue
86
87
            self.stats[library][('total mapped','# molecules')][(row,col,cell)] += 1
88
89
            if read.has_tag('lh') and  read.get_tag('lh') in ('TA','TT','AA'):
90
                self.stats[library][read.get_tag('lh'), 'ligated molecules'][(row,col,cell)] += 1
91
92
            break
93
94
95
    def __repr__(self):
96
        return 'Plate statistic'
97
98
99
    def __iter__(self):
100
        for library, libd in self.stats.items():
101
            for name, data in libd.items():
102
                for k,v in data.items():
103
                    yield (library,name,k),v
104
105
106
    def plot(self, target_path, title=None):
107
        for library, libd in self.stats.items():
108
            for (stat,metric), data in  libd.items():
109
                for log in [True,False]:
110
111
                    fig, ax, cax = plot_plate(data, log=log, cmap_name='viridis',vmin=1 if  log else 0)
112
                    cax.set_ylabel(metric)
113
                    ax.set_title(f'{stat}, {metric}')
114
                    plt.savefig(
115
                        target_path.replace(
116
                            '.png',
117
                            '') +
118
                        f'_{library}_{stat}_{metric.replace("# ","n_")}.{"log" if log else "linear"}.png', bbox_inches='tight', dpi=250)
119
                    plt.close()
120
            # Create oversequencing plot: (reads / molecule)
121
            overseq = {}
122
            for coordinate, n_molecules in libd[('total mapped','# molecules')].items():
123
                n_reads = libd[('total mapped','# reads')].get(coordinate,0)
124
                overseq[coordinate] = (n_reads/n_molecules) if n_reads>0 else 0
125
126
            fig, ax, cax = plot_plate(overseq, log=False, cmap_name='viridis', vmin=np.percentile(list(overseq.values()),5))
127
            cax.set_ylabel('reads / molecule')
128
            ax.set_title(f'Oversequencing')
129
            plt.savefig(
130
                target_path.replace(
131
                    '.png',
132
                    '') +
133
                f'_{library}_oversequencing.png', bbox_inches='tight', dpi=250)
134
            plt.close()
135
136
            # Create TA ratio plot: (ta molecules / total molecules)
137
            ta_ratio = {}
138
            for coordinate, n_ta_molecules in libd[('TA','ligated molecules')].items():
139
                n_molecules = libd[('total mapped','# molecules')].get(coordinate,0)
140
                ta_ratio[coordinate] = (n_ta_molecules/n_molecules) if n_molecules>0 else 0
141
142
            fig, ax, cax = plot_plate(ta_ratio, log=False, cmap_name='viridis', vmin=np.percentile(list(ta_ratio.values()),5))
143
            cax.set_ylabel('TA molecules / molecule')
144
            ax.set_title(f'TA fraction')
145
            plt.savefig(
146
                target_path.replace(
147
                    '.png',
148
                    '') +
149
                f'_{library}_ta_fraction.png', bbox_inches='tight', dpi=250)
150
            plt.close()