|
a |
|
b/singlecellmultiomics/statistic/cellreadcount.py |
|
|
1 |
#!/usr/bin/env python3 |
|
|
2 |
# -*- coding: utf-8 -*- |
|
|
3 |
import matplotlib.pyplot as plt |
|
|
4 |
from .statistic import StatisticHistogram |
|
|
5 |
import singlecellmultiomics.pyutils as pyutils |
|
|
6 |
import collections |
|
|
7 |
import pandas as pd |
|
|
8 |
import matplotlib |
|
|
9 |
import numpy as np |
|
|
10 |
matplotlib.rcParams['figure.dpi'] = 160 |
|
|
11 |
matplotlib.use('Agg') |
|
|
12 |
import seaborn as sns |
|
|
13 |
|
|
|
14 |
|
|
|
15 |
def readIsDuplicate(read): |
|
|
16 |
return (read.has_tag('RC') and read.get_tag('RC') > 1) or read.is_duplicate |
|
|
17 |
|
|
|
18 |
|
|
|
19 |
class CellReadCount(StatisticHistogram): |
|
|
20 |
def __init__(self, args): |
|
|
21 |
StatisticHistogram.__init__(self, args) |
|
|
22 |
self.read_counts = collections.Counter() |
|
|
23 |
self.molecule_counts = collections.Counter() |
|
|
24 |
|
|
|
25 |
def processRead(self, R1,R2=None): |
|
|
26 |
|
|
|
27 |
for read in [R1,R2]: |
|
|
28 |
if read is None: |
|
|
29 |
continue |
|
|
30 |
|
|
|
31 |
if not read.has_tag('SM'): |
|
|
32 |
continue |
|
|
33 |
|
|
|
34 |
cell = read.get_tag('SM') |
|
|
35 |
|
|
|
36 |
self.read_counts[cell] +=1 |
|
|
37 |
|
|
|
38 |
if not read.is_duplicate: |
|
|
39 |
self.molecule_counts[cell] +=1 |
|
|
40 |
break |
|
|
41 |
|
|
|
42 |
def to_csv(self, path): |
|
|
43 |
pd.DataFrame({'reads':self.read_counts, 'umis':self.molecule_counts}).to_csv(path) |
|
|
44 |
|
|
|
45 |
def __repr__(self): |
|
|
46 |
return f'The average amount of reads is {np.mean(list(self.read_counts.values()))}' |
|
|
47 |
|
|
|
48 |
def plot(self, target_path, title=None): |
|
|
49 |
fig, ax = plt.subplots() |
|
|
50 |
print(self.read_counts) |
|
|
51 |
ax.hist(list(self.read_counts.values()), bins=25, zorder=1) |
|
|
52 |
|
|
|
53 |
if title is not None: |
|
|
54 |
ax.set_title(title) |
|
|
55 |
|
|
|
56 |
ax.set_xlabel("# Reads") |
|
|
57 |
ax.set_ylabel("# Cells") |
|
|
58 |
ax.grid(zorder=0) |
|
|
59 |
sns.despine() |
|
|
60 |
plt.tight_layout() |
|
|
61 |
plt.savefig(target_path) |
|
|
62 |
plt.close() |
|
|
63 |
|
|
|
64 |
fig, ax = plt.subplots() |
|
|
65 |
ax.hist(list(self.molecule_counts.values()), bins=25,zorder=1) |
|
|
66 |
ax.grid(zorder=0) |
|
|
67 |
sns.despine() |
|
|
68 |
if title is not None: |
|
|
69 |
plt.title(title) |
|
|
70 |
|
|
|
71 |
ax.set_xlabel("# Molecules") |
|
|
72 |
ax.set_ylabel("# Cells") |
|
|
73 |
plt.tight_layout() |
|
|
74 |
plt.savefig(target_path.replace('.png', '.molecules.png')) |
|
|
75 |
plt.close() |