|
a |
|
b/singlecellmultiomics/utils/copyNumberStatePlotter.py |
|
|
1 |
#!/usr/bin/env python3 |
|
|
2 |
# -*- coding: utf-8 -*- |
|
|
3 |
|
|
|
4 |
from singlecellmultiomics.utils import bdbplot,organoidTools |
|
|
5 |
from copy import deepcopy |
|
|
6 |
from importlib import reload |
|
|
7 |
import numpy as np |
|
|
8 |
|
|
|
9 |
class StatePlotter():
    """Draw per-cluster copy-number states as rows of coloured bins on a canvas.

    Every cluster ("state") becomes one horizontal row; within a row each
    chromosome is a gray track on which the bins are drawn, coloured by their
    copy number. Layout and colours are plain instance attributes set in
    ``__init__`` and may be changed before plotting.
    """

    def __init__(self, plot=None):
        """Set up the canvas and the default layout / colour settings.

        Args:
            plot: canvas to draw on. When None a new ``bdbplot.BDBPlot()`` is
                created.
        """
        self.canvas = bdbplot.BDBPlot() if plot is None else plot

        # Layout (canvas units)
        self.heightPerState = 10    # height of one state row
        self.stateMargin = 1        # vertical gap between state rows
        self.chromosomeMargin = 2   # horizontal gap between chromosomes
        self.pixelsPerBase = 1/10_000_000
        self.headerHeight = 30
        self.bigFontSize = 10       # state labels
        self.smallFontSize = 6      # chromosome / allele labels
        self.gray = 220             # r, g and b value of the chromosome track

        # Fill colours per (shifted) copy number, see _fillForCopyNumber
        self.gainColor = '#B36466'
        self.gainTwoColor = '#FF0087'
        self.normalColor = '#FFFFFF'
        self.lossColor = '#6562B5'
        self.totalLossColor = '#004BD8'
        self.missingColor = '#AAAAAA'

    def plotStates(self, df, offset=(0,0), **kwargs):
        """Plot one row per distinct value of ``df['cluster']``.

        Args:
            df: data frame; this method reads the 'cluster' and 'chromosome'
                columns, ``plotState`` reads the remaining ones.
            offset: (x, y) translation applied to the whole state grid.
            **kwargs: forwarded unchanged to ``plotState``.

        Returns:
            The canvas that was drawn on.
        """
        self.offset = offset
        stateGroup = self.canvas.getGroup('stateGrid')
        self.stateGroup = stateGroup
        stateGroup.set('transform' ,f"translate({offset[0]},{offset[1]})" )
        self.canvas.svgTree.append(stateGroup)

        self.states = sorted(set(df['cluster']))
        # Plain sort first so that the keyed (stable) sort has a deterministic
        # order for chromosomes that share the same sort key.
        self.chromosomeOrder = sorted(sorted(set(df['chromosome'])),
                                      key=organoidTools.chrom_sort_human)

        x = 10

        for i, state in enumerate(self.states):
            g = self.canvas.getGroup(f'state_{state}')
            stateGroup.append(g)

            self.plotState(
                self.canvas, g, df[df['cluster'] == state], x,
                y=(self.heightPerState*i + (i*self.stateMargin)),
                label=state,
                plotChromosomeLabels=(i == 0),  # header only above the first row
                **kwargs
            )

        return self.canvas

    def _styleText(self, text, anchor, fontSize):
        """Apply the shared text attributes to a text element and return it."""
        text.set('text-anchor', anchor)
        text.set('dominant-baseline', 'middle')
        text.set('font-family', 'Helvetica')
        text.set('font-size', str(fontSize))
        return text

    def _fillForCopyNumber(self, cn):
        """Return the fill colour for copy number ``cn``.

        Returns None when ``cn`` is none of >=4, 3, 2, 1, 0 or NaN; such bins
        keep the default rectangle fill.
        """
        if cn >= 4:
            return self.gainTwoColor
        elif cn == 3:
            return self.gainColor
        elif cn == 2:
            return self.normalColor
        elif cn == 1:
            return self.lossColor
        elif cn == 0:
            return self.totalLossColor
        elif np.isnan(cn):
            return self.missingColor
        return None

    def plotState(self, plot, g, row, x, y, label, plotChromosomeLabels=False,
                  logScale=False, chromosomeSizes=None, logRepeats=False):
        """Draw a single state row into group ``g``.

        Requires ``self.chromosomeOrder``, ``self.stateGroup`` and
        ``self.offset`` to be set (``plotStates`` does this).

        Args:
            plot: canvas used to create the chromosome name labels.
            g: group element that receives the drawn elements of this row.
            row: data frame holding the bins of this state; the columns
                'chromosome', 'binIndex', 'startCoordinate', 'endCoordinate'
                and 'copyNumber' are read.
            x: x coordinate at which the first chromosome starts.
            y: y coordinate of the top of the row.
            label: text written to the right of the row.
            plotChromosomeLabels: also draw chromosome (and allele) names
                above the row.
            logScale: size every bin by the log of its length, rescaled so the
                bins and the gaps between them fill the chromosome width,
                instead of by its length in bases.
            chromosomeSizes: optional mapping chromosome name (the part before
                the first '_') -> size in bases. Chromosomes missing from it
                use the largest 'endCoordinate' found in ``row``.
            logRepeats: with ``logScale``, weight gaps between bins by
                log(gap)/10 instead of gap/chromosomePixelWidth.

        Returns:
            dict ``{'x': x}`` holding the unchanged start coordinate.
        """
        currentX = x

        shownAlleles = False
        for chromosome in self.chromosomeOrder:
            isAllelic = chromosome.endswith(('_A', '_B'))

            # Select the bins of this chromosome once, instead of per bin
            chromRows = row[row['chromosome'] == chromosome]

            # Obtain how many bases this chromosome has
            chrom = chromosome.split('_')[0]
            if chromosomeSizes is not None and chrom in chromosomeSizes:
                chromosomeSize = chromosomeSizes[chrom]
            else:
                chromosomeSize = chromRows['endCoordinate'].max()

            chromosomePixelWidth = chromosomeSize * self.pixelsPerBase

            # Gray track behind the bins
            rect = self.canvas.getRectangle(currentX, y, chromosomePixelWidth, self.heightPerState)
            self.canvas.modifyStyle(rect, {'fill':f'rgb({self.gray},{self.gray},{self.gray})', 'stroke':'none'})
            g.append(rect)

            if plotChromosomeLabels:

                if not chromosome.endswith('_B'):
                    # One name per chromosome: for an allelic pair it is drawn
                    # on the _A entry, shifted to the right edge of _A.
                    labelX = currentX + chromosomePixelWidth*0.5
                    if chromosome.endswith('_A'):
                        labelX += chromosomePixelWidth*0.5
                    text = plot.getText(chromosome.replace('chr','').replace('_A',''),
                                        labelX, y - self.heightPerState)
                    self._styleText(text, 'middle', self.smallFontSize)
                    g.append(text)

                if isAllelic:
                    # The word "allele" is written until the first B allele
                    # has been labelled, afterwards only the letter is shown.
                    alleleDescriptor = 'allele ' if not shownAlleles else ''
                    allele = 'A' if chromosome.endswith('_A') else 'B'
                    text = self.canvas.getText(f'{alleleDescriptor}{allele}',
                                               currentX+chromosomePixelWidth*0.5,
                                               y - 0.4*self.heightPerState)
                    self._styleText(text, 'middle', self.smallFontSize*0.8)
                    g.append(text)
                    if allele == 'B':
                        shownAlleles = True

            # NOTE(review): assumed to apply to every row, not only the
            # labelled one - confirm the intended nesting.
            if isAllelic:
                # Green backdrop, inserted first so it ends up behind the rows
                offset = self.chromosomeMargin*0.8
                r = self.canvas.getRectangle(currentX-offset, y-self.headerHeight*0.5,
                                             chromosomePixelWidth+offset*2,
                                             self.heightPerState+offset*2+self.headerHeight*0.5)
                r.set('z-index', '0')
                self.canvas.modifyStyle(r, {'fill':'#DCFFCE', 'stroke':'none'})
                self.stateGroup.insert(0, r)

            withinX = currentX

            # Bins are visited in sorted binIndex order; the first row found
            # for a binIndex supplies its data.
            binOrder = sorted(chromRows['binIndex'])
            firstRowPerBin = {}
            for _, binRow in chromRows.iterrows():
                firstRowPerBin.setdefault(binRow['binIndex'], binRow)

            binSizes = {}
            totalBinSize = None
            if logScale:
                currCoord = 0
                minBinSize = 1_000

                for binIndex in binOrder:
                    dat = firstRowPerBin[binIndex]

                    space = dat['startCoordinate'] - currCoord
                    if space > minBinSize:  # add intermediate bin
                        binSizes[(binIndex, 'spacer')] = np.log(space)/10 if logRepeats else space/chromosomePixelWidth

                    binSizes[binIndex] = np.log(dat['endCoordinate'] - dat['startCoordinate'])
                    currCoord = dat['endCoordinate']

                space = chromosomeSize - currCoord
                if space > minBinSize:  # add trailing bin
                    # Fixed key: there may not be any bin on this chromosome,
                    # so the loop variable cannot be part of the key.
                    binSizes[(None, 'spacerfinal')] = np.log(space)/10 if logRepeats else space/chromosomePixelWidth

                totalBinSize = sum(binSizes.values())

            for binIndex in binOrder:
                dat = firstRowPerBin[binIndex]
                cn = dat['copyNumber']

                if logScale:
                    if (binIndex, 'spacer') in binSizes:
                        withinX += (binSizes[(binIndex, 'spacer')] / totalBinSize) * chromosomePixelWidth
                    size = (binSizes[binIndex] / totalBinSize) * chromosomePixelWidth
                else:
                    size = (dat['endCoordinate'] - dat['startCoordinate'])*self.pixelsPerBase

                r = self.canvas.getRectangle(withinX, y, size, self.heightPerState)
                self.canvas.modifyStyle(r, {'stroke-width':'0.2'})

                if isAllelic:
                    cn += 1  # (diploid color == white == 2 )

                fill = self._fillForCopyNumber(cn)
                if fill is not None:
                    self.canvas.modifyStyle(r, {'fill': fill})

                g.append(r)

                withinX += size

            currentX += chromosomePixelWidth+self.chromosomeMargin

        ##### plot the label:
        text = self.canvas.getText(label, currentX, y+0.5*self.heightPerState)
        # 'start' is the valid SVG keyword for left-anchored text
        self._styleText(text, 'start', self.bigFontSize)
        g.append(text)

        # Grow the canvas when this row does not fit
        self.canvas.setWidth(max(self.canvas.width, currentX+self.chromosomeMargin+20+self.offset[0]))
        self.canvas.setHeight(max(self.canvas.height, y+self.heightPerState*1.5+self.offset[1]))

        return {'x':x}