|
a |
|
b/modas/visual.py |
|
|
1 |
import pandas as pd |
|
|
2 |
import numpy as np |
|
|
3 |
import modas.multiprocess as mp |
|
|
4 |
from rpy2.robjects.packages import importr |
|
|
5 |
from rpy2.robjects import pandas2ri |
|
|
6 |
from rpy2.rinterface_lib.embedded import RRuntimeError |
|
|
7 |
import rpy2.robjects as robjects |
|
|
8 |
from collections import Counter |
|
|
9 |
import modas.gwas_cmd as gc |
|
|
10 |
import pyranges as pr |
|
|
11 |
from yattag import Doc, indent |
|
|
12 |
import subprocess |
|
|
13 |
import shutil |
|
|
14 |
import warnings |
|
|
15 |
import glob |
|
|
16 |
import os |
|
|
17 |
import re |
|
|
18 |
|
|
|
19 |
pandas2ri.activate() |
|
|
20 |
data_table = importr('data.table') |
|
|
21 |
base = importr('base') |
|
|
22 |
robjects.r('options(datatable.showProgress = FALSE)') |
|
|
23 |
|
|
|
24 |
warnings.filterwarnings("ignore") |
|
|
25 |
|
|
|
26 |
utils_path = subprocess.check_output('locate modas/utils', shell=True, text=True, encoding='utf-8') |
|
|
27 |
#utils_path = '/'.join(re.search('\n(.*site-packages.*)\n', utils_path).group(1).split('/')[:-1]) |
|
|
28 |
utils_path = re.search('\n(.*site-packages.*)\n', utils_path).group(1) |
|
|
29 |
if not utils_path.endswith('utils'): |
|
|
30 |
utils_path = '/'.join(utils_path.split('/')[:-1]) |
|
|
31 |
|
|
|
32 |
|
|
|
33 |
# def gwas(phe, geno, num_threads, phe_fn): |
|
|
34 |
# geno_prefix = geno.split('/')[-1] |
|
|
35 |
# related_matrix_cmd = 'gemma.linux -bfile {0}.link -gk 1 -o {1}'.format(geno_prefix,geno_prefix) |
|
|
36 |
# gwas_cmd = 'gemma.linux -bfile {0}.link -k output/{0}.cXX.txt -lmm -n {1} -o {2}' |
|
|
37 |
# fam = pd.read_csv(geno+'.fam', sep=r'\s+', header=None) |
|
|
38 |
# fam[5] = 1 |
|
|
39 |
# fam = pd.merge(fam, phe, left_on=0, right_index=True, how='left') |
|
|
40 |
# fam.to_csv(geno_prefix+'.link.fam', sep='\t', na_rep='NA', header=None, index=False) |
|
|
41 |
# if os.path.exists(geno_prefix+'.link.bed'): |
|
|
42 |
# os.remove(geno_prefix+'.link.bed') |
|
|
43 |
# if os.path.exists(geno_prefix+'.link.bim'): |
|
|
44 |
# os.remove(geno_prefix+'.link.bim') |
|
|
45 |
# os.symlink(geno+'.bed', geno_prefix+'.link.bed') |
|
|
46 |
# os.symlink(geno+'.bim', geno_prefix+'.link.bim') |
|
|
47 |
# values = list() |
|
|
48 |
# for _, p in enumerate(phe.columns): |
|
|
49 |
# p = p.replace('/', '.') |
|
|
50 |
# values.append((gwas_cmd.format(*[geno_prefix, _ + 2, '.'.join(phe_fn.split('/')[-1].split('.')[:-1])+ '_' + str(p)]),)) |
|
|
51 |
# s = mp.run(related_matrix_cmd) |
|
|
52 |
# if s != 0: |
|
|
53 |
# return None |
|
|
54 |
# else: |
|
|
55 |
# s = mp.parallel(mp.run, values, num_threads) |
|
|
56 |
# os.remove(geno_prefix+'.link.bed') |
|
|
57 |
# os.remove(geno_prefix+'.link.bim') |
|
|
58 |
# os.remove(geno_prefix+'.link.fam') |
|
|
59 |
# return s |
|
|
60 |
|
|
|
61 |
|
|
|
62 |
def gwas(phe, geno, num_threads, phe_fn, gwas_model): |
|
|
63 |
software, model = gwas_model.split('_') |
|
|
64 |
geno_prefix = geno.split('/')[-1] |
|
|
65 |
phe_fn = '.'.join(phe_fn.split('/')[-1].split('.')[:-1]) |
|
|
66 |
if software == 'gemma' and model == 'MLM': |
|
|
67 |
geno_prefix = geno.split('/')[-1] |
|
|
68 |
related_matrix_cmd = utils_path+'/gemma.linux -bfile {0}.link -gk 1 -o {1}'.format(geno_prefix,geno_prefix) |
|
|
69 |
fam = pd.read_csv(geno + '.fam', sep=r'\s+', header=None) |
|
|
70 |
fam[5] = 1 |
|
|
71 |
fam = pd.merge(fam, phe, left_on=0, right_index=True, how='left') |
|
|
72 |
fam.to_csv(geno_prefix+'.link.fam', sep='\t', na_rep='NA', header=None, index=False) |
|
|
73 |
if software != 'GAPIT' and gwas_model != 'gemma_LM': |
|
|
74 |
if os.path.exists(geno_prefix+'.link.bed'): |
|
|
75 |
os.remove(geno_prefix+'.link.bed') |
|
|
76 |
if os.path.exists(geno_prefix+'.link.bim'): |
|
|
77 |
os.remove(geno_prefix+'.link.bim') |
|
|
78 |
os.symlink(geno+'.bed', geno_prefix+'.link.bed') |
|
|
79 |
os.symlink(geno+'.bim', geno_prefix+'.link.bim') |
|
|
80 |
if software == 'rMVP': |
|
|
81 |
if os.path.exists(geno_prefix+'.link.fam'): |
|
|
82 |
os.remove(geno_prefix+'.link.fam') |
|
|
83 |
os.symlink(geno + '.fam', geno_prefix + '.link.fam') |
|
|
84 |
fam = pd.read_csv(geno + '.fam', sep=r'\s+', header=None) |
|
|
85 |
phe = phe.reindex(fam[0]) |
|
|
86 |
if software == 'gemma' and model == "LM": |
|
|
87 |
if os.path.exists('./gemma_lm_geno'): |
|
|
88 |
shutil.rmtree('./gemma_lm_geno') |
|
|
89 |
os.mkdir('./gemma_lm_geno') |
|
|
90 |
for p in phe.columns: |
|
|
91 |
p = p.replace('m/z', 'm.z') |
|
|
92 |
os.symlink('../' + geno + '.bed', './gemma_lm_geno/' + geno_prefix + '_' + p + '.link.bed') |
|
|
93 |
os.symlink('../' + geno + '.bim', './gemma_lm_geno/' + geno_prefix + '_' + p + '.link.bim') |
|
|
94 |
fam = pd.read_csv(geno + '.fam', sep=r'\s+', header=None) |
|
|
95 |
fam = pd.merge(fam.iloc[:, :5], phe.loc[:, p.replace('m.z', 'm/z')].to_frame(), left_on=0, right_index=True, how='left') |
|
|
96 |
fam.to_csv('./gemma_lm_geno/' + geno_prefix + '_' + p + '.link.fam', sep='\t', na_rep='NA', header=None, index=False) |
|
|
97 |
|
|
|
98 |
values = list() |
|
|
99 |
for _, p in enumerate(phe.columns): |
|
|
100 |
p = p.replace('m/z', 'm.z') |
|
|
101 |
if gwas_model == 'gemma_LM': |
|
|
102 |
values.append(((gc.gemma_cmd(model, './gemma_lm_geno/' + geno_prefix + '_' + p + '.link', None, None, '_'.join([phe_fn, gwas_model, p]))),)) |
|
|
103 |
if gwas_model == 'gemma_MLM': |
|
|
104 |
values.append(((gc.gemma_cmd(model, geno_prefix + '.link', geno_prefix, _ + 2, '_'.join([phe_fn, gwas_model, p]))),)) |
|
|
105 |
if software == 'rMVP': |
|
|
106 |
if not os.path.exists('./output'): |
|
|
107 |
os.mkdir('./output') |
|
|
108 |
omics_phe = phe.loc[:, p.replace('m.z', 'm/z')].to_frame().reset_index() |
|
|
109 |
omics_phe.columns = ['Taxa', '_'.join([phe_fn, gwas_model, p])] |
|
|
110 |
values.append((model, geno_prefix+'.link', geno_prefix+'.link', omics_phe, 1, './output')) |
|
|
111 |
if gwas_model == 'gemma_MLM': |
|
|
112 |
s = mp.run(related_matrix_cmd) |
|
|
113 |
if s != 0: |
|
|
114 |
return None |
|
|
115 |
if software == 'gemma': |
|
|
116 |
s = mp.parallel(mp.run, values, num_threads) |
|
|
117 |
if software == 'rMVP': |
|
|
118 |
s1 = mp.parallel(gc.rmvp, (values[0],), 1) |
|
|
119 |
s = mp.parallel(gc.rmvp, values[1:], num_threads) |
|
|
120 |
s = s1 + s |
|
|
121 |
if software == 'GAPIT': |
|
|
122 |
if not os.path.exists('./output'): |
|
|
123 |
os.mkdir('./output') |
|
|
124 |
phe.columns = ['_'.join([phe_fn, gwas_model, p]) for p in phe.columns] |
|
|
125 |
phe = phe.reset_index() |
|
|
126 |
phe.columns = ['Taxa'] + list(phe.columns[1:]) |
|
|
127 |
geno = os.path.abspath(geno) |
|
|
128 |
os.chdir('./output') |
|
|
129 |
s = gc.gapit(model, geno, phe, utils_path) |
|
|
130 |
os.chdir('../') |
|
|
131 |
if gwas_model == 'gemma_MLM' or software == 'rMVP': |
|
|
132 |
os.remove(geno_prefix+'.link.bed') |
|
|
133 |
os.remove(geno_prefix+'.link.bim') |
|
|
134 |
os.remove(geno_prefix+'.link.fam') |
|
|
135 |
if gwas_model == 'gemma_LM': |
|
|
136 |
shutil.rmtree('./gemma_lm_geno') |
|
|
137 |
return s |
|
|
138 |
|
|
|
139 |
|
|
|
140 |
def gwas_plot(res, p, prefix, t, software): |
|
|
141 |
try: |
|
|
142 |
base.sink('/dev/null') |
|
|
143 |
w = data_table.fread(res, data_table=base.getOption("datatable.fread.datatable", False)) |
|
|
144 |
if software == 'gemma': |
|
|
145 |
w_subset = w.loc[w.p_wald <= float(p), :] |
|
|
146 |
m = w_subset[['rs', 'chr', 'ps', 'p_wald']] |
|
|
147 |
q = w[['rs', 'chr', 'ps', 'p_wald']] |
|
|
148 |
if software == 'rMVP': |
|
|
149 |
w_subset = w.loc[w[w.columns[-1]] <= float(p), :] |
|
|
150 |
m = w_subset.iloc[:, [0, 1, 2, -1]] |
|
|
151 |
q = w.iloc[:, [0, 1, 2, -1]] |
|
|
152 |
if software == 'GAPIT': |
|
|
153 |
w_subset = w.loc[w['P.value'] <= float(p), :] |
|
|
154 |
m = w_subset[['SNP', 'Chromosome', 'Position', 'P.value']] |
|
|
155 |
q = w[['SNP', 'Chromosome', 'Position', 'P.value']] |
|
|
156 |
m.columns = ['SNP', 'Chromosome', 'Position', prefix] |
|
|
157 |
q.columns = ['SNP', 'Chromosome', 'Position', prefix] |
|
|
158 |
thresholdi = robjects.FloatVector([1.0 / w.shape[0], 1e-6, 1e-5]) |
|
|
159 |
lim = -np.log10(min(m[prefix])) + 2 |
|
|
160 |
#w_subset = base.subset(w, np.array(w.rx2('p_wald')) <= float(p)) |
|
|
161 |
#m = w_subset.rx(robjects.StrVector(['rs', 'chr', 'ps', 'p_wald'])) |
|
|
162 |
#q = w.rx(robjects.StrVector(['rs', 'chr', 'ps', 'p_wald'])) |
|
|
163 |
# m.names = ['SNP', 'Chromosome', 'Position', prefix] |
|
|
164 |
# q.names = ['SNP', 'Chromosome', 'Position', prefix] |
|
|
165 |
#thresholdi = robjects.FloatVector([1.0/w.nrow, 1e-6, 1e-5]) |
|
|
166 |
#lim = -np.log10(min(np.array(w_subset.rx2('p_wald'))))+2 |
|
|
167 |
#base.sink('/dev/null') |
|
|
168 |
robjects.r('source("'+utils_path+'/CMplot.r")') |
|
|
169 |
CMplot = robjects.r['CMplot'] |
|
|
170 |
CMplot(m, plot_type='m', col=robjects.StrVector(["grey30", "grey60"]), ylim=robjects.FloatVector([2, lim]), threshold=thresholdi, |
|
|
171 |
cex=robjects.FloatVector([0.5, 0.5, 0.5]), signal_cex=robjects.FloatVector([0.5, 0.5, 0.5]), |
|
|
172 |
threshold_col=robjects.StrVector(['red', 'green', 'blue']), chr_den_col=robjects.rinterface.NULL, amplify=True, |
|
|
173 |
signal_pch = robjects.IntVector([19, 19, 19]), dpi=300, |
|
|
174 |
signal_col=robjects.StrVector(['red', 'green', 'blue']), multracks=False, LOG10=True, file=t) |
|
|
175 |
CMplot(q, plot_type='q', col='grey30', threshold=thresholdi[0], |
|
|
176 |
signal_cex=robjects.FloatVector([0.5, 0.5, 0.5]), signal_pch=robjects.IntVector([19, 19, 19]), |
|
|
177 |
conf_int_col='gray', signal_col='red', multracks=False, LOG10=True, file=t, dpi=300) |
|
|
178 |
base.sink() |
|
|
179 |
except RRuntimeError: |
|
|
180 |
return 0 |
|
|
181 |
except ValueError: |
|
|
182 |
return 0 |
|
|
183 |
else: |
|
|
184 |
return 1 |
|
|
185 |
|
|
|
186 |
|
|
|
187 |
def gwas_plot_parallel(phe, p, threads, t, phe_fn, gwas_model): |
|
|
188 |
software, model = gwas_model.split('_') |
|
|
189 |
values = list() |
|
|
190 |
for i in phe.columns: |
|
|
191 |
i = str(i).replace('m/z', 'm.z') |
|
|
192 |
if software == 'gemma': |
|
|
193 |
gwas_fn = 'output/' + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.assoc.txt' |
|
|
194 |
if software == 'rMVP': |
|
|
195 |
gwas_fn = 'output/' + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.'.join(['.'+model, 'csv']) |
|
|
196 |
if software == 'GAPIT': |
|
|
197 |
gwas_fn = 'output/' + '.'.join(['GAPIT', model, '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]), 'GWAS', 'Results', 'csv']) |
|
|
198 |
values.append((gwas_fn, p, '.'.join(phe_fn.split('/')[-1].split('.')[:-1]) + '_' + str(i), t, software)) |
|
|
199 |
s = mp.parallel(gwas_plot, values, threads) |
|
|
200 |
return s |
|
|
201 |
|
|
|
202 |
|
|
|
203 |
def boxplot(phe, g, qtl): |
|
|
204 |
robjects.r('''box_plot <- function(d, phe, rs, level){ |
|
|
205 |
library(ggplot2) |
|
|
206 |
library(ggsignif) |
|
|
207 |
d <- d[d$haplotype!=1,] |
|
|
208 |
d[d$haplotype==0,'haplotype'] <- level[1] |
|
|
209 |
d[d$haplotype==2,'haplotype'] <- level[2] |
|
|
210 |
d$haplotype <- factor(d$haplotype,levels = level) |
|
|
211 |
b <- as.numeric(formatC(max(d[,1],na.rm=T)*1.2/4,format = 'e',digits = 1)) |
|
|
212 |
p <- ggplot(data = d,aes_string(x='haplotype',y=names(d)[1],fill='haplotype'))+ |
|
|
213 |
theme_bw()+ |
|
|
214 |
theme(legend.title = element_blank(), |
|
|
215 |
legend.background = element_blank(), |
|
|
216 |
legend.key = element_rect(colour = NA, fill = NA), |
|
|
217 |
legend.text = element_text(size = 4), |
|
|
218 |
legend.key.size = unit(3,'mm'), |
|
|
219 |
legend.position = 'none', |
|
|
220 |
plot.title = element_text(hjust=0.5,size = 6), |
|
|
221 |
plot.margin=unit(c(0.3,0.3,0,0),'cm'), |
|
|
222 |
panel.grid = element_blank(), |
|
|
223 |
axis.line = element_line(colour = 'black',size=0.4), |
|
|
224 |
axis.text = element_text(size = 6,color = 'black'), |
|
|
225 |
axis.ticks.length=unit(.1, 'cm'))+ |
|
|
226 |
stat_boxplot(geom = 'errorbar', width = 0.2,size=0.1)+ |
|
|
227 |
geom_boxplot(lwd=0.2,width=0.5,outlier.size = 0.2)+ |
|
|
228 |
geom_signif(comparisons = list(level),map_signif_level = F, |
|
|
229 |
test= t.test, size=0.2 ,textsize=2, y_position = max(d[,1],na.rm=T)*1.1)+ |
|
|
230 |
#xlab('')+ylab('')+ggtitle(paste(phe,rs,sep='_'))+ |
|
|
231 |
xlab('')+ylab('')+ggtitle('')+ |
|
|
232 |
scale_y_continuous(breaks=seq(0,4*b,by=b),labels = function(x) formatC(x, format = 'e',digits = 1), limits = c(0, max(d[,1], na.rm=T)*1.2))+ |
|
|
233 |
scale_fill_manual(values=c('#E3FFE2', 'forest green')) |
|
|
234 |
ggsave(paste(phe,'_',rs,'_','boxplot','.jpg',sep=''),plot=p,device='jpg',width=3.5,height=4.3,units = 'cm') |
|
|
235 |
}''') |
|
|
236 |
robjects.r['options'](warn=-1) |
|
|
237 |
base.sink('/dev/null') |
|
|
238 |
box_plot = robjects.r['box_plot'] |
|
|
239 |
g = g.where(g.snp.isin(qtl.SNP), drop=True) |
|
|
240 |
allele = pd.DataFrame([g.a0, g.a1], index=['a0', 'a1'], columns=g.snp.values) |
|
|
241 |
g = pd.DataFrame(g.values, index=g.sample, columns=g.snp.values) |
|
|
242 |
ril = g.index.intersection(phe.index) |
|
|
243 |
g = g.reindex(ril) |
|
|
244 |
phe = phe.reindex(ril) |
|
|
245 |
for index, row in qtl.iterrows(): |
|
|
246 |
if row['phe_name'] not in phe.columns: |
|
|
247 |
continue |
|
|
248 |
d = pd.concat([phe[row['phe_name']], g[row['SNP']]], axis=1) |
|
|
249 |
d.columns = ['trait.' + d.columns[0].replace('-', '.'), 'haplotype'] |
|
|
250 |
level = robjects.StrVector([allele[row['SNP']]['a1'].values*2, allele[row['SNP']]['a0'].values*2]) |
|
|
251 |
box_plot(d, row['phe_name'], row['SNP'], level) |
|
|
252 |
base.sink() |
|
|
253 |
|
|
|
254 |
|
|
|
255 |
def multi_trait_plot(phe, gwas_dir, qtl, phe_fn, prefix, t, gwas_model): |
|
|
256 |
software, model = gwas_model.split('_') |
|
|
257 |
bk = pd.DataFrame() |
|
|
258 |
for i in phe.columns: |
|
|
259 |
i = i.replace('/', '.') |
|
|
260 |
# fn = gwas_dir + '/' + '.'.join(phe_fn.split('/')[-1].split('.')[:-1]) + '_' + str(i)+'.assoc.txt' |
|
|
261 |
if software == 'gemma': |
|
|
262 |
gwas_fn = gwas_dir + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.assoc.txt' |
|
|
263 |
d = pd.read_csv(gwas_fn, sep='\t') |
|
|
264 |
d = d[['rs', 'chr', 'ps', 'p_wald']] |
|
|
265 |
if software == 'rMVP': |
|
|
266 |
gwas_fn = gwas_dir + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.'.join(['.' + model, 'csv']) |
|
|
267 |
d = pd.read_csv(gwas_fn) |
|
|
268 |
d = d.iloc[:, [0, 1, 2, -1]] |
|
|
269 |
d.columns = ['rs', 'chr', 'ps', 'p_wald'] |
|
|
270 |
if software == 'GAPIT': |
|
|
271 |
gwas_fn = gwas_dir + '.'.join(['GAPIT', model, '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]), 'GWAS', 'Results', 'csv']) |
|
|
272 |
d = pd.read_csv(gwas_fn) |
|
|
273 |
d = d[['SNP', 'Chromosome', 'Position ', 'P.value']] |
|
|
274 |
d.columns = ['rs', 'chr', 'ps', 'p_wald'] |
|
|
275 |
|
|
|
276 |
# d = pd.read_csv(gwas_fn, sep='\t') |
|
|
277 |
if bk.empty: |
|
|
278 |
bk = d.copy() |
|
|
279 |
bk.loc[bk.p_wald <= 1e-5, 'p_wald'] = 1e-5 |
|
|
280 |
for index, row in qtl.loc[qtl.phe_name == i, :].iterrows(): |
|
|
281 |
peak_pos = int(row['SNP'].split('_')[-1]) |
|
|
282 |
chrom = row['CHR'] |
|
|
283 |
sig_tmp = pd.concat([bk.loc[(bk.chr.astype(str) == str(chrom)) & (bk.ps >= peak_pos-1000000) & (bk.ps <= peak_pos+1000000), 'p_wald'], |
|
|
284 |
d.loc[(bk.chr.astype(str) == str(chrom)) & (bk.ps >= peak_pos-1000000) & (bk.ps <= peak_pos+1000000), 'p_wald']], axis=1) |
|
|
285 |
sig_tmp.columns = ['bk', 'phe'] |
|
|
286 |
bk.loc[(bk.chr.astype(str) == str(chrom)) & (bk.ps >= peak_pos-1000000) & (bk.ps <= peak_pos+1000000), 'p_wald'] = sig_tmp.apply( |
|
|
287 |
lambda x: x['bk'] if x['bk'] < x['phe'] else x['phe'], axis=1) |
|
|
288 |
bk = bk.loc[bk.p_wald <= 1e-2, :] |
|
|
289 |
bk.loc[bk.p_wald <= 1e-20, 'p_wald'] = 1e-20 |
|
|
290 |
bk = bk[['rs', 'chr', 'ps', 'p_wald']] |
|
|
291 |
thresholdi = robjects.FloatVector([1.0 / d.shape[0], 1e-6, 1e-5]) |
|
|
292 |
lim = -np.log10(min(bk['p_wald'])) + 2 |
|
|
293 |
bk.columns = ['SNP', 'Chromosome', 'Position', prefix] |
|
|
294 |
base.sink('/dev/null') |
|
|
295 |
robjects.r('source("'+utils_path+'/CMplot.r")') |
|
|
296 |
#robjects.r('source("/home/debian/文档/MGWAP/compound_extract/plot/CMplot.r")') |
|
|
297 |
CMplot = robjects.r['CMplot'] |
|
|
298 |
CMplot(bk, plot_type='m', col=robjects.StrVector(["grey30", "grey60"]), ylim=robjects.FloatVector([2, lim]), |
|
|
299 |
threshold=thresholdi, |
|
|
300 |
cex=robjects.FloatVector([0.5, 0.5, 0.5]), signal_cex=robjects.FloatVector([0.5, 0.5, 0.5]), |
|
|
301 |
threshold_col=robjects.StrVector(['red', 'green', 'blue']), chr_den_col=robjects.rinterface.NULL, |
|
|
302 |
amplify=True, |
|
|
303 |
signal_pch=robjects.IntVector([19, 19, 19]), dpi=300, |
|
|
304 |
signal_col=robjects.StrVector(['red', 'green', 'blue']), multracks=False, LOG10=True, file=t) |
|
|
305 |
base.sink() |
|
|
306 |
|
|
|
307 |
|
|
|
308 |
def qtl_anno(qtl, anno): |
|
|
309 |
anno = anno[['geneid', 'position']] |
|
|
310 |
anno.loc[:, 'chr'] = anno['position'].apply(lambda x: x.split(':')[0]) |
|
|
311 |
anno.loc[:, 'start'] = anno['position'].apply(lambda x: x.split(':')[1].split('-')[0]) |
|
|
312 |
anno.loc[:, 'end'] = anno['position'].apply(lambda x: x.split(':')[1].split('-')[1]) |
|
|
313 |
anno = anno[['chr', 'start', 'end', 'geneid']] |
|
|
314 |
anno.columns = ['Chromosome', 'Start', 'End', 'geneid'] |
|
|
315 |
qtl_range = qtl[['CHR', 'qtl_start', 'qtl_end', 'phe_name']] |
|
|
316 |
qtl_range.columns = ['Chromosome', 'Start', 'End', 'phe_name'] |
|
|
317 |
qtl_range = pr.PyRanges(qtl_range) |
|
|
318 |
anno = pr.PyRanges(anno) |
|
|
319 |
qtl_anno_intersect = qtl_range.join(anno, how='left') |
|
|
320 |
qtl_anno = pd.DataFrame() |
|
|
321 |
for k in sorted(qtl_anno_intersect.dfs.keys()): |
|
|
322 |
qtl_anno = pd.concat([qtl_anno, qtl_anno_intersect.dfs[k]]) |
|
|
323 |
qtl_anno = qtl_anno[['Chromosome', 'Start', 'End', 'phe_name', 'geneid']] |
|
|
324 |
qtl_anno.loc[:, 'Chromosome'] = qtl_anno.Chromosome.astype(str) |
|
|
325 |
qtl_anno.loc[:, 'Start'] = qtl_anno.Start.astype(int) |
|
|
326 |
qtl_anno.loc[:, 'End'] = qtl_anno.End.astype(int) |
|
|
327 |
qtl.loc[:, 'CHR'] = qtl.CHR.astype(str) |
|
|
328 |
qtl = pd.merge(qtl, qtl_anno, left_on=['CHR', 'qtl_start', 'qtl_end', 'phe_name'], |
|
|
329 |
right_on=['Chromosome', 'Start', 'End', 'phe_name']) |
|
|
330 |
qtl = qtl.drop(['Chromosome', 'Start', 'End'], axis=1) |
|
|
331 |
qtl = qtl.groupby(['CHR', 'qtl_start', 'qtl_end', 'SNP', 'P', 'SP2_num', 'qtl_length', 'phe_name'])['geneid'].apply(';'.join).reset_index() |
|
|
332 |
qtl.columns = ['CHR', 'qtl_start', 'qtl_end', 'SNP', 'P', 'SP2_num', 'qtl_length', 'phe_name', 'qtl_all_gene'] |
|
|
333 |
qtl.loc[qtl.qtl_all_gene == '-1', 'qtl_all_gene'] = 'nan' |
|
|
334 |
return qtl |
|
|
335 |
|
|
|
336 |
|
|
|
337 |
''' |
|
|
338 |
Generate html report for SingleTrait |
|
|
339 |
Author: CrazyHsu @ crazyhsu9527@gmail.com |
|
|
340 |
Created on: 2020-08-26 20:33:52 |
|
|
341 |
Last modified: 2021-03-24 17:20:33 |
|
|
342 |
''' |
|
|
343 |
|
|
|
344 |
|
|
|
345 |
################# Classes ################# |
|
|
346 |
class AllQtlStatistics(): |
|
|
347 |
def __init__(self): |
|
|
348 |
self.totalSNPs = 0 |
|
|
349 |
self.qtlDetected = 0 |
|
|
350 |
self.medianQtlLen = 0 |
|
|
351 |
self.longestQtlLen = 0 |
|
|
352 |
self.shortestQtlLen = 0 |
|
|
353 |
self.totalAnnoGenes = 0 |
|
|
354 |
# self.totalTargetGenes = 0 |
|
|
355 |
# self.totalEnrichTargetGenes = 0 |
|
|
356 |
# self.aveEnrichGenes = 0 |
|
|
357 |
|
|
|
358 |
def getQtlLengthInfo(self, qtlLenSeries): |
|
|
359 |
self.medianQtlLen = np.median(qtlLenSeries) |
|
|
360 |
self.longestQtlLen = np.max(qtlLenSeries) |
|
|
361 |
self.shortestQtlLen = np.min(qtlLenSeries) |
|
|
362 |
|
|
|
363 |
def getTotalAnnoGenes(self, qtlAllGenesSeries): |
|
|
364 |
self.totalAnnoGenes = len(self.mergeSeries2List(qtlAllGenesSeries)) |
|
|
365 |
|
|
|
366 |
# def getTotalTargetGenes(self, qtlAllMetaGenesSeries): |
|
|
367 |
# self.totalTargetGenes = len(self.mergeSeries2List(qtlAllMetaGenesSeries)) |
|
|
368 |
|
|
|
369 |
# def getEnrichTargetGenes(self, myDataFrame): |
|
|
370 |
# filteredSeries = myDataFrame.loc[myDataFrame.qtl_p_value <= 0.05, "qtl_meta_gene"] |
|
|
371 |
# self.totalEnrichTargetGenes = len(self.mergeSeries2List(filteredSeries)) |
|
|
372 |
|
|
|
373 |
def getInit(self, tableData): |
|
|
374 |
self.qtlDetected = len(tableData) |
|
|
375 |
self.getQtlLengthInfo(tableData.qtl_length) |
|
|
376 |
self.getTotalAnnoGenes(tableData.qtl_all_gene) |
|
|
377 |
# self.getTotalTargetGenes(tableData.qtl_meta_gene) |
|
|
378 |
# self.getEnrichTargetGenes(tableData) |
|
|
379 |
|
|
|
380 |
def mergeSeries2List(self, mySeries, sep=";"): |
|
|
381 |
return set().union(*mySeries.apply(lambda x: str(x).split(sep)).to_list()) |
|
|
382 |
|
|
|
383 |
|
|
|
384 |
class AllTraitStatistics(): |
|
|
385 |
def __init__(self): |
|
|
386 |
self.totalTraits = 0 |
|
|
387 |
self.filteredTraits = 0 |
|
|
388 |
self.clusteredTraits = 0 |
|
|
389 |
self.unclusteredTraits = 0 |
|
|
390 |
|
|
|
391 |
def getFilteredTraits(self, traitSeries): |
|
|
392 |
self.filteredTraits = len(self.mergeSeries2List(traitSeries, sep=",")) |
|
|
393 |
self.totalTraits = len(self.mergeSeries2List(traitSeries, sep=",")) |
|
|
394 |
|
|
|
395 |
def getClusteredTraits(self, labelSeries): |
|
|
396 |
counter = Counter(labelSeries.to_list()) |
|
|
397 |
clustered = [i for i in counter if counter[i] != 1] |
|
|
398 |
unclustered = [i for i in counter if counter[i] == 1] |
|
|
399 |
self.clusteredTraits = len(clustered) |
|
|
400 |
self.unclusteredTraits = len(unclustered) |
|
|
401 |
|
|
|
402 |
# def getUnclusterTraits(self, labelSeries): |
|
|
403 |
# self.clusteredTraits = len(set(labelSeries.to_list())) |
|
|
404 |
|
|
|
405 |
def getInit(self, tableData): |
|
|
406 |
self.getFilteredTraits(tableData.phe_name) |
|
|
407 |
self.getClusteredTraits(tableData.phe_name) |
|
|
408 |
# self.getUnclusterTraits(tableData.phe_name) |
|
|
409 |
|
|
|
410 |
def mergeSeries2List(self, mySeries, sep=";"): |
|
|
411 |
return set().union(*mySeries.apply(lambda x: str(x).split(sep)).to_list()) |
|
|
412 |
|
|
|
413 |
|
|
|
414 |
class SingleQtlStatistics(): |
|
|
415 |
def __init__(self, myRow, rowIndex): |
|
|
416 |
self.qtlName = "QTL{}_chr{}-{}-{}".format(rowIndex, myRow.CHR, myRow.qtl_start, myRow.qtl_end) |
|
|
417 |
self.qtlPosition = "{}:{}-{}".format(myRow.CHR, myRow.qtl_start, myRow.qtl_end) |
|
|
418 |
self.peakSNP = "{}, {}".format(myRow.SNP, myRow.SNP) |
|
|
419 |
self.traitNames = myRow.phe_name.split(",") |
|
|
420 |
self.totalGenesInQtl = str(myRow.qtl_all_gene).split(";") |
|
|
421 |
# self.targetGenesInQtl = str(myRow.qtl_meta_gene).split(";") |
|
|
422 |
# self.enrichTargetGenes = 0 |
|
|
423 |
self.pvalue = myRow.P |
|
|
424 |
|
|
|
425 |
|
|
|
426 |
################# Functions ################## |
|
|
427 |
def resolveDir(dirName): |
|
|
428 |
if not os.path.exists(dirName): |
|
|
429 |
os.makedirs(dirName) |
|
|
430 |
os.chdir(dirName) |
|
|
431 |
|
|
|
432 |
|
|
|
433 |
def getGeneFunc(myGeneFuncFile, sep=","): |
|
|
434 |
gene2func = pd.read_csv(myGeneFuncFile, sep=sep) |
|
|
435 |
tmpDict = gene2func.to_dict("index") |
|
|
436 |
geneFuncDict = {} |
|
|
437 |
for i in tmpDict: |
|
|
438 |
geneFuncDict[tmpDict[i]["geneid"]] = tmpDict[i] |
|
|
439 |
return geneFuncDict |
|
|
440 |
|
|
|
441 |
|
|
|
442 |
def getAllQtlSummary(allQtlStatistics, doc=None, tag=None, text=None, line=None): |
|
|
443 |
with tag("div", id="qtlSummary"): |
|
|
444 |
line("h1", "Summary of QTLs detected by local GWAS", style="text-align: center;") |
|
|
445 |
multi_trait_File = "./Manhattan.multi_trait.jpg" |
|
|
446 |
with tag("div", style="text-align: center;margin-top: 50px;"): |
|
|
447 |
doc.stag("img", klass="img-fluid", src=multi_trait_File, alt="multi_trait") |
|
|
448 |
with tag("div", style="margin-top: 100px;"): |
|
|
449 |
with tag("div"): |
|
|
450 |
line("h2", "QTL summarization criteria") |
|
|
451 |
line("p", "The cutoffs in generating and filtering QTLs") |
|
|
452 |
with tag("div"): |
|
|
453 |
line("h2", "QTL statistics table") |
|
|
454 |
with tag("div", klass="table-responsive"): |
|
|
455 |
with tag("table", klass="table"): |
|
|
456 |
with tag("thead"): |
|
|
457 |
with tag("tr", klass="table-success"): |
|
|
458 |
line("th", "Categories", style="width: 50%") |
|
|
459 |
line("th", "Statistics value", style="width: 50%") |
|
|
460 |
with tag("tbody"): |
|
|
461 |
with tag("tr"): |
|
|
462 |
line("td", "Genotype file") |
|
|
463 |
line("td", "XXXXXX") |
|
|
464 |
with tag("tr"): |
|
|
465 |
line("td", "Number of SNPs for local GWAS") |
|
|
466 |
line("td", str(allQtlStatistics.totalSNPs)) |
|
|
467 |
with tag("tr"): |
|
|
468 |
line("td", "Number of detected QTLs") |
|
|
469 |
line("td", str(allQtlStatistics.qtlDetected)) |
|
|
470 |
with tag("tr"): |
|
|
471 |
line("td", "Median QTL length") |
|
|
472 |
line("td", str(allQtlStatistics.medianQtlLen)) |
|
|
473 |
with tag("tr"): |
|
|
474 |
line("td", "Longest QTL length") |
|
|
475 |
line("td", str(allQtlStatistics.longestQtlLen)) |
|
|
476 |
with tag("tr"): |
|
|
477 |
line("td", "Shortest QTL length") |
|
|
478 |
line("td", str(allQtlStatistics.shortestQtlLen)) |
|
|
479 |
with tag("tr"): |
|
|
480 |
line("td", "Total genes in the QTLs") |
|
|
481 |
line("td", str(allQtlStatistics.totalAnnoGenes)) |
|
|
482 |
# with tag("tr"): |
|
|
483 |
# line("td", "Total target genes in the QTLs") |
|
|
484 |
# line("td", str(allQtlStatistics.totalTargetGenes)) |
|
|
485 |
# with tag("tr"): |
|
|
486 |
# line("td", "Total enrichment of target genes") |
|
|
487 |
# line("td", str(allQtlStatistics.totalEnrichTargetGenes)) |
|
|
488 |
# with tag("tr"): |
|
|
489 |
# line("td", "Average enrichment of target genes") |
|
|
490 |
# line("td", str(allQtlStatistics.aveEnrichGenes)) |
|
|
491 |
|
|
|
492 |
|
|
|
493 |
def getAllTraitSummary(allTraitStatistics, doc=None, tag=None, text=None, line=None): |
|
|
494 |
# doc, tag, text, line = Doc().ttl() |
|
|
495 |
with tag("div", id="traitSummary"): |
|
|
496 |
line("h1", "Summary of omics traits detected by local GWAS", style="text-align: center;") |
|
|
497 |
# heatmapFile = "/home/xufeng/xufeng/Projects/MODAS/xufeng1/assets/img/heatmap.png" |
|
|
498 |
# with tag("div", style="text-align: center;margin-top: 50px;"): |
|
|
499 |
# doc.stag("img", klass="img-fluid", src=heatmapFile, alt="heatmap") |
|
|
500 |
with tag("div", style="margin-top: 100px;"): |
|
|
501 |
with tag("div"): |
|
|
502 |
line("h2", "Omics trait filtration criteria") |
|
|
503 |
line("p", "Parameters and pipelines in filtering traits") |
|
|
504 |
with tag("div"): |
|
|
505 |
line("h2", "Trait statistics table") |
|
|
506 |
with tag("div", klass="table-responsive"): |
|
|
507 |
with tag("table", klass="table"): |
|
|
508 |
with tag("thead"): |
|
|
509 |
with tag("tr", klass="table-success"): |
|
|
510 |
line("th", "Categories", style="width: 50%") |
|
|
511 |
line("th", "Statistics value", style="width: 50%") |
|
|
512 |
with tag("tbody"): |
|
|
513 |
with tag("tr"): |
|
|
514 |
line("td", "Omics trait type") |
|
|
515 |
line("td", "Metabolome") |
|
|
516 |
with tag("tr"): |
|
|
517 |
line("td", "Total number of traits") |
|
|
518 |
line("td", str(allTraitStatistics.totalTraits)) |
|
|
519 |
with tag("tr"): |
|
|
520 |
line("td", "Filtered number of traits") |
|
|
521 |
line("td", str(allTraitStatistics.filteredTraits)) |
|
|
522 |
with tag("tr"): |
|
|
523 |
line("td", "Number of clustered traits") |
|
|
524 |
line("td", str(allTraitStatistics.clusteredTraits)) |
|
|
525 |
# with tag("tr"): |
|
|
526 |
# line("td", "Number of modules of clustered traits") |
|
|
527 |
# line("td", "Cell 2") |
|
|
528 |
with tag("tr"): |
|
|
529 |
line("td", "Number of unclustered traits") |
|
|
530 |
line("td", str(allTraitStatistics.unclusteredTraits)) |
|
|
531 |
# return doc.getvalue() |
|
|
532 |
|
|
|
533 |
|
|
|
534 |
def getSingleQtlInfo(singleQtl, index, geneFuncDict, doc=None, tag=None, text=None, line=None): |
|
|
535 |
with tag("div", klass="container"): |
|
|
536 |
with tag("div", klass="row"): |
|
|
537 |
with tag("div", klass="col"): |
|
|
538 |
line("h1", "Summary in " + singleQtl.qtlName, style="text-align: center;") |
|
|
539 |
with tag("div", style="margin-top: 100px;"): |
|
|
540 |
with tag("div"): |
|
|
541 |
line("h2", "QTL summarization criteria for whole -genome GWAS") |
|
|
542 |
line("p", "Parameters and pipeline for filtering traits") |
|
|
543 |
with tag("div"): |
|
|
544 |
line("h2", "QTL statistics table") |
|
|
545 |
with tag("div", klass="table-responsive"): |
|
|
546 |
with tag("table", klass="table"): |
|
|
547 |
with tag("thead"): |
|
|
548 |
with tag("tr", klass="table-success"): |
|
|
549 |
line("th", "QTL", style="width: 50%") |
|
|
550 |
line("th", "Statistics", style="width: 50%") |
|
|
551 |
with tag("tbody"): |
|
|
552 |
with tag("tr"): |
|
|
553 |
line("td", "QTL position") |
|
|
554 |
line("td", str(singleQtl.qtlPosition)) |
|
|
555 |
with tag("tr"): |
|
|
556 |
line("td", "Peak SNP ID and position") |
|
|
557 |
line("td", str(singleQtl.peakSNP)) |
|
|
558 |
with tag("tr"): |
|
|
559 |
line("td", "Number of total genes in the QTL") |
|
|
560 |
line("td", str(len(singleQtl.totalGenesInQtl))) |
|
|
561 |
# with tag("tr"): |
|
|
562 |
# line("td", "Number of target genes in the QTL") |
|
|
563 |
# line("td", str(len(singleQtl.targetGenesInQtl))) |
|
|
564 |
# with tag("tr"): |
|
|
565 |
# line("td", "Enrichment of target genes") |
|
|
566 |
# line("td", str(singleQtl.enrichTargetGenes)) |
|
|
567 |
with tag("tr"): |
|
|
568 |
line("td", "Enrichment significance vs background") |
|
|
569 |
line("td", str(singleQtl.pvalue)) |
|
|
570 |
# with tag("div"): |
|
|
571 |
# line("h2", "List of target genes in the QTL") |
|
|
572 |
# with tag("div", klass="table-responsive"): |
|
|
573 |
# with tag("table", klass="table"): |
|
|
574 |
# with tag("thead"): |
|
|
575 |
# with tag("tr", klass="table-success"): |
|
|
576 |
# line("th", "Gene ID", style="width: 25%") |
|
|
577 |
# line("th", "Alias ID", style="width: 25%") |
|
|
578 |
# line("th", "Position", style="width: 25%") |
|
|
579 |
# line("th", "Function", style="width: 25%") |
|
|
580 |
# with tag("tbody"): |
|
|
581 |
# for qtl in singleQtl.targetGenesInQtl: |
|
|
582 |
# if qtl == "nan": |
|
|
583 |
# continue |
|
|
584 |
# with tag("tr"): |
|
|
585 |
# line("td", str(geneFuncDict[qtl]["geneId"].strip())) |
|
|
586 |
# line("td", str(geneFuncDict[qtl]["aliasId"].strip())) |
|
|
587 |
# line("td", str(geneFuncDict[qtl]["position"].strip())) |
|
|
588 |
# line("td", str(geneFuncDict[qtl]["function"].strip())) |
|
|
589 |
with tag("div"): |
|
|
590 |
line("h2", "List of total genes in the QTL") |
|
|
591 |
with tag("div", klass="table-responsive"): |
|
|
592 |
with tag("table", klass="table"): |
|
|
593 |
with tag("thead"): |
|
|
594 |
with tag("tr", klass="table-success"): |
|
|
595 |
line("th", "Gene ID", style="width: 25%") |
|
|
596 |
line("th", "Alias ID", style="width: 25%") |
|
|
597 |
line("th", "Position", style="width: 25%") |
|
|
598 |
line("th", "Function", style="width: 25%") |
|
|
599 |
with tag("tbody"): |
|
|
600 |
for qtl in singleQtl.totalGenesInQtl: |
|
|
601 |
if qtl == "nan": |
|
|
602 |
continue |
|
|
603 |
with tag("tr"): |
|
|
604 |
line("td", str(geneFuncDict[qtl]["geneid"].strip())) |
|
|
605 |
line("td", str(geneFuncDict[qtl]["aliasid"].strip())) |
|
|
606 |
line("td", str(geneFuncDict[qtl]["position"].strip())) |
|
|
607 |
line("td", str(geneFuncDict[qtl]["function"].strip())) |
|
|
608 |
|
|
|
609 |
|
|
|
610 |
# def getSingleTrait(traitName, doc=None, tag=None, text=None, line=None): |
|
|
611 |
# with tag("div", klass="container"): |
|
|
612 |
# with tag("div", klass="row"): |
|
|
613 |
# with tag("div", klass="col"): |
|
|
614 |
# line("h1", "Details in " + traitName, style="text-align: center;") |
|
|
615 |
# doc.stag("img", klass="img-fluid", src="../../assets/img/manhattan.jpg") |
|
|
616 |
# with tag("div", klass="row"): |
|
|
617 |
# with tag("div", klass="col-6"): |
|
|
618 |
# doc.stag("img", klass="img-fluid", src="../../assets/img/qqplot.png") |
|
|
619 |
# with tag("div", klass="col-6"): |
|
|
620 |
# doc.stag("img", klass="img-fluid", src="../../assets/img/boxplot.png") |
|
|
621 |
|
|
|
622 |
def getListItem(data, qtlName=None, traitName=None, doc=None, tag=None, text=None, line=None, mainPage=False): |
|
|
623 |
for index, row in data.iterrows(): |
|
|
624 |
qtlItem = SingleQtlStatistics(row, index) |
|
|
625 |
if qtlName and qtlName == qtlItem.qtlName: |
|
|
626 |
expand = "true" |
|
|
627 |
faPlusOrMinus = "fa-minus" |
|
|
628 |
myClass = "list-unstyled collapse nav nav-pills show" |
|
|
629 |
active = " active" |
|
|
630 |
else: |
|
|
631 |
expand = "false" |
|
|
632 |
faPlusOrMinus = "fa-plus" |
|
|
633 |
myClass = "list-unstyled collapse nav nav-pills" |
|
|
634 |
active = "" |
|
|
635 |
if mainPage: |
|
|
636 |
relativeDir = os.path.join("", qtlItem.qtlName) |
|
|
637 |
else: |
|
|
638 |
if qtlName == qtlItem.qtlName: |
|
|
639 |
relativeDir = "" |
|
|
640 |
else: |
|
|
641 |
relativeDir = os.path.join("../", qtlItem.qtlName) |
|
|
642 |
with tag("li"): |
|
|
643 |
with tag("div", klass="qtlItem" + active): |
|
|
644 |
with tag("a", ("href", os.path.join(relativeDir, qtlItem.qtlName + ".html")), klass="qtlLink"): |
|
|
645 |
text(qtlItem.qtlName) |
|
|
646 |
with tag("a", ("href", "#" + qtlItem.qtlName), ("data-toggle", "collapse"), ("aria-expanded", expand)): |
|
|
647 |
line("i", "", klass="fa " + faPlusOrMinus) |
|
|
648 |
with tag("ul", ("class", myClass), ("id", qtlItem.qtlName), ("aria-expanded", expand)): |
|
|
649 |
for i in qtlItem.traitNames: |
|
|
650 |
with tag("li"): |
|
|
651 |
href = os.path.join(relativeDir, i + ".html") |
|
|
652 |
if traitName and traitName == i: |
|
|
653 |
with tag("a", ("href", href), ("class", "active"), ("aria-selected", "true")): |
|
|
654 |
line("i", "", klass="fa fa-link") |
|
|
655 |
text(" " + i) |
|
|
656 |
else: |
|
|
657 |
with tag("a", ("href", href), ("aria-selected", "false")): |
|
|
658 |
line("i", "", klass="fa fa-link") |
|
|
659 |
text(" " + i) |
|
|
660 |
|
|
|
661 |
|
|
|
662 |
def generateMainPage(data, allQtlStatistics, allTraitStatistics): |
|
|
663 |
doc, tag, text, line = Doc().ttl() |
|
|
664 |
doc.asis('<!DOCTYPE html>') |
|
|
665 |
with tag('html'): |
|
|
666 |
with tag('head'): |
|
|
667 |
doc.stag('meta', charset='utf-8') |
|
|
668 |
doc.stag('meta', name='viewport', content='width=device-width, initial-scale=1.0, shrink-to-fit=no') |
|
|
669 |
line('title', 'MODAS main page') |
|
|
670 |
doc.stag('link', rel='stylesheet', href='assets/bootstrap/css/bootstrap.min.css') |
|
|
671 |
doc.stag('link', rel='stylesheet', href='assets/fonts/font-awesome.min.css') |
|
|
672 |
doc.stag('link', rel='stylesheet', href="assets/css/modas.css") |
|
|
673 |
# doc.stag('link', rel='stylesheet', href="assets/css/styles.css") |
|
|
674 |
|
|
|
675 |
with tag("body"): |
|
|
676 |
with tag("div", id="sidebar-test"): |
|
|
677 |
with tag("div", klass="sidebar-header"): |
|
|
678 |
with tag("h2"): |
|
|
679 |
line("a", "MODAS", href="mainPage.html", klass="modas") |
|
|
680 |
with tag("ul"): |
|
|
681 |
getListItem(data, doc=doc, tag=tag, text=text, line=line, mainPage=True) |
|
|
682 |
|
|
|
683 |
with tag("div", klass="content"): |
|
|
684 |
with tag("div", klass="container"): |
|
|
685 |
with tag("div", klass="row"): |
|
|
686 |
with tag("div", klass="col"): |
|
|
687 |
getAllQtlSummary(allQtlStatistics, doc=doc, tag=tag, text=text, line=line) |
|
|
688 |
doc.stag("hr", style="margin-bottom: 50px;margin-top: 50px;") |
|
|
689 |
getAllTraitSummary(allTraitStatistics, doc=doc, tag=tag, text=text, line=line) |
|
|
690 |
line("script", "", src="assets/js/jquery.min.js") |
|
|
691 |
line("script", "", src="assets/bootstrap/js/bootstrap.min.js") |
|
|
692 |
line("script", "", src="assets/js/modas.js") |
|
|
693 |
|
|
|
694 |
mainPageOut = open("mainPage.html", "w") |
|
|
695 |
res = indent(doc.getvalue(), indentation=" ") |
|
|
696 |
print(res, file=mainPageOut) |
|
|
697 |
mainPageOut.close() |
|
|
698 |
|
|
|
699 |
|
|
|
700 |
def generateSingleQtlPage(data, geneFuncDict): |
|
|
701 |
for index, row in data.iterrows(): |
|
|
702 |
qtlItem = SingleQtlStatistics(row, index) |
|
|
703 |
if not os.path.exists(qtlItem.qtlName): |
|
|
704 |
os.makedirs(qtlItem.qtlName) |
|
|
705 |
out = open(os.path.join(qtlItem.qtlName, qtlItem.qtlName + ".html"), "w") |
|
|
706 |
|
|
|
707 |
doc, tag, text, line = Doc().ttl() |
|
|
708 |
doc.asis('<!DOCTYPE html>') |
|
|
709 |
with tag('html'): |
|
|
710 |
with tag('head'): |
|
|
711 |
doc.stag('meta', charset='utf-8') |
|
|
712 |
doc.stag('meta', name='viewport', content='width=device-width, initial-scale=1.0, shrink-to-fit=no') |
|
|
713 |
line('title', 'Summary information in QTL ' + qtlItem.qtlName) |
|
|
714 |
doc.stag('link', rel='stylesheet', href='../assets/bootstrap/css/bootstrap.min.css') |
|
|
715 |
doc.stag('link', rel='stylesheet', href='../assets/fonts/font-awesome.min.css') |
|
|
716 |
doc.stag('link', rel='stylesheet', href="../assets/css/modas.css") |
|
|
717 |
# doc.stag('link', rel='stylesheet', href="assets/css/styles.css") |
|
|
718 |
with tag("body"): |
|
|
719 |
with tag("div", id="sidebar-test"): |
|
|
720 |
with tag("div", klass="sidebar-header"): |
|
|
721 |
with tag("h2"): |
|
|
722 |
line("a", "MODAS", href="../mainPage.html", klass="modas") |
|
|
723 |
with tag("ul"): |
|
|
724 |
getListItem(data, qtlName=qtlItem.qtlName, doc=doc, tag=tag, text=text, line=line) |
|
|
725 |
|
|
|
726 |
with tag("div", klass="content"): |
|
|
727 |
getSingleQtlInfo(qtlItem, index, geneFuncDict, doc=doc, tag=tag, text=text, line=line) |
|
|
728 |
|
|
|
729 |
line("script", "", src="../assets/js/jquery.min.js") |
|
|
730 |
line("script", "", src="../assets/bootstrap/js/bootstrap.min.js") |
|
|
731 |
line("script", "", src="../assets/js/modas.js") |
|
|
732 |
|
|
|
733 |
customJs = ''' |
|
|
734 |
<script> |
|
|
735 |
var offestFromTop = %d * 45 + 68; |
|
|
736 |
$('#sidebar-test').scrollTop(offestFromTop); |
|
|
737 |
|
|
|
738 |
function clickItem(event) { |
|
|
739 |
var target = event.currentTarget; |
|
|
740 |
$(target).parent().removeClass(".active").addClass(".active"); |
|
|
741 |
|
|
|
742 |
var index = $("div.qtlItem").index($(this).parent()); |
|
|
743 |
var offestFromTop = index * 45 + 68; |
|
|
744 |
$('#sidebar-test').scrollTop(offestFromTop); |
|
|
745 |
} |
|
|
746 |
if ($("div.qtlItem .qtlLink")) { |
|
|
747 |
var qtlLink = $("div.qtlItem .qtlLink"); |
|
|
748 |
for (var i = 0; i < qtlLink.length; i++) { |
|
|
749 |
var item = qtlLink[i]; |
|
|
750 |
item.onclick = clickItem; |
|
|
751 |
} |
|
|
752 |
} |
|
|
753 |
</script> |
|
|
754 |
''' % (index) |
|
|
755 |
doc.asis(customJs) |
|
|
756 |
|
|
|
757 |
res = indent(doc.getvalue(), indentation=" ") |
|
|
758 |
print(res, file=out) |
|
|
759 |
out.close() |
|
|
760 |
|
|
|
761 |
|
|
|
762 |
def generateSingleTraitPage(data, manhattanDir, qqDir, boxplotDir): |
|
|
763 |
for index, row in data.iterrows(): |
|
|
764 |
qtlItem = SingleQtlStatistics(row, index) |
|
|
765 |
for traitName in qtlItem.traitNames: |
|
|
766 |
# getListItem(qtlItem) |
|
|
767 |
out = open(os.path.join(qtlItem.qtlName, traitName + ".html"), "w") |
|
|
768 |
doc, tag, text, line = Doc().ttl() |
|
|
769 |
doc.asis('<!DOCTYPE html>') |
|
|
770 |
with tag('html'): |
|
|
771 |
with tag('head'): |
|
|
772 |
doc.stag('meta', charset='utf-8') |
|
|
773 |
doc.stag('meta', name='viewport', content='width=device-width, initial-scale=1.0, shrink-to-fit=no') |
|
|
774 |
line('title', 'Detailed information in trait ' + traitName) |
|
|
775 |
doc.stag('link', rel='stylesheet', href='../assets/bootstrap/css/bootstrap.min.css') |
|
|
776 |
doc.stag('link', rel='stylesheet', href='../assets/fonts/font-awesome.min.css') |
|
|
777 |
doc.stag('link', rel='stylesheet', href="../assets/css/modas.css") |
|
|
778 |
# doc.stag('link', rel='stylesheet', href="assets/css/styles.css") |
|
|
779 |
with tag("body"): |
|
|
780 |
with tag("div", id="sidebar-test"): |
|
|
781 |
with tag("div", klass="sidebar-header"): |
|
|
782 |
with tag("h2"): |
|
|
783 |
line("a", "MODAS", href="../mainPage.html", klass="modas") |
|
|
784 |
with tag("ul"): |
|
|
785 |
getListItem(data, qtlItem.qtlName, traitName, doc=doc, tag=tag, text=text, line=line) |
|
|
786 |
|
|
|
787 |
manhattanFile = glob.glob(os.path.join(manhattanDir, "Manhattan.*_{}.jpg".format(traitName)))[0] |
|
|
788 |
boxplotFile = glob.glob(os.path.join(boxplotDir, "{}_*.jpg".format(traitName)))[0] |
|
|
789 |
qqplotFile = glob.glob(os.path.join(qqDir, "QQplot.*_{}.jpg".format(traitName)))[0] |
|
|
790 |
with tag("div", klass="content"): |
|
|
791 |
with tag("div", klass="container"): |
|
|
792 |
with tag("div", klass="row"): |
|
|
793 |
with tag("div", klass="col"): |
|
|
794 |
line("h1", "Details in " + traitName, style="text-align: center;") |
|
|
795 |
doc.stag("img", klass="img-fluid", src='../'+'/'.join(manhattanFile.split('/')[-2:])) |
|
|
796 |
with tag("div", klass="row"): |
|
|
797 |
with tag("div", klass="col-6"): |
|
|
798 |
doc.stag("img", klass="img-fluid", src='../'+'/'.join(qqplotFile.split('/')[-2:])) |
|
|
799 |
with tag("div", klass="col-6"): |
|
|
800 |
doc.stag("img", klass="img-fluid", src='../'+'/'.join(boxplotFile.split('/')[-2:])) |
|
|
801 |
|
|
|
802 |
line("script", "", src="../assets/js/jquery.min.js") |
|
|
803 |
line("script", "", src="../assets/bootstrap/js/bootstrap.min.js") |
|
|
804 |
line("script", "", src="../assets/js/modas.js") |
|
|
805 |
|
|
|
806 |
customJs = ''' |
|
|
807 |
<script> |
|
|
808 |
var offestFromTop = %d * 45 + 68; |
|
|
809 |
$('#sidebar-test').scrollTop(offestFromTop); |
|
|
810 |
|
|
|
811 |
function clickItem(event) { |
|
|
812 |
var target = event.currentTarget; |
|
|
813 |
$(target).parent().removeClass(".active").addClass(".active"); |
|
|
814 |
|
|
|
815 |
var index = $("div.qtlItem").index($(this).parent()); |
|
|
816 |
var offestFromTop = index * 45 + 68; |
|
|
817 |
$('#sidebar-test').scrollTop(offestFromTop); |
|
|
818 |
} |
|
|
819 |
if ($("div.qtlItem .qtlLink")) { |
|
|
820 |
var qtlLink = $("div.qtlItem .qtlLink"); |
|
|
821 |
for (var i = 0; i < qtlLink.length; i++) { |
|
|
822 |
var item = qtlLink[i]; |
|
|
823 |
item.onclick = clickItem; |
|
|
824 |
} |
|
|
825 |
} |
|
|
826 |
</script> |
|
|
827 |
''' % (index) |
|
|
828 |
doc.asis(customJs) |
|
|
829 |
|
|
|
830 |
res = indent(doc.getvalue(), indentation=" ") |
|
|
831 |
print(res, file=out) |
|
|
832 |
out.close() |
|
|
833 |
|
|
|
834 |
|
|
|
835 |
def generateHtml(qtl_anno, myFuncFile, out_dir, totalSNPs): |
|
|
836 |
manhattanDir = os.path.abspath(out_dir+'/manhattan_plot') |
|
|
837 |
qqplotDir = os.path.abspath(out_dir+'/qqplot') |
|
|
838 |
boxplotDir = os.path.abspath(out_dir+'/boxplot') |
|
|
839 |
|
|
|
840 |
allQtlStatistics = AllQtlStatistics() |
|
|
841 |
allQtlStatistics.getInit(qtl_anno) |
|
|
842 |
allQtlStatistics.totalSNPs = totalSNPs |
|
|
843 |
allTraitStatistics = AllTraitStatistics() |
|
|
844 |
allTraitStatistics.getInit(qtl_anno) |
|
|
845 |
|
|
|
846 |
geneFuncDict = getGeneFunc(myFuncFile, "\t") |
|
|
847 |
|
|
|
848 |
resolveDir(out_dir) |
|
|
849 |
generateMainPage(qtl_anno, allQtlStatistics, allTraitStatistics) |
|
|
850 |
generateSingleQtlPage(qtl_anno, geneFuncDict) |
|
|
851 |
|
|
|
852 |
generateSingleTraitPage(qtl_anno, manhattanDir, qqplotDir, boxplotDir) |
|
|
853 |
|