import pandas as pd
import numpy as np
import modas.multiprocess as mp
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.rinterface_lib.embedded import RRuntimeError
import rpy2.robjects as robjects
from collections import Counter
import modas.gwas_cmd as gc
import pyranges as pr
from yattag import Doc, indent
import subprocess
import shutil
import warnings
import glob
import os
import re
pandas2ri.activate()
data_table = importr('data.table')
base = importr('base')
robjects.r('options(datatable.showProgress = FALSE)')
warnings.filterwarnings("ignore")
utils_path = subprocess.check_output('locate modas/utils', shell=True, text=True, encoding='utf-8')
#utils_path = '/'.join(re.search('\n(.*site-packages.*)\n', utils_path).group(1).split('/')[:-1])
utils_path = re.search('\n(.*site-packages.*)\n', utils_path).group(1)
if not utils_path.endswith('utils'):
utils_path = '/'.join(utils_path.split('/')[:-1])
# def gwas(phe, geno, num_threads, phe_fn):
# geno_prefix = geno.split('/')[-1]
# related_matrix_cmd = 'gemma.linux -bfile {0}.link -gk 1 -o {1}'.format(geno_prefix,geno_prefix)
# gwas_cmd = 'gemma.linux -bfile {0}.link -k output/{0}.cXX.txt -lmm -n {1} -o {2}'
# fam = pd.read_csv(geno+'.fam', sep=r'\s+', header=None)
# fam[5] = 1
# fam = pd.merge(fam, phe, left_on=0, right_index=True, how='left')
# fam.to_csv(geno_prefix+'.link.fam', sep='\t', na_rep='NA', header=None, index=False)
# if os.path.exists(geno_prefix+'.link.bed'):
# os.remove(geno_prefix+'.link.bed')
# if os.path.exists(geno_prefix+'.link.bim'):
# os.remove(geno_prefix+'.link.bim')
# os.symlink(geno+'.bed', geno_prefix+'.link.bed')
# os.symlink(geno+'.bim', geno_prefix+'.link.bim')
# values = list()
# for _, p in enumerate(phe.columns):
# p = p.replace('/', '.')
# values.append((gwas_cmd.format(*[geno_prefix, _ + 2, '.'.join(phe_fn.split('/')[-1].split('.')[:-1])+ '_' + str(p)]),))
# s = mp.run(related_matrix_cmd)
# if s != 0:
# return None
# else:
# s = mp.parallel(mp.run, values, num_threads)
# os.remove(geno_prefix+'.link.bed')
# os.remove(geno_prefix+'.link.bim')
# os.remove(geno_prefix+'.link.fam')
# return s
def gwas(phe, geno, num_threads, phe_fn, gwas_model):
software, model = gwas_model.split('_')
geno_prefix = geno.split('/')[-1]
phe_fn = '.'.join(phe_fn.split('/')[-1].split('.')[:-1])
if software == 'gemma' and model == 'MLM':
geno_prefix = geno.split('/')[-1]
related_matrix_cmd = utils_path+'/gemma.linux -bfile {0}.link -gk 1 -o {1}'.format(geno_prefix,geno_prefix)
fam = pd.read_csv(geno + '.fam', sep=r'\s+', header=None)
fam[5] = 1
fam = pd.merge(fam, phe, left_on=0, right_index=True, how='left')
fam.to_csv(geno_prefix+'.link.fam', sep='\t', na_rep='NA', header=None, index=False)
if software != 'GAPIT' and gwas_model != 'gemma_LM':
if os.path.exists(geno_prefix+'.link.bed'):
os.remove(geno_prefix+'.link.bed')
if os.path.exists(geno_prefix+'.link.bim'):
os.remove(geno_prefix+'.link.bim')
os.symlink(geno+'.bed', geno_prefix+'.link.bed')
os.symlink(geno+'.bim', geno_prefix+'.link.bim')
if software == 'rMVP':
if os.path.exists(geno_prefix+'.link.fam'):
os.remove(geno_prefix+'.link.fam')
os.symlink(geno + '.fam', geno_prefix + '.link.fam')
fam = pd.read_csv(geno + '.fam', sep=r'\s+', header=None)
phe = phe.reindex(fam[0])
if software == 'gemma' and model == "LM":
if os.path.exists('./gemma_lm_geno'):
shutil.rmtree('./gemma_lm_geno')
os.mkdir('./gemma_lm_geno')
for p in phe.columns:
p = p.replace('m/z', 'm.z')
os.symlink('../' + geno + '.bed', './gemma_lm_geno/' + geno_prefix + '_' + p + '.link.bed')
os.symlink('../' + geno + '.bim', './gemma_lm_geno/' + geno_prefix + '_' + p + '.link.bim')
fam = pd.read_csv(geno + '.fam', sep=r'\s+', header=None)
fam = pd.merge(fam.iloc[:, :5], phe.loc[:, p.replace('m.z', 'm/z')].to_frame(), left_on=0, right_index=True, how='left')
fam.to_csv('./gemma_lm_geno/' + geno_prefix + '_' + p + '.link.fam', sep='\t', na_rep='NA', header=None, index=False)
values = list()
for _, p in enumerate(phe.columns):
p = p.replace('m/z', 'm.z')
if gwas_model == 'gemma_LM':
values.append(((gc.gemma_cmd(model, './gemma_lm_geno/' + geno_prefix + '_' + p + '.link', None, None, '_'.join([phe_fn, gwas_model, p]))),))
if gwas_model == 'gemma_MLM':
values.append(((gc.gemma_cmd(model, geno_prefix + '.link', geno_prefix, _ + 2, '_'.join([phe_fn, gwas_model, p]))),))
if software == 'rMVP':
if not os.path.exists('./output'):
os.mkdir('./output')
omics_phe = phe.loc[:, p.replace('m.z', 'm/z')].to_frame().reset_index()
omics_phe.columns = ['Taxa', '_'.join([phe_fn, gwas_model, p])]
values.append((model, geno_prefix+'.link', geno_prefix+'.link', omics_phe, 1, './output'))
if gwas_model == 'gemma_MLM':
s = mp.run(related_matrix_cmd)
if s != 0:
return None
if software == 'gemma':
s = mp.parallel(mp.run, values, num_threads)
if software == 'rMVP':
s1 = mp.parallel(gc.rmvp, (values[0],), 1)
s = mp.parallel(gc.rmvp, values[1:], num_threads)
s = s1 + s
if software == 'GAPIT':
if not os.path.exists('./output'):
os.mkdir('./output')
phe.columns = ['_'.join([phe_fn, gwas_model, p]) for p in phe.columns]
phe = phe.reset_index()
phe.columns = ['Taxa'] + list(phe.columns[1:])
geno = os.path.abspath(geno)
os.chdir('./output')
s = gc.gapit(model, geno, phe, utils_path)
os.chdir('../')
if gwas_model == 'gemma_MLM' or software == 'rMVP':
os.remove(geno_prefix+'.link.bed')
os.remove(geno_prefix+'.link.bim')
os.remove(geno_prefix+'.link.fam')
if gwas_model == 'gemma_LM':
shutil.rmtree('./gemma_lm_geno')
return s
def gwas_plot(res, p, prefix, t, software):
try:
base.sink('/dev/null')
w = data_table.fread(res, data_table=base.getOption("datatable.fread.datatable", False))
if software == 'gemma':
w_subset = w.loc[w.p_wald <= float(p), :]
m = w_subset[['rs', 'chr', 'ps', 'p_wald']]
q = w[['rs', 'chr', 'ps', 'p_wald']]
if software == 'rMVP':
w_subset = w.loc[w[w.columns[-1]] <= float(p), :]
m = w_subset.iloc[:, [0, 1, 2, -1]]
q = w.iloc[:, [0, 1, 2, -1]]
if software == 'GAPIT':
w_subset = w.loc[w['P.value'] <= float(p), :]
m = w_subset[['SNP', 'Chromosome', 'Position', 'P.value']]
q = w[['SNP', 'Chromosome', 'Position', 'P.value']]
m.columns = ['SNP', 'Chromosome', 'Position', prefix]
q.columns = ['SNP', 'Chromosome', 'Position', prefix]
thresholdi = robjects.FloatVector([1.0 / w.shape[0], 1e-6, 1e-5])
lim = -np.log10(min(m[prefix])) + 2
#w_subset = base.subset(w, np.array(w.rx2('p_wald')) <= float(p))
#m = w_subset.rx(robjects.StrVector(['rs', 'chr', 'ps', 'p_wald']))
#q = w.rx(robjects.StrVector(['rs', 'chr', 'ps', 'p_wald']))
# m.names = ['SNP', 'Chromosome', 'Position', prefix]
# q.names = ['SNP', 'Chromosome', 'Position', prefix]
#thresholdi = robjects.FloatVector([1.0/w.nrow, 1e-6, 1e-5])
#lim = -np.log10(min(np.array(w_subset.rx2('p_wald'))))+2
#base.sink('/dev/null')
robjects.r('source("'+utils_path+'/CMplot.r")')
CMplot = robjects.r['CMplot']
CMplot(m, plot_type='m', col=robjects.StrVector(["grey30", "grey60"]), ylim=robjects.FloatVector([2, lim]), threshold=thresholdi,
cex=robjects.FloatVector([0.5, 0.5, 0.5]), signal_cex=robjects.FloatVector([0.5, 0.5, 0.5]),
threshold_col=robjects.StrVector(['red', 'green', 'blue']), chr_den_col=robjects.rinterface.NULL, amplify=True,
signal_pch = robjects.IntVector([19, 19, 19]), dpi=300,
signal_col=robjects.StrVector(['red', 'green', 'blue']), multracks=False, LOG10=True, file=t)
CMplot(q, plot_type='q', col='grey30', threshold=thresholdi[0],
signal_cex=robjects.FloatVector([0.5, 0.5, 0.5]), signal_pch=robjects.IntVector([19, 19, 19]),
conf_int_col='gray', signal_col='red', multracks=False, LOG10=True, file=t, dpi=300)
base.sink()
except RRuntimeError:
return 0
except ValueError:
return 0
else:
return 1
def gwas_plot_parallel(phe, p, threads, t, phe_fn, gwas_model):
software, model = gwas_model.split('_')
values = list()
for i in phe.columns:
i = str(i).replace('m/z', 'm.z')
if software == 'gemma':
gwas_fn = 'output/' + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.assoc.txt'
if software == 'rMVP':
gwas_fn = 'output/' + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.'.join(['.'+model, 'csv'])
if software == 'GAPIT':
gwas_fn = 'output/' + '.'.join(['GAPIT', model, '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]), 'GWAS', 'Results', 'csv'])
values.append((gwas_fn, p, '.'.join(phe_fn.split('/')[-1].split('.')[:-1]) + '_' + str(i), t, software))
s = mp.parallel(gwas_plot, values, threads)
return s
def boxplot(phe, g, qtl):
robjects.r('''box_plot <- function(d, phe, rs, level){
library(ggplot2)
library(ggsignif)
d <- d[d$haplotype!=1,]
d[d$haplotype==0,'haplotype'] <- level[1]
d[d$haplotype==2,'haplotype'] <- level[2]
d$haplotype <- factor(d$haplotype,levels = level)
b <- as.numeric(formatC(max(d[,1],na.rm=T)*1.2/4,format = 'e',digits = 1))
p <- ggplot(data = d,aes_string(x='haplotype',y=names(d)[1],fill='haplotype'))+
theme_bw()+
theme(legend.title = element_blank(),
legend.background = element_blank(),
legend.key = element_rect(colour = NA, fill = NA),
legend.text = element_text(size = 4),
legend.key.size = unit(3,'mm'),
legend.position = 'none',
plot.title = element_text(hjust=0.5,size = 6),
plot.margin=unit(c(0.3,0.3,0,0),'cm'),
panel.grid = element_blank(),
axis.line = element_line(colour = 'black',size=0.4),
axis.text = element_text(size = 6,color = 'black'),
axis.ticks.length=unit(.1, 'cm'))+
stat_boxplot(geom = 'errorbar', width = 0.2,size=0.1)+
geom_boxplot(lwd=0.2,width=0.5,outlier.size = 0.2)+
geom_signif(comparisons = list(level),map_signif_level = F,
test= t.test, size=0.2 ,textsize=2, y_position = max(d[,1],na.rm=T)*1.1)+
#xlab('')+ylab('')+ggtitle(paste(phe,rs,sep='_'))+
xlab('')+ylab('')+ggtitle('')+
scale_y_continuous(breaks=seq(0,4*b,by=b),labels = function(x) formatC(x, format = 'e',digits = 1), limits = c(0, max(d[,1], na.rm=T)*1.2))+
scale_fill_manual(values=c('#E3FFE2', 'forest green'))
ggsave(paste(phe,'_',rs,'_','boxplot','.jpg',sep=''),plot=p,device='jpg',width=3.5,height=4.3,units = 'cm')
}''')
robjects.r['options'](warn=-1)
base.sink('/dev/null')
box_plot = robjects.r['box_plot']
g = g.where(g.snp.isin(qtl.SNP), drop=True)
allele = pd.DataFrame([g.a0, g.a1], index=['a0', 'a1'], columns=g.snp.values)
g = pd.DataFrame(g.values, index=g.sample, columns=g.snp.values)
ril = g.index.intersection(phe.index)
g = g.reindex(ril)
phe = phe.reindex(ril)
for index, row in qtl.iterrows():
if row['phe_name'] not in phe.columns:
continue
d = pd.concat([phe[row['phe_name']], g[row['SNP']]], axis=1)
d.columns = ['trait.' + d.columns[0].replace('-', '.'), 'haplotype']
level = robjects.StrVector([allele[row['SNP']]['a1'].values*2, allele[row['SNP']]['a0'].values*2])
box_plot(d, row['phe_name'], row['SNP'], level)
base.sink()
def multi_trait_plot(phe, gwas_dir, qtl, phe_fn, prefix, t, gwas_model):
software, model = gwas_model.split('_')
bk = pd.DataFrame()
for i in phe.columns:
i = i.replace('/', '.')
# fn = gwas_dir + '/' + '.'.join(phe_fn.split('/')[-1].split('.')[:-1]) + '_' + str(i)+'.assoc.txt'
if software == 'gemma':
gwas_fn = gwas_dir + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.assoc.txt'
d = pd.read_csv(gwas_fn, sep='\t')
d = d[['rs', 'chr', 'ps', 'p_wald']]
if software == 'rMVP':
gwas_fn = gwas_dir + '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]) + '.'.join(['.' + model, 'csv'])
d = pd.read_csv(gwas_fn)
d = d.iloc[:, [0, 1, 2, -1]]
d.columns = ['rs', 'chr', 'ps', 'p_wald']
if software == 'GAPIT':
gwas_fn = gwas_dir + '.'.join(['GAPIT', model, '_'.join(['.'.join(phe_fn.split('/')[-1].split('.')[:-1]), gwas_model, str(i)]), 'GWAS', 'Results', 'csv'])
d = pd.read_csv(gwas_fn)
d = d[['SNP', 'Chromosome', 'Position ', 'P.value']]
d.columns = ['rs', 'chr', 'ps', 'p_wald']
# d = pd.read_csv(gwas_fn, sep='\t')
if bk.empty:
bk = d.copy()
bk.loc[bk.p_wald <= 1e-5, 'p_wald'] = 1e-5
for index, row in qtl.loc[qtl.phe_name == i, :].iterrows():
peak_pos = int(row['SNP'].split('_')[-1])
chrom = row['CHR']
sig_tmp = pd.concat([bk.loc[(bk.chr.astype(str) == str(chrom)) & (bk.ps >= peak_pos-1000000) & (bk.ps <= peak_pos+1000000), 'p_wald'],
d.loc[(bk.chr.astype(str) == str(chrom)) & (bk.ps >= peak_pos-1000000) & (bk.ps <= peak_pos+1000000), 'p_wald']], axis=1)
sig_tmp.columns = ['bk', 'phe']
bk.loc[(bk.chr.astype(str) == str(chrom)) & (bk.ps >= peak_pos-1000000) & (bk.ps <= peak_pos+1000000), 'p_wald'] = sig_tmp.apply(
lambda x: x['bk'] if x['bk'] < x['phe'] else x['phe'], axis=1)
bk = bk.loc[bk.p_wald <= 1e-2, :]
bk.loc[bk.p_wald <= 1e-20, 'p_wald'] = 1e-20
bk = bk[['rs', 'chr', 'ps', 'p_wald']]
thresholdi = robjects.FloatVector([1.0 / d.shape[0], 1e-6, 1e-5])
lim = -np.log10(min(bk['p_wald'])) + 2
bk.columns = ['SNP', 'Chromosome', 'Position', prefix]
base.sink('/dev/null')
robjects.r('source("'+utils_path+'/CMplot.r")')
#robjects.r('source("/home/debian/文档/MGWAP/compound_extract/plot/CMplot.r")')
CMplot = robjects.r['CMplot']
CMplot(bk, plot_type='m', col=robjects.StrVector(["grey30", "grey60"]), ylim=robjects.FloatVector([2, lim]),
threshold=thresholdi,
cex=robjects.FloatVector([0.5, 0.5, 0.5]), signal_cex=robjects.FloatVector([0.5, 0.5, 0.5]),
threshold_col=robjects.StrVector(['red', 'green', 'blue']), chr_den_col=robjects.rinterface.NULL,
amplify=True,
signal_pch=robjects.IntVector([19, 19, 19]), dpi=300,
signal_col=robjects.StrVector(['red', 'green', 'blue']), multracks=False, LOG10=True, file=t)
base.sink()
def qtl_anno(qtl, anno):
anno = anno[['geneid', 'position']]
anno.loc[:, 'chr'] = anno['position'].apply(lambda x: x.split(':')[0])
anno.loc[:, 'start'] = anno['position'].apply(lambda x: x.split(':')[1].split('-')[0])
anno.loc[:, 'end'] = anno['position'].apply(lambda x: x.split(':')[1].split('-')[1])
anno = anno[['chr', 'start', 'end', 'geneid']]
anno.columns = ['Chromosome', 'Start', 'End', 'geneid']
qtl_range = qtl[['CHR', 'qtl_start', 'qtl_end', 'phe_name']]
qtl_range.columns = ['Chromosome', 'Start', 'End', 'phe_name']
qtl_range = pr.PyRanges(qtl_range)
anno = pr.PyRanges(anno)
qtl_anno_intersect = qtl_range.join(anno, how='left')
qtl_anno = pd.DataFrame()
for k in sorted(qtl_anno_intersect.dfs.keys()):
qtl_anno = pd.concat([qtl_anno, qtl_anno_intersect.dfs[k]])
qtl_anno = qtl_anno[['Chromosome', 'Start', 'End', 'phe_name', 'geneid']]
qtl_anno.loc[:, 'Chromosome'] = qtl_anno.Chromosome.astype(str)
qtl_anno.loc[:, 'Start'] = qtl_anno.Start.astype(int)
qtl_anno.loc[:, 'End'] = qtl_anno.End.astype(int)
qtl.loc[:, 'CHR'] = qtl.CHR.astype(str)
qtl = pd.merge(qtl, qtl_anno, left_on=['CHR', 'qtl_start', 'qtl_end', 'phe_name'],
right_on=['Chromosome', 'Start', 'End', 'phe_name'])
qtl = qtl.drop(['Chromosome', 'Start', 'End'], axis=1)
qtl = qtl.groupby(['CHR', 'qtl_start', 'qtl_end', 'SNP', 'P', 'SP2_num', 'qtl_length', 'phe_name'])['geneid'].apply(';'.join).reset_index()
qtl.columns = ['CHR', 'qtl_start', 'qtl_end', 'SNP', 'P', 'SP2_num', 'qtl_length', 'phe_name', 'qtl_all_gene']
qtl.loc[qtl.qtl_all_gene == '-1', 'qtl_all_gene'] = 'nan'
return qtl
'''
Generate html report for SingleTrait
Author: CrazyHsu @ crazyhsu9527@gmail.com
Created on: 2020-08-26 20:33:52
Last modified: 2021-03-24 17:20:33
'''
################# Classes #################
class AllQtlStatistics():
def __init__(self):
self.totalSNPs = 0
self.qtlDetected = 0
self.medianQtlLen = 0
self.longestQtlLen = 0
self.shortestQtlLen = 0
self.totalAnnoGenes = 0
# self.totalTargetGenes = 0
# self.totalEnrichTargetGenes = 0
# self.aveEnrichGenes = 0
def getQtlLengthInfo(self, qtlLenSeries):
self.medianQtlLen = np.median(qtlLenSeries)
self.longestQtlLen = np.max(qtlLenSeries)
self.shortestQtlLen = np.min(qtlLenSeries)
def getTotalAnnoGenes(self, qtlAllGenesSeries):
self.totalAnnoGenes = len(self.mergeSeries2List(qtlAllGenesSeries))
# def getTotalTargetGenes(self, qtlAllMetaGenesSeries):
# self.totalTargetGenes = len(self.mergeSeries2List(qtlAllMetaGenesSeries))
# def getEnrichTargetGenes(self, myDataFrame):
# filteredSeries = myDataFrame.loc[myDataFrame.qtl_p_value <= 0.05, "qtl_meta_gene"]
# self.totalEnrichTargetGenes = len(self.mergeSeries2List(filteredSeries))
def getInit(self, tableData):
self.qtlDetected = len(tableData)
self.getQtlLengthInfo(tableData.qtl_length)
self.getTotalAnnoGenes(tableData.qtl_all_gene)
# self.getTotalTargetGenes(tableData.qtl_meta_gene)
# self.getEnrichTargetGenes(tableData)
def mergeSeries2List(self, mySeries, sep=";"):
return set().union(*mySeries.apply(lambda x: str(x).split(sep)).to_list())
class AllTraitStatistics():
def __init__(self):
self.totalTraits = 0
self.filteredTraits = 0
self.clusteredTraits = 0
self.unclusteredTraits = 0
def getFilteredTraits(self, traitSeries):
self.filteredTraits = len(self.mergeSeries2List(traitSeries, sep=","))
self.totalTraits = len(self.mergeSeries2List(traitSeries, sep=","))
def getClusteredTraits(self, labelSeries):
counter = Counter(labelSeries.to_list())
clustered = [i for i in counter if counter[i] != 1]
unclustered = [i for i in counter if counter[i] == 1]
self.clusteredTraits = len(clustered)
self.unclusteredTraits = len(unclustered)
# def getUnclusterTraits(self, labelSeries):
# self.clusteredTraits = len(set(labelSeries.to_list()))
def getInit(self, tableData):
self.getFilteredTraits(tableData.phe_name)
self.getClusteredTraits(tableData.phe_name)
# self.getUnclusterTraits(tableData.phe_name)
def mergeSeries2List(self, mySeries, sep=";"):
return set().union(*mySeries.apply(lambda x: str(x).split(sep)).to_list())
class SingleQtlStatistics():
def __init__(self, myRow, rowIndex):
self.qtlName = "QTL{}_chr{}-{}-{}".format(rowIndex, myRow.CHR, myRow.qtl_start, myRow.qtl_end)
self.qtlPosition = "{}:{}-{}".format(myRow.CHR, myRow.qtl_start, myRow.qtl_end)
self.peakSNP = "{}, {}".format(myRow.SNP, myRow.SNP)
self.traitNames = myRow.phe_name.split(",")
self.totalGenesInQtl = str(myRow.qtl_all_gene).split(";")
# self.targetGenesInQtl = str(myRow.qtl_meta_gene).split(";")
# self.enrichTargetGenes = 0
self.pvalue = myRow.P
################# Functions ##################
def resolveDir(dirName):
if not os.path.exists(dirName):
os.makedirs(dirName)
os.chdir(dirName)
def getGeneFunc(myGeneFuncFile, sep=","):
gene2func = pd.read_csv(myGeneFuncFile, sep=sep)
tmpDict = gene2func.to_dict("index")
geneFuncDict = {}
for i in tmpDict:
geneFuncDict[tmpDict[i]["geneid"]] = tmpDict[i]
return geneFuncDict
def getAllQtlSummary(allQtlStatistics, doc=None, tag=None, text=None, line=None):
with tag("div", id="qtlSummary"):
line("h1", "Summary of QTLs detected by local GWAS", style="text-align: center;")
multi_trait_File = "./Manhattan.multi_trait.jpg"
with tag("div", style="text-align: center;margin-top: 50px;"):
doc.stag("img", klass="img-fluid", src=multi_trait_File, alt="multi_trait")
with tag("div", style="margin-top: 100px;"):
with tag("div"):
line("h2", "QTL summarization criteria")
line("p", "The cutoffs in generating and filtering QTLs")
with tag("div"):
line("h2", "QTL statistics table")
with tag("div", klass="table-responsive"):
with tag("table", klass="table"):
with tag("thead"):
with tag("tr", klass="table-success"):
line("th", "Categories", style="width: 50%")
line("th", "Statistics value", style="width: 50%")
with tag("tbody"):
with tag("tr"):
line("td", "Genotype file")
line("td", "XXXXXX")
with tag("tr"):
line("td", "Number of SNPs for local GWAS")
line("td", str(allQtlStatistics.totalSNPs))
with tag("tr"):
line("td", "Number of detected QTLs")
line("td", str(allQtlStatistics.qtlDetected))
with tag("tr"):
line("td", "Median QTL length")
line("td", str(allQtlStatistics.medianQtlLen))
with tag("tr"):
line("td", "Longest QTL length")
line("td", str(allQtlStatistics.longestQtlLen))
with tag("tr"):
line("td", "Shortest QTL length")
line("td", str(allQtlStatistics.shortestQtlLen))
with tag("tr"):
line("td", "Total genes in the QTLs")
line("td", str(allQtlStatistics.totalAnnoGenes))
# with tag("tr"):
# line("td", "Total target genes in the QTLs")
# line("td", str(allQtlStatistics.totalTargetGenes))
# with tag("tr"):
# line("td", "Total enrichment of target genes")
# line("td", str(allQtlStatistics.totalEnrichTargetGenes))
# with tag("tr"):
# line("td", "Average enrichment of target genes")
# line("td", str(allQtlStatistics.aveEnrichGenes))
def getAllTraitSummary(allTraitStatistics, doc=None, tag=None, text=None, line=None):
# doc, tag, text, line = Doc().ttl()
with tag("div", id="traitSummary"):
line("h1", "Summary of omics traits detected by local GWAS", style="text-align: center;")
# heatmapFile = "/home/xufeng/xufeng/Projects/MODAS/xufeng1/assets/img/heatmap.png"
# with tag("div", style="text-align: center;margin-top: 50px;"):
# doc.stag("img", klass="img-fluid", src=heatmapFile, alt="heatmap")
with tag("div", style="margin-top: 100px;"):
with tag("div"):
line("h2", "Omics trait filtration criteria")
line("p", "Parameters and pipelines in filtering traits")
with tag("div"):
line("h2", "Trait statistics table")
with tag("div", klass="table-responsive"):
with tag("table", klass="table"):
with tag("thead"):
with tag("tr", klass="table-success"):
line("th", "Categories", style="width: 50%")
line("th", "Statistics value", style="width: 50%")
with tag("tbody"):
with tag("tr"):
line("td", "Omics trait type")
line("td", "Metabolome")
with tag("tr"):
line("td", "Total number of traits")
line("td", str(allTraitStatistics.totalTraits))
with tag("tr"):
line("td", "Filtered number of traits")
line("td", str(allTraitStatistics.filteredTraits))
with tag("tr"):
line("td", "Number of clustered traits")
line("td", str(allTraitStatistics.clusteredTraits))
# with tag("tr"):
# line("td", "Number of modules of clustered traits")
# line("td", "Cell 2")
with tag("tr"):
line("td", "Number of unclustered traits")
line("td", str(allTraitStatistics.unclusteredTraits))
# return doc.getvalue()
def getSingleQtlInfo(singleQtl, index, geneFuncDict, doc=None, tag=None, text=None, line=None):
with tag("div", klass="container"):
with tag("div", klass="row"):
with tag("div", klass="col"):
line("h1", "Summary in " + singleQtl.qtlName, style="text-align: center;")
with tag("div", style="margin-top: 100px;"):
with tag("div"):
line("h2", "QTL summarization criteria for whole -genome GWAS")
line("p", "Parameters and pipeline for filtering traits")
with tag("div"):
line("h2", "QTL statistics table")
with tag("div", klass="table-responsive"):
with tag("table", klass="table"):
with tag("thead"):
with tag("tr", klass="table-success"):
line("th", "QTL", style="width: 50%")
line("th", "Statistics", style="width: 50%")
with tag("tbody"):
with tag("tr"):
line("td", "QTL position")
line("td", str(singleQtl.qtlPosition))
with tag("tr"):
line("td", "Peak SNP ID and position")
line("td", str(singleQtl.peakSNP))
with tag("tr"):
line("td", "Number of total genes in the QTL")
line("td", str(len(singleQtl.totalGenesInQtl)))
# with tag("tr"):
# line("td", "Number of target genes in the QTL")
# line("td", str(len(singleQtl.targetGenesInQtl)))
# with tag("tr"):
# line("td", "Enrichment of target genes")
# line("td", str(singleQtl.enrichTargetGenes))
with tag("tr"):
line("td", "Enrichment significance vs background")
line("td", str(singleQtl.pvalue))
# with tag("div"):
# line("h2", "List of target genes in the QTL")
# with tag("div", klass="table-responsive"):
# with tag("table", klass="table"):
# with tag("thead"):
# with tag("tr", klass="table-success"):
# line("th", "Gene ID", style="width: 25%")
# line("th", "Alias ID", style="width: 25%")
# line("th", "Position", style="width: 25%")
# line("th", "Function", style="width: 25%")
# with tag("tbody"):
# for qtl in singleQtl.targetGenesInQtl:
# if qtl == "nan":
# continue
# with tag("tr"):
# line("td", str(geneFuncDict[qtl]["geneId"].strip()))
# line("td", str(geneFuncDict[qtl]["aliasId"].strip()))
# line("td", str(geneFuncDict[qtl]["position"].strip()))
# line("td", str(geneFuncDict[qtl]["function"].strip()))
with tag("div"):
line("h2", "List of total genes in the QTL")
with tag("div", klass="table-responsive"):
with tag("table", klass="table"):
with tag("thead"):
with tag("tr", klass="table-success"):
line("th", "Gene ID", style="width: 25%")
line("th", "Alias ID", style="width: 25%")
line("th", "Position", style="width: 25%")
line("th", "Function", style="width: 25%")
with tag("tbody"):
for qtl in singleQtl.totalGenesInQtl:
if qtl == "nan":
continue
with tag("tr"):
line("td", str(geneFuncDict[qtl]["geneid"].strip()))
line("td", str(geneFuncDict[qtl]["aliasid"].strip()))
line("td", str(geneFuncDict[qtl]["position"].strip()))
line("td", str(geneFuncDict[qtl]["function"].strip()))
# def getSingleTrait(traitName, doc=None, tag=None, text=None, line=None):
# with tag("div", klass="container"):
# with tag("div", klass="row"):
# with tag("div", klass="col"):
# line("h1", "Details in " + traitName, style="text-align: center;")
# doc.stag("img", klass="img-fluid", src="../../assets/img/manhattan.jpg")
# with tag("div", klass="row"):
# with tag("div", klass="col-6"):
# doc.stag("img", klass="img-fluid", src="../../assets/img/qqplot.png")
# with tag("div", klass="col-6"):
# doc.stag("img", klass="img-fluid", src="../../assets/img/boxplot.png")
def getListItem(data, qtlName=None, traitName=None, doc=None, tag=None, text=None, line=None, mainPage=False):
for index, row in data.iterrows():
qtlItem = SingleQtlStatistics(row, index)
if qtlName and qtlName == qtlItem.qtlName:
expand = "true"
faPlusOrMinus = "fa-minus"
myClass = "list-unstyled collapse nav nav-pills show"
active = " active"
else:
expand = "false"
faPlusOrMinus = "fa-plus"
myClass = "list-unstyled collapse nav nav-pills"
active = ""
if mainPage:
relativeDir = os.path.join("", qtlItem.qtlName)
else:
if qtlName == qtlItem.qtlName:
relativeDir = ""
else:
relativeDir = os.path.join("../", qtlItem.qtlName)
with tag("li"):
with tag("div", klass="qtlItem" + active):
with tag("a", ("href", os.path.join(relativeDir, qtlItem.qtlName + ".html")), klass="qtlLink"):
text(qtlItem.qtlName)
with tag("a", ("href", "#" + qtlItem.qtlName), ("data-toggle", "collapse"), ("aria-expanded", expand)):
line("i", "", klass="fa " + faPlusOrMinus)
with tag("ul", ("class", myClass), ("id", qtlItem.qtlName), ("aria-expanded", expand)):
for i in qtlItem.traitNames:
with tag("li"):
href = os.path.join(relativeDir, i + ".html")
if traitName and traitName == i:
with tag("a", ("href", href), ("class", "active"), ("aria-selected", "true")):
line("i", "", klass="fa fa-link")
text(" " + i)
else:
with tag("a", ("href", href), ("aria-selected", "false")):
line("i", "", klass="fa fa-link")
text(" " + i)
def generateMainPage(data, allQtlStatistics, allTraitStatistics):
doc, tag, text, line = Doc().ttl()
doc.asis('<!DOCTYPE html>')
with tag('html'):
with tag('head'):
doc.stag('meta', charset='utf-8')
doc.stag('meta', name='viewport', content='width=device-width, initial-scale=1.0, shrink-to-fit=no')
line('title', 'MODAS main page')
doc.stag('link', rel='stylesheet', href='assets/bootstrap/css/bootstrap.min.css')
doc.stag('link', rel='stylesheet', href='assets/fonts/font-awesome.min.css')
doc.stag('link', rel='stylesheet', href="assets/css/modas.css")
# doc.stag('link', rel='stylesheet', href="assets/css/styles.css")
with tag("body"):
with tag("div", id="sidebar-test"):
with tag("div", klass="sidebar-header"):
with tag("h2"):
line("a", "MODAS", href="mainPage.html", klass="modas")
with tag("ul"):
getListItem(data, doc=doc, tag=tag, text=text, line=line, mainPage=True)
with tag("div", klass="content"):
with tag("div", klass="container"):
with tag("div", klass="row"):
with tag("div", klass="col"):
getAllQtlSummary(allQtlStatistics, doc=doc, tag=tag, text=text, line=line)
doc.stag("hr", style="margin-bottom: 50px;margin-top: 50px;")
getAllTraitSummary(allTraitStatistics, doc=doc, tag=tag, text=text, line=line)
line("script", "", src="assets/js/jquery.min.js")
line("script", "", src="assets/bootstrap/js/bootstrap.min.js")
line("script", "", src="assets/js/modas.js")
mainPageOut = open("mainPage.html", "w")
res = indent(doc.getvalue(), indentation=" ")
print(res, file=mainPageOut)
mainPageOut.close()
def generateSingleQtlPage(data, geneFuncDict):
for index, row in data.iterrows():
qtlItem = SingleQtlStatistics(row, index)
if not os.path.exists(qtlItem.qtlName):
os.makedirs(qtlItem.qtlName)
out = open(os.path.join(qtlItem.qtlName, qtlItem.qtlName + ".html"), "w")
doc, tag, text, line = Doc().ttl()
doc.asis('<!DOCTYPE html>')
with tag('html'):
with tag('head'):
doc.stag('meta', charset='utf-8')
doc.stag('meta', name='viewport', content='width=device-width, initial-scale=1.0, shrink-to-fit=no')
line('title', 'Summary information in QTL ' + qtlItem.qtlName)
doc.stag('link', rel='stylesheet', href='../assets/bootstrap/css/bootstrap.min.css')
doc.stag('link', rel='stylesheet', href='../assets/fonts/font-awesome.min.css')
doc.stag('link', rel='stylesheet', href="../assets/css/modas.css")
# doc.stag('link', rel='stylesheet', href="assets/css/styles.css")
with tag("body"):
with tag("div", id="sidebar-test"):
with tag("div", klass="sidebar-header"):
with tag("h2"):
line("a", "MODAS", href="../mainPage.html", klass="modas")
with tag("ul"):
getListItem(data, qtlName=qtlItem.qtlName, doc=doc, tag=tag, text=text, line=line)
with tag("div", klass="content"):
getSingleQtlInfo(qtlItem, index, geneFuncDict, doc=doc, tag=tag, text=text, line=line)
line("script", "", src="../assets/js/jquery.min.js")
line("script", "", src="../assets/bootstrap/js/bootstrap.min.js")
line("script", "", src="../assets/js/modas.js")
customJs = '''
<script>
var offestFromTop = %d * 45 + 68;
$('#sidebar-test').scrollTop(offestFromTop);
function clickItem(event) {
var target = event.currentTarget;
$(target).parent().removeClass(".active").addClass(".active");
var index = $("div.qtlItem").index($(this).parent());
var offestFromTop = index * 45 + 68;
$('#sidebar-test').scrollTop(offestFromTop);
}
if ($("div.qtlItem .qtlLink")) {
var qtlLink = $("div.qtlItem .qtlLink");
for (var i = 0; i < qtlLink.length; i++) {
var item = qtlLink[i];
item.onclick = clickItem;
}
}
</script>
''' % (index)
doc.asis(customJs)
res = indent(doc.getvalue(), indentation=" ")
print(res, file=out)
out.close()
def generateSingleTraitPage(data, manhattanDir, qqDir, boxplotDir):
for index, row in data.iterrows():
qtlItem = SingleQtlStatistics(row, index)
for traitName in qtlItem.traitNames:
# getListItem(qtlItem)
out = open(os.path.join(qtlItem.qtlName, traitName + ".html"), "w")
doc, tag, text, line = Doc().ttl()
doc.asis('<!DOCTYPE html>')
with tag('html'):
with tag('head'):
doc.stag('meta', charset='utf-8')
doc.stag('meta', name='viewport', content='width=device-width, initial-scale=1.0, shrink-to-fit=no')
line('title', 'Detailed information in trait ' + traitName)
doc.stag('link', rel='stylesheet', href='../assets/bootstrap/css/bootstrap.min.css')
doc.stag('link', rel='stylesheet', href='../assets/fonts/font-awesome.min.css')
doc.stag('link', rel='stylesheet', href="../assets/css/modas.css")
# doc.stag('link', rel='stylesheet', href="assets/css/styles.css")
with tag("body"):
with tag("div", id="sidebar-test"):
with tag("div", klass="sidebar-header"):
with tag("h2"):
line("a", "MODAS", href="../mainPage.html", klass="modas")
with tag("ul"):
getListItem(data, qtlItem.qtlName, traitName, doc=doc, tag=tag, text=text, line=line)
manhattanFile = glob.glob(os.path.join(manhattanDir, "Manhattan.*_{}.jpg".format(traitName)))[0]
boxplotFile = glob.glob(os.path.join(boxplotDir, "{}_*.jpg".format(traitName)))[0]
qqplotFile = glob.glob(os.path.join(qqDir, "QQplot.*_{}.jpg".format(traitName)))[0]
with tag("div", klass="content"):
with tag("div", klass="container"):
with tag("div", klass="row"):
with tag("div", klass="col"):
line("h1", "Details in " + traitName, style="text-align: center;")
doc.stag("img", klass="img-fluid", src='../'+'/'.join(manhattanFile.split('/')[-2:]))
with tag("div", klass="row"):
with tag("div", klass="col-6"):
doc.stag("img", klass="img-fluid", src='../'+'/'.join(qqplotFile.split('/')[-2:]))
with tag("div", klass="col-6"):
doc.stag("img", klass="img-fluid", src='../'+'/'.join(boxplotFile.split('/')[-2:]))
line("script", "", src="../assets/js/jquery.min.js")
line("script", "", src="../assets/bootstrap/js/bootstrap.min.js")
line("script", "", src="../assets/js/modas.js")
customJs = '''
<script>
var offestFromTop = %d * 45 + 68;
$('#sidebar-test').scrollTop(offestFromTop);
function clickItem(event) {
var target = event.currentTarget;
$(target).parent().removeClass(".active").addClass(".active");
var index = $("div.qtlItem").index($(this).parent());
var offestFromTop = index * 45 + 68;
$('#sidebar-test').scrollTop(offestFromTop);
}
if ($("div.qtlItem .qtlLink")) {
var qtlLink = $("div.qtlItem .qtlLink");
for (var i = 0; i < qtlLink.length; i++) {
var item = qtlLink[i];
item.onclick = clickItem;
}
}
</script>
''' % (index)
doc.asis(customJs)
res = indent(doc.getvalue(), indentation=" ")
print(res, file=out)
out.close()
def generateHtml(qtl_anno, myFuncFile, out_dir, totalSNPs):
manhattanDir = os.path.abspath(out_dir+'/manhattan_plot')
qqplotDir = os.path.abspath(out_dir+'/qqplot')
boxplotDir = os.path.abspath(out_dir+'/boxplot')
allQtlStatistics = AllQtlStatistics()
allQtlStatistics.getInit(qtl_anno)
allQtlStatistics.totalSNPs = totalSNPs
allTraitStatistics = AllTraitStatistics()
allTraitStatistics.getInit(qtl_anno)
geneFuncDict = getGeneFunc(myFuncFile, "\t")
resolveDir(out_dir)
generateMainPage(qtl_anno, allQtlStatistics, allTraitStatistics)
generateSingleQtlPage(qtl_anno, geneFuncDict)
generateSingleTraitPage(qtl_anno, manhattanDir, qqplotDir, boxplotDir)