163 lines (162 with data), 4.0 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Downloads for JunD binding prediction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If necessary, install the prerequisites first by uncommenting the lines in the next cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional one-time setup: uncomment the lines below to install the\n",
"# command-line tools and Python packages used by this notebook.\n",
"# The %conda / %pip magics install into the environment of the running\n",
"# kernel, which a plain `!conda` / `!pip` shell call does not guarantee.\n",
"#%conda install --yes -c bioconda bedtools samtools\n",
"#%pip install janggu\n",
"#%conda install --yes tensorflow"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pybedtools import BedTool"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Destination directory for every file this notebook downloads or derives;\n",
"# the shell cells below interpolate it as {output}.\n",
"output = '../data'\n",
"os.makedirs(name=output, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Downloading the data for JunD prediction\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ENCODE downloads: ENCFF446WOD is saved as the JunD peak file,\n",
"# ENCFF546PJU and ENCFF059BEU as the two DNase BAM files.\n",
"!wget -O {output}/jund_peaks.narrowPeak.gz https://www.encodeproject.org/files/ENCFF446WOD/@@download/ENCFF446WOD.bed.gz\n",
"!wget -O {output}/dnase_stam_encode.bam https://www.encodeproject.org/files/ENCFF546PJU/@@download/ENCFF546PJU.bam\n",
"!wget -O {output}/dnase_stam_roadmap.bam https://www.encodeproject.org/files/ENCFF059BEU/@@download/ENCFF059BEU.bam\n",
"\n",
"# hg38 blacklist; gunzip -f overwrites an already decompressed copy.\n",
"!wget -O {output}/hg38.blacklisted.bed.gz http://mitra.stanford.edu/kundaje/akundaje/release/blacklists/hg38-human/hg38.blacklist.bed.gz\n",
"!gunzip -f {output}/hg38.blacklisted.bed.gz\n",
"\n",
"# Human genome sequence (hg38), decompressed to hg38.fa.\n",
"!wget -O {output}/hg38.fa.gz http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz\n",
"!gunzip -f {output}/hg38.fa.gz\n",
"\n",
"# Chromosome sizes for hg38.\n",
"!wget -O {output}/hg38.chrom.sizes http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The BAM files need to be indexed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Index both downloaded BAM files, one samtools call per file.\n",
"for bam_file in ('dnase_stam_encode.bam', 'dnase_stam_roadmap.bam'):\n",
"    !samtools index {output}/{bam_file}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Prepare the JunD peaks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sort the downloaded peak intervals, merge them and write the result\n",
"# to jund_raw_peaks.bed.\n",
"narrowpeak_path = os.path.join(output, 'jund_peaks.narrowPeak.gz')\n",
"merged_peaks_path = os.path.join(output, 'jund_raw_peaks.bed')\n",
"BedTool(narrowpeak_path).sort().merge().saveas(merged_peaks_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the region of interest (ROI)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Extend every merged peak by 10000 bp on both sides (slop, bounded by the\n",
"# chromosome sizes file), sort and merge the extended intervals, remove the\n",
"# blacklisted regions and save the result as roi_jund_extended.bed.\n",
"chrom_sizes_path = os.path.join(output, 'hg38.chrom.sizes')\n",
"blacklist_path = os.path.join(output, 'hg38.blacklisted.bed')\n",
"(\n",
"    BedTool(os.path.join(output, 'jund_raw_peaks.bed'))\n",
"    .slop(b=10000, g=chrom_sizes_path)\n",
"    .sort()\n",
"    .merge()\n",
"    .subtract(blacklist_path)\n",
"    .saveas(os.path.join(output, 'roi_jund_extended.bed'))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run janggu-trim on the ROI file and write trim_roi_jund_extended.bed.\n",
"# NOTE(review): -divby 200 presumably makes the interval boundaries\n",
"# divisible by 200 -- confirm against the janggu-trim documentation.\n",
"roi_bed = f'{output}/roi_jund_extended.bed'\n",
"trimmed_roi_bed = f'{output}/trim_roi_jund_extended.bed'\n",
"!janggu-trim {roi_bed} {trimmed_roi_bed} -divby 200"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}