[214c6e]: / 00_preparation / 00_jund_downloads.ipynb

Download this file

163 lines (162 with data), 4.0 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Downloads for JunD binding prediction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If necessary, install the prerequisites first by uncommenting the lines in the next cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!conda install --yes -c bioconda bedtools samtools\n",
    "#!pip install janggu\n",
    "#!conda install --yes tensorflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pybedtools import BedTool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Directory that receives all downloaded and derived files of this notebook.\n",
    "output = '../data'\n",
    "os.makedirs(output, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Downloading the data for JunD prediction\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ENCODE files: JunD peaks (narrowPeak) and two DNase BAM files\n",
    "!wget https://www.encodeproject.org/files/ENCFF446WOD/@@download/ENCFF446WOD.bed.gz -O {output}/jund_peaks.narrowPeak.gz\n",
    "!wget https://www.encodeproject.org/files/ENCFF546PJU/@@download/ENCFF546PJU.bam -O {output}/dnase_stam_encode.bam\n",
    "!wget https://www.encodeproject.org/files/ENCFF059BEU/@@download/ENCFF059BEU.bam -O {output}/dnase_stam_roadmap.bam\n",
    "\n",
    "# hg38 blacklist, kept uncompressed as hg38.blacklisted.bed\n",
    "!wget http://mitra.stanford.edu/kundaje/akundaje/release/blacklists/hg38-human/hg38.blacklist.bed.gz -O {output}/hg38.blacklisted.bed.gz\n",
    "!gunzip -f {output}/hg38.blacklisted.bed.gz\n",
    "\n",
    "# hg38 genome sequence, kept uncompressed as hg38.fa\n",
    "!wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz -O {output}/hg38.fa.gz\n",
    "!gunzip -f {output}/hg38.fa.gz\n",
    "\n",
    "# hg38 chromosome sizes\n",
    "!wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes -O {output}/hg38.chrom.sizes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The BAM files need to be indexed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index both downloaded DNase BAM files with samtools.\n",
    "!samtools index {output}/dnase_stam_encode.bam\n",
    "!samtools index {output}/dnase_stam_roadmap.bam"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Prepare the JunD peaks by sorting and merging the downloaded peak intervals."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sort and merge the downloaded peaks, then store them as jund_raw_peaks.bed.\n",
    "(\n",
    "    BedTool(os.path.join(output, 'jund_peaks.narrowPeak.gz'))\n",
    "    .sort()\n",
    "    .merge()\n",
    "    .saveas(os.path.join(output, 'jund_raw_peaks.bed'))\n",
    ")"
   ]
  },
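  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create the region of interest (ROI): extend each peak by 10 kb on both sides, merge overlapping intervals and remove the blacklisted regions."
   ]
  },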
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extend the raw peaks by 10000 bp on both sides (bounded by the chromosome\n",
    "# sizes), merge the result and subtract the intervals in hg38.blacklisted.bed.\n",
    "(\n",
    "    BedTool(os.path.join(output, 'jund_raw_peaks.bed'))\n",
    "    .slop(b=10000, g=os.path.join(output, 'hg38.chrom.sizes'))\n",
    "    .sort()\n",
    "    .merge()\n",
    "    .subtract(os.path.join(output, 'hg38.blacklisted.bed'))\n",
    "    .saveas(os.path.join(output, 'roi_jund_extended.bed'))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run janggu-trim on the ROI and write the result to trim_roi_jund_extended.bed.\n",
    "# NOTE(review): -divby 200 presumably makes interval boundaries divisible by 200 -- confirm against the janggu-trim help.\n",
    "!janggu-trim {output}/roi_jund_extended.bed {output}/trim_roi_jund_extended.bed -divby 200"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}