--- a +++ b/notebooks/mapping_zebrafish.ipynb @@ -0,0 +1,2857 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:40:16.457 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n", + "2022-03-24 14:40:16.458 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n", + "2022-03-24 14:40:16.459 | INFO | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n" + ] + } + ], + "source": [ + "sys.path.append('..')\n", + "\n", + "from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n", + "from pyMultiOmics.constants import GENES, PROTEINS, COMPOUNDS, DANIO_RERIO, REACTIONS, PATHWAYS\n", + "from pyMultiOmics.mapping import Mapper\n", + "from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Demonstration of pyMultiOmics mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the processed Zebrafish data from [1]\n", + "\n", + "[1] [Rabinowitz, Jeremy S., et al. 
\"Transcriptomic, proteomic, and metabolomic landscape of positional memory in the caudal fin of zebrafish.\" Proceedings of the National Academy of Sciences 114.5 (2017): E717-E726.](https://www.pnas.org/content/114/5/E717.short)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:40:25.643 | INFO | pyMultiOmics.common:download_file:59 - Downloading zebrafish_data.zip\n", + "1.75kKB [00:00, 22.0kKB/s] \n", + "2022-03-24 14:40:25.771 | INFO | pyMultiOmics.common:extract_zip_file:71 - Extracting zebrafish_data.zip\n", + "100%|██████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 349.96it/s]\n", + "2022-03-24 14:40:25.823 | INFO | pyMultiOmics.common:extract_zip_file:77 - Deleting zebrafish_data.zip\n" + ] + } + ], + "source": [ + "url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/zebrafish_data.zip'\n", + "out_file = download_file(url)\n", + "extract_zip_file(out_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/joewandy/Work/git/pyMultiOmics/notebooks/zebrafish_data'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DATA_FOLDER = os.path.abspath(os.path.join('zebrafish_data'))\n", + "DATA_FOLDER" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read transcriptomics data (identified by their gene ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "gene_data = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_data_combined.csv'), index_col='Identifier')\n", + "gene_design = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ 
+ { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>US-1584693</th>\n", + " <th>US-1584700</th>\n", + " <th>US-1584706</th>\n", + " <th>US-1584712</th>\n", + " <th>US-1584722</th>\n", + " <th>US-1584724</th>\n", + " <th>US-1584725</th>\n", + " <th>US-1584732</th>\n", + " <th>US-1584738</th>\n", + " <th>US-1584744</th>\n", + " <th>...</th>\n", + " <th>US-1584753</th>\n", + " <th>US-1584754</th>\n", + " <th>US-1584758</th>\n", + " <th>US-1584765</th>\n", + " <th>FC_distal_vs_proximal</th>\n", + " <th>padj_distal_vs_proximal</th>\n", + " <th>FC_distal_vs_middle</th>\n", + " <th>padj_distal_vs_middle</th>\n", + " <th>FC_middle_vs_proximal</th>\n", + " <th>padj_middle_vs_proximal</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Identifier</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>ENSDARG00000000001</th>\n", + " <td>51</td>\n", + " <td>40</td>\n", + " <td>69</td>\n", + " <td>78</td>\n", + " <td>89</td>\n", + " <td>47</td>\n", + " <td>88</td>\n", + " <td>86</td>\n", + " <td>110</td>\n", + " <td>55</td>\n", + " <td>...</td>\n", + " <td>58</td>\n", + " <td>104</td>\n", + " <td>43</td>\n", + " <td>72</td>\n", + " <td>0.869331</td>\n", + " <td>0.000008</td>\n", + " 
<td>0.748943</td>\n", + " <td>4.380000e-05</td>\n", + " <td>0.114026</td>\n", + " <td>0.630834</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000000002</th>\n", + " <td>283</td>\n", + " <td>129</td>\n", + " <td>164</td>\n", + " <td>269</td>\n", + " <td>211</td>\n", + " <td>171</td>\n", + " <td>146</td>\n", + " <td>256</td>\n", + " <td>283</td>\n", + " <td>150</td>\n", + " <td>...</td>\n", + " <td>142</td>\n", + " <td>272</td>\n", + " <td>260</td>\n", + " <td>256</td>\n", + " <td>0.287823</td>\n", + " <td>0.031298</td>\n", + " <td>1.005337</td>\n", + " <td>1.310000e-13</td>\n", + " <td>-0.724987</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000000018</th>\n", + " <td>545</td>\n", + " <td>503</td>\n", + " <td>547</td>\n", + " <td>387</td>\n", + " <td>332</td>\n", + " <td>559</td>\n", + " <td>623</td>\n", + " <td>499</td>\n", + " <td>436</td>\n", + " <td>488</td>\n", + " <td>...</td>\n", + " <td>462</td>\n", + " <td>287</td>\n", + " <td>495</td>\n", + " <td>299</td>\n", + " <td>-0.437271</td>\n", + " <td>0.000389</td>\n", + " <td>-0.404770</td>\n", + " <td>6.868400e-04</td>\n", + " <td>-0.040193</td>\n", + " <td>0.760679</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000000019</th>\n", + " <td>437</td>\n", + " <td>469</td>\n", + " <td>538</td>\n", + " <td>557</td>\n", + " <td>550</td>\n", + " <td>404</td>\n", + " <td>544</td>\n", + " <td>443</td>\n", + " <td>623</td>\n", + " <td>502</td>\n", + " <td>...</td>\n", + " <td>470</td>\n", + " <td>460</td>\n", + " <td>329</td>\n", + " <td>480</td>\n", + " <td>0.521291</td>\n", + " <td>0.000015</td>\n", + " <td>0.271082</td>\n", + " <td>1.936266e-02</td>\n", + " <td>0.242435</td>\n", + " <td>0.041606</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000000068</th>\n", + " <td>266</td>\n", + " <td>249</td>\n", + " <td>247</td>\n", + " <td>236</td>\n", + " <td>195</td>\n", + " <td>247</td>\n", + " <td>283</td>\n", + " <td>259</td>\n", + " <td>299</td>\n", + " <td>232</td>\n", + " 
<td>...</td>\n", + " <td>231</td>\n", + " <td>236</td>\n", + " <td>274</td>\n", + " <td>241</td>\n", + " <td>0.064820</td>\n", + " <td>0.595522</td>\n", + " <td>0.142243</td>\n", + " <td>2.579239e-01</td>\n", + " <td>-0.084764</td>\n", + " <td>0.528336</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 21 columns</p>\n", + "</div>" + ], + "text/plain": [ + " US-1584693 US-1584700 US-1584706 US-1584712 \\\n", + "Identifier \n", + "ENSDARG00000000001 51 40 69 78 \n", + "ENSDARG00000000002 283 129 164 269 \n", + "ENSDARG00000000018 545 503 547 387 \n", + "ENSDARG00000000019 437 469 538 557 \n", + "ENSDARG00000000068 266 249 247 236 \n", + "\n", + " US-1584722 US-1584724 US-1584725 US-1584732 \\\n", + "Identifier \n", + "ENSDARG00000000001 89 47 88 86 \n", + "ENSDARG00000000002 211 171 146 256 \n", + "ENSDARG00000000018 332 559 623 499 \n", + "ENSDARG00000000019 550 404 544 443 \n", + "ENSDARG00000000068 195 247 283 259 \n", + "\n", + " US-1584738 US-1584744 ... US-1584753 US-1584754 \\\n", + "Identifier ... \n", + "ENSDARG00000000001 110 55 ... 58 104 \n", + "ENSDARG00000000002 283 150 ... 142 272 \n", + "ENSDARG00000000018 436 488 ... 462 287 \n", + "ENSDARG00000000019 623 502 ... 470 460 \n", + "ENSDARG00000000068 299 232 ... 
231 236 \n", + "\n", + " US-1584758 US-1584765 FC_distal_vs_proximal \\\n", + "Identifier \n", + "ENSDARG00000000001 43 72 0.869331 \n", + "ENSDARG00000000002 260 256 0.287823 \n", + "ENSDARG00000000018 495 299 -0.437271 \n", + "ENSDARG00000000019 329 480 0.521291 \n", + "ENSDARG00000000068 274 241 0.064820 \n", + "\n", + " padj_distal_vs_proximal FC_distal_vs_middle \\\n", + "Identifier \n", + "ENSDARG00000000001 0.000008 0.748943 \n", + "ENSDARG00000000002 0.031298 1.005337 \n", + "ENSDARG00000000018 0.000389 -0.404770 \n", + "ENSDARG00000000019 0.000015 0.271082 \n", + "ENSDARG00000000068 0.595522 0.142243 \n", + "\n", + " padj_distal_vs_middle FC_middle_vs_proximal \\\n", + "Identifier \n", + "ENSDARG00000000001 4.380000e-05 0.114026 \n", + "ENSDARG00000000002 1.310000e-13 -0.724987 \n", + "ENSDARG00000000018 6.868400e-04 -0.040193 \n", + "ENSDARG00000000019 1.936266e-02 0.242435 \n", + "ENSDARG00000000068 2.579239e-01 -0.084764 \n", + "\n", + " padj_middle_vs_proximal \n", + "Identifier \n", + "ENSDARG00000000001 0.630834 \n", + "ENSDARG00000000002 0.000001 \n", + "ENSDARG00000000018 0.760679 \n", + "ENSDARG00000000019 0.041606 \n", + "ENSDARG00000000068 0.528336 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>group</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sample</th>\n", + " <th></th>\n", + " 
</tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>US-1584752</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584732</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584724</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584693</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584758</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584725</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584706</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584700</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584744</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584753</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584738</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584722</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584712</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584754</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US-1584765</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " group\n", + "sample \n", + "US-1584752 Proximal\n", + "US-1584732 Proximal\n", + "US-1584724 Proximal\n", + "US-1584693 Proximal\n", + "US-1584758 Proximal\n", + "US-1584725 Middle\n", + "US-1584706 Middle\n", + "US-1584700 Middle\n", + "US-1584744 Middle\n", + "US-1584753 Middle\n", + "US-1584738 Distal\n", + "US-1584722 Distal\n", + "US-1584712 Distal\n", + "US-1584754 Distal\n", + "US-1584765 Distal" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene_design" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read proteomics data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + 
"metadata": {}, + "outputs": [], + "source": [ + "protein_data = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Uniprot')\n", + "protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Distal#3_01</th>\n", + " <th>Distal#3_02</th>\n", + " <th>Distal#3_03</th>\n", + " <th>Distal#3_04</th>\n", + " <th>Middle#3_01</th>\n", + " <th>Middle#3_02</th>\n", + " <th>Middle#3_03</th>\n", + " <th>Middle#3_04</th>\n", + " <th>Proximal#3_01</th>\n", + " <th>Proximal#3_02</th>\n", + " <th>Proximal#3_03</th>\n", + " <th>Proximal#3_04</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Uniprot</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>A0A0A0MPL4</th>\n", + " <td>9.955264e+05</td>\n", + " <td>4.946580e+06</td>\n", + " <td>1.377194e+06</td>\n", + " <td>2.208140e+06</td>\n", + " <td>2.907807e+06</td>\n", + " <td>4.231976e+06</td>\n", + " <td>1.560849e+06</td>\n", + " <td>2.852904e+06</td>\n", + " <td>1781795.086</td>\n", + " <td>2.668135e+06</td>\n", + " <td>3.079148e+06</td>\n", + " <td>2.840473e+06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0A0B4J1A5</th>\n", + " <td>2.982519e+06</td>\n", + " <td>8.816655e+06</td>\n", + " 
<td>7.668431e+06</td>\n", + " <td>4.632309e+06</td>\n", + " <td>7.672153e+06</td>\n", + " <td>7.776017e+06</td>\n", + " <td>6.633781e+06</td>\n", + " <td>8.242783e+06</td>\n", + " <td>5475654.544</td>\n", + " <td>5.703832e+06</td>\n", + " <td>8.294364e+06</td>\n", + " <td>1.334874e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0A0B4J1A7</th>\n", + " <td>1.553049e+07</td>\n", + " <td>1.037155e+06</td>\n", + " <td>1.856137e+07</td>\n", + " <td>1.767859e+07</td>\n", + " <td>1.375736e+07</td>\n", + " <td>1.747998e+07</td>\n", + " <td>1.517507e+07</td>\n", + " <td>2.394465e+07</td>\n", + " <td>3157387.719</td>\n", + " <td>1.794775e+07</td>\n", + " <td>2.300430e+07</td>\n", + " <td>2.063800e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0AUQ3</th>\n", + " <td>2.012699e+06</td>\n", + " <td>3.088982e+06</td>\n", + " <td>2.455865e+06</td>\n", + " <td>9.448331e+05</td>\n", + " <td>2.866780e+06</td>\n", + " <td>2.661669e+06</td>\n", + " <td>2.100352e+06</td>\n", + " <td>2.133662e+06</td>\n", + " <td>1738244.989</td>\n", + " <td>2.629396e+06</td>\n", + " <td>2.900560e+06</td>\n", + " <td>2.416018e+06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0AUR9</th>\n", + " <td>3.640487e+06</td>\n", + " <td>2.588477e+07</td>\n", + " <td>3.415989e+07</td>\n", + " <td>2.868569e+06</td>\n", + " <td>1.971142e+06</td>\n", + " <td>2.472776e+06</td>\n", + " <td>5.615177e+06</td>\n", + " <td>1.303356e+06</td>\n", + " <td>3263299.566</td>\n", + " <td>6.866769e+06</td>\n", + " <td>2.465929e+06</td>\n", + " <td>4.515643e+06</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Distal#3_01 Distal#3_02 Distal#3_03 Distal#3_04 \\\n", + "Uniprot \n", + "A0A0A0MPL4 9.955264e+05 4.946580e+06 1.377194e+06 2.208140e+06 \n", + "A0A0B4J1A5 2.982519e+06 8.816655e+06 7.668431e+06 4.632309e+06 \n", + "A0A0B4J1A7 1.553049e+07 1.037155e+06 1.856137e+07 1.767859e+07 \n", + "A0AUQ3 2.012699e+06 3.088982e+06 2.455865e+06 9.448331e+05 \n", + "A0AUR9 3.640487e+06 
2.588477e+07 3.415989e+07 2.868569e+06 \n", + "\n", + " Middle#3_01 Middle#3_02 Middle#3_03 Middle#3_04 \\\n", + "Uniprot \n", + "A0A0A0MPL4 2.907807e+06 4.231976e+06 1.560849e+06 2.852904e+06 \n", + "A0A0B4J1A5 7.672153e+06 7.776017e+06 6.633781e+06 8.242783e+06 \n", + "A0A0B4J1A7 1.375736e+07 1.747998e+07 1.517507e+07 2.394465e+07 \n", + "A0AUQ3 2.866780e+06 2.661669e+06 2.100352e+06 2.133662e+06 \n", + "A0AUR9 1.971142e+06 2.472776e+06 5.615177e+06 1.303356e+06 \n", + "\n", + " Proximal#3_01 Proximal#3_02 Proximal#3_03 Proximal#3_04 \n", + "Uniprot \n", + "A0A0A0MPL4 1781795.086 2.668135e+06 3.079148e+06 2.840473e+06 \n", + "A0A0B4J1A5 5475654.544 5.703832e+06 8.294364e+06 1.334874e+07 \n", + "A0A0B4J1A7 3157387.719 1.794775e+07 2.300430e+07 2.063800e+07 \n", + "A0AUQ3 1738244.989 2.629396e+06 2.900560e+06 2.416018e+06 \n", + "A0AUR9 3263299.566 6.866769e+06 2.465929e+06 4.515643e+06 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>group</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sample</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Distal#3_01</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Distal#3_02</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Distal#3_03</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>Distal#3_04</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Middle#3_01</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Middle#3_02</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Middle#3_03</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Middle#3_04</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Proximal#3_01</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Proximal#3_02</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Proximal#3_03</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Proximal#3_04</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " group\n", + "sample \n", + "Distal#3_01 Distal\n", + "Distal#3_02 Distal\n", + "Distal#3_03 Distal\n", + "Distal#3_04 Distal\n", + "Middle#3_01 Middle\n", + "Middle#3_02 Middle\n", + "Middle#3_03 Middle\n", + "Middle#3_04 Middle\n", + "Proximal#3_01 Proximal\n", + "Proximal#3_02 Proximal\n", + "Proximal#3_03 Proximal\n", + "Proximal#3_04 Proximal" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_design" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read metabolomics data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "compound_data = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data_kegg.csv'), index_col='Identifier')\n", + "compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " 
vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>distal_M1</th>\n", + " <th>distal_M2</th>\n", + " <th>distal_M3</th>\n", + " <th>distal_F1</th>\n", + " <th>distal_F2</th>\n", + " <th>distal_F3</th>\n", + " <th>middle_M1</th>\n", + " <th>middle_M2</th>\n", + " <th>middle_M3</th>\n", + " <th>middle_F1</th>\n", + " <th>middle_F2</th>\n", + " <th>middle_F3</th>\n", + " <th>proximal_M1</th>\n", + " <th>proximal_M2</th>\n", + " <th>proximal_M3</th>\n", + " <th>proximal_F1</th>\n", + " <th>proximal_F2</th>\n", + " <th>proximal_F3</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Identifier</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>C00565</th>\n", + " <td>75170.0</td>\n", + " <td>57052</td>\n", + " <td>39170.0</td>\n", + " <td>84057</td>\n", + " <td>38608.0</td>\n", + " <td>64126.0</td>\n", + " <td>50214.0</td>\n", + " <td>75680</td>\n", + " <td>165178</td>\n", + " <td>121856</td>\n", + " <td>77061</td>\n", + " <td>98015.0</td>\n", + " <td>113765</td>\n", + " <td>96098</td>\n", + " <td>84198</td>\n", + " <td>117644</td>\n", + " <td>169459</td>\n", + " <td>169669</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C00037</th>\n", + " <td>64511.0</td>\n", + " <td>33658</td>\n", + " <td>23565.0</td>\n", + " <td>52102</td>\n", + " <td>49508.0</td>\n", + " <td>37498.0</td>\n", + " <td>30417.0</td>\n", + " <td>55728</td>\n", + " <td>88519</td>\n", + " <td>103871</td>\n", + " <td>45974</td>\n", + " <td>73101.0</td>\n", + " 
<td>72725</td>\n", + " <td>66008</td>\n", + " <td>54220</td>\n", + " <td>95341</td>\n", + " <td>110192</td>\n", + " <td>291598</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C01104</th>\n", + " <td>5787534.0</td>\n", + " <td>4351239</td>\n", + " <td>4401036.0</td>\n", + " <td>8187282</td>\n", + " <td>8431125.0</td>\n", + " <td>5082056.0</td>\n", + " <td>5138937.0</td>\n", + " <td>7341351</td>\n", + " <td>7837293</td>\n", + " <td>9256269</td>\n", + " <td>9934066</td>\n", + " <td>10243285.0</td>\n", + " <td>7344406</td>\n", + " <td>5524811</td>\n", + " <td>4809250</td>\n", + " <td>9279874</td>\n", + " <td>9047339</td>\n", + " <td>9211255</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C00134</th>\n", + " <td>3430897.0</td>\n", + " <td>1877785</td>\n", + " <td>1225710.0</td>\n", + " <td>2326620</td>\n", + " <td>2421267.0</td>\n", + " <td>2595529.0</td>\n", + " <td>2003627.0</td>\n", + " <td>2120053</td>\n", + " <td>2269318</td>\n", + " <td>3220850</td>\n", + " <td>4596854</td>\n", + " <td>3155377.0</td>\n", + " <td>3760854</td>\n", + " <td>2658833</td>\n", + " <td>2488025</td>\n", + " <td>2506550</td>\n", + " <td>4000703</td>\n", + " <td>3292566</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C00213</th>\n", + " <td>112845.0</td>\n", + " <td>129977</td>\n", + " <td>122292.0</td>\n", + " <td>63219</td>\n", + " <td>50113.0</td>\n", + " <td>100343.0</td>\n", + " <td>156651.0</td>\n", + " <td>176682</td>\n", + " <td>379322</td>\n", + " <td>160906</td>\n", + " <td>56802</td>\n", + " <td>107161.0</td>\n", + " <td>235982</td>\n", + " <td>181200</td>\n", + " <td>142994</td>\n", + " <td>116132</td>\n", + " <td>94589</td>\n", + " <td>167280</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " distal_M1 distal_M2 distal_M3 distal_F1 distal_F2 distal_F3 \\\n", + "Identifier \n", + "C00565 75170.0 57052 39170.0 84057 38608.0 64126.0 \n", + "C00037 64511.0 33658 23565.0 52102 49508.0 37498.0 \n", + "C01104 5787534.0 4351239 4401036.0 8187282 8431125.0 
5082056.0 \n", + "C00134 3430897.0 1877785 1225710.0 2326620 2421267.0 2595529.0 \n", + "C00213 112845.0 129977 122292.0 63219 50113.0 100343.0 \n", + "\n", + " middle_M1 middle_M2 middle_M3 middle_F1 middle_F2 middle_F3 \\\n", + "Identifier \n", + "C00565 50214.0 75680 165178 121856 77061 98015.0 \n", + "C00037 30417.0 55728 88519 103871 45974 73101.0 \n", + "C01104 5138937.0 7341351 7837293 9256269 9934066 10243285.0 \n", + "C00134 2003627.0 2120053 2269318 3220850 4596854 3155377.0 \n", + "C00213 156651.0 176682 379322 160906 56802 107161.0 \n", + "\n", + " proximal_M1 proximal_M2 proximal_M3 proximal_F1 proximal_F2 \\\n", + "Identifier \n", + "C00565 113765 96098 84198 117644 169459 \n", + "C00037 72725 66008 54220 95341 110192 \n", + "C01104 7344406 5524811 4809250 9279874 9047339 \n", + "C00134 3760854 2658833 2488025 2506550 4000703 \n", + "C00213 235982 181200 142994 116132 94589 \n", + "\n", + " proximal_F3 \n", + "Identifier \n", + "C00565 169669 \n", + "C00037 291598 \n", + "C01104 9211255 \n", + "C00134 3292566 \n", + "C00213 167280 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>group</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sample</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>distal_M1</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>distal_M2</th>\n", 
+ " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>distal_M3</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>distal_F1</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>distal_F2</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>distal_F3</th>\n", + " <td>Distal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>middle_M1</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>middle_M2</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>middle_M3</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>middle_F1</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>middle_F2</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>middle_F3</th>\n", + " <td>Middle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>proximal_M1</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>proximal_M2</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>proximal_M3</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>proximal_F1</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>proximal_F2</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " <tr>\n", + " <th>proximal_F3</th>\n", + " <td>Proximal</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " group\n", + "sample \n", + "distal_M1 Distal\n", + "distal_M2 Distal\n", + "distal_M3 Distal\n", + "distal_F1 Distal\n", + "distal_F2 Distal\n", + "distal_F3 Distal\n", + "middle_M1 Middle\n", + "middle_M2 Middle\n", + "middle_M3 Middle\n", + "middle_F1 Middle\n", + "middle_F2 Middle\n", + "middle_F3 Middle\n", + "proximal_M1 Proximal\n", + "proximal_M2 Proximal\n", + "proximal_M3 Proximal\n", + "proximal_F1 Proximal\n", + "proximal_F2 Proximal\n", + "proximal_F3 Proximal" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_design" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set_log_level_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create omics data" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "genes data with (31953, 15) measurements" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transcript_data = SingleOmicsData(GENES, gene_data, gene_design)\n", + "transcript_data" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "proteins data with (3061, 12) measurements" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_omics = SingleOmicsData(PROTEINS, protein_data, protein_design)  # separate name: the raw DataFrame stays intact, so this cell can be re-run\n", + "protein_omics" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "compounds data with (130, 18) measurements" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_omics = SingleOmicsData(COMPOUNDS, compound_data, compound_design)  # separate name: the raw DataFrame stays intact, so this cell can be re-run\n", + "compound_omics" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "publication = 'Rabinowitz, Jeremy S., et al. 
\"Transcriptomic, proteomic, and metabolomic landscape of positional memory in the caudal fin of zebrafish.\" Proceedings of the National Academy of Sciences 114.5 (2017): E717-E726.'\n", + "url = 'https://www.pnas.org/content/114/5/E717.short'" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Multi-omics data container\n", + "- publication: Rabinowitz, Jeremy S., et al. \"Transcriptomic, proteomic, and metabolomic landscape of positional memory in the caudal fin of zebrafish.\" Proceedings of the National Academy of Sciences 114.5 (2017): E717-E726.\n", + "- URL: https://www.pnas.org/content/114/5/E717.short\n", + "- Views: 3 modalities\n", + "\t - genes data with (31953, 15) measurements\n", + "\t - proteins data with (3061, 12) measurements\n", + "\t - compounds data with (130, 18) measurements" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mo = MultiOmicsData(publication=publication, url=url)\n", + "mo.add_data([transcript_data, protein_omics, compound_omics])\n", + "mo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a mapping object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The mapping object uses Reactome to map the different biological entities in the data:\n", + "- Transcripts (or genes) are connected to the proteins they encode\n", + "- Proteins and compounds are connected to reactions they're involved in\n", + "- Reactions are connected to pathways" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:40:26.569 | INFO | pyMultiOmics.functions:remove_dupes:385 - Removing 2 rows with duplicate identifiers\n", + "2022-03-24 14:40:26.570 | INFO | pyMultiOmics.functions:reactome_mapping:78 - There are 128 observed compound 
ids\n", + "2022-03-24 14:40:26.571 | INFO | pyMultiOmics.functions:reactome_mapping:81 - Mapping genes -> proteins\n", + "2022-03-24 14:40:34.971 | INFO | pyMultiOmics.functions:reactome_mapping:86 - Mapping proteins -> reactions\n", + "2022-03-24 14:40:46.041 | INFO | pyMultiOmics.functions:reactome_mapping:94 - Mapping compounds -> reactions\n", + "2022-03-24 14:40:49.516 | INFO | pyMultiOmics.functions:reactome_mapping:100 - Mapping reactions -> pathways\n", + "2022-03-24 14:40:50.558 | INFO | pyMultiOmics.functions:reactome_mapping:111 - Mapping reactions -> proteins\n", + "2022-03-24 14:40:57.492 | INFO | pyMultiOmics.functions:reactome_mapping:118 - Mapping reactions -> compounds\n", + "2022-03-24 14:41:01.232 | INFO | pyMultiOmics.functions:reactome_mapping:130 - Mapping proteins -> genes\n", + "2022-03-24 14:41:18.641 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: genes\n", + "2022-03-24 14:41:19.903 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: proteins\n", + "2022-03-24 14:41:20.231 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: compounds\n", + "2022-03-24 14:41:20.289 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: reactions\n", + "2022-03-24 14:41:20.463 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: pathways\n", + "2022-03-24 14:41:20.470 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: gene_proteins\n", + "2022-03-24 14:41:20.647 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: protein_reactions\n", + "2022-03-24 14:41:21.793 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: compound_reactions\n", + "2022-03-24 14:41:22.134 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: reaction_pathways\n", + "2022-03-24 14:41:22.366 | INFO | pyMultiOmics.mapping:build:51 - Created a multi-omics network with 21264 nodes and 22682 edges\n", + "2022-03-24 14:41:22.385 | INFO | pyMultiOmics.mapping:build:53 - 
node_counts = {'genes': 9371, 'proteins': 8221, 'compounds': 1794, 'reactions': 1629, 'pathways': 249}\n" + ] + }, + { + "data": { + "text/plain": [ + "<pyMultiOmics.mapping.Mapper at 0x174b0a440>" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m = Mapper(mo, DANIO_RERIO, metabolic_pathway_only=True)\n", + "m.build()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<pyMultiOmics.mapping.Mapper at 0x174b0a440>" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Querying the mapping object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below are some example queries we can perform with the mapping object." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find reactions that are connected to at least one observed gene, protein and compound in the data" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>reaction_id</th>\n", + " <th>reaction_name</th>\n", + " <th>num_genes</th>\n", + " <th>num_proteins</th>\n", + " <th>num_compounds</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>R-DRE-109278</td>\n", + " <td>Nt5e:zn2+ hydrolyses amp,damp,gmp, imp</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", 
+ " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>R-DRE-109291</td>\n", + " <td>Cmp or tmp or ump + h2o => cytidine, thymidine...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>R-DRE-109415</td>\n", + " <td>Amp + h2o => adenosine + orthophosphate [nt5c1b]</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>R-DRE-109624</td>\n", + " <td>(2-deoxy)adenosine + atp => (d)amp + adp (adk)</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>R-DRE-1237160</td>\n", + " <td>Mta is cleaved and phosphorylated</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>R-DRE-1247910</td>\n", + " <td>Cndp2:2mn2+ dimer hydrolyses cysgly</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>R-DRE-139970</td>\n", + " <td>Fmo3:fad n-oxidises tma to tmao</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>R-DRE-1482976</td>\n", + " <td>Cdp-dag is converted to pi by cdipt</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>R-DRE-1614583</td>\n", + " <td>Pxlp-k212-cth cleaves l-cystathionine</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>R-DRE-174401</td>\n", + " <td>Ahcy:nad+ tetramer hydrolyses adohcy</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>R-DRE-1855154</td>\n", + " <td>I1p is dephosphorylated to ins by impa1/2 in t...</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " 
<td>R-DRE-1855210</td>\n", + " <td>I3p is dephosphorylated to ins by impa1/2 in t...</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>R-DRE-1855211</td>\n", + " <td>I4p is dephosphorylated to ins by impa1/2 in t...</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>R-DRE-188467</td>\n", + " <td>Gly-3-p+fad->dhap+fadh2 (catalyzed by mitochon...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>R-DRE-197250</td>\n", + " <td>Nampt transfers prib to nam to form namn</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>R-DRE-198813</td>\n", + " <td>Gsto dimers reduce deha to asch-</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>R-DRE-200318</td>\n", + " <td>Creatine + atp => phosphocreatine + adp [ckb,ckm]</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>R-DRE-200326</td>\n", + " <td>Creatine + atp => phosphocreatine + adp [ck oc...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>R-DRE-202127</td>\n", + " <td>Enos synthesizes no</td>\n", + " <td>4</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>R-DRE-2162066</td>\n", + " <td>Carbovir + imp => carbovir monophosphate + ino...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>R-DRE-2993447</td>\n", + " <td>Hlcs biotinylates 6x(pcca:pccb)</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>R-DRE-2993799</td>\n", + " <td>Hlcs biotinylates 
6xmccc1:6xmccc2</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>R-DRE-3301943</td>\n", + " <td>Gstk1 dimer transfers gs from gsh to cdnb</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>R-DRE-350604</td>\n", + " <td>Agmatine + h2o <=> putrescine + urea</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>R-DRE-351215</td>\n", + " <td>Putrescine + dc-adenosyl methionine => spermi...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>R-DRE-372519</td>\n", + " <td>Accho is hydrolyzed to cho and acetate by ache</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>R-DRE-372819</td>\n", + " <td>Oxaloacetate + gtp => phosphoenolpyruvate + gd...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>R-DRE-416530</td>\n", + " <td>Ffar1:ffar1 ligands activates gq</td>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>R-DRE-5652172</td>\n", + " <td>Akr1b1 reduces glc to d-sorbitol</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>R-DRE-5693373</td>\n", + " <td>Ddah1,2 hydrolyses adma to dma and l-cit</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>R-DRE-5693724</td>\n", + " <td>Esd dimer hydrolyses s-fgsh to gsh</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>R-DRE-6783221</td>\n", + " <td>Hagh hydrolyses (r)-s-lgsh to gsh and lact</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " 
<td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>R-DRE-6783880</td>\n", + " <td>Pipox oxidises ppca to p6c</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>R-DRE-6784393</td>\n", + " <td>Pxlp-k279-got2 dimer transaminates 4-oh-l-glut...</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>R-DRE-6797653</td>\n", + " <td>Dmgdh:fad oxidatively demethylates dmgly to sarc</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>R-DRE-6797955</td>\n", + " <td>Aldh7a1 oxidises betald to bet</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>R-DRE-6798317</td>\n", + " <td>Gnmt tetramer transfers methyl group from adom...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>R-DRE-6807826</td>\n", + " <td>Ldhal6b reduces pyr to lact</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>R-DRE-70342</td>\n", + " <td>Aldob tetramer cleaves fru-1-p to ga and dhap</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>R-DRE-70349</td>\n", + " <td>Dak dimer phosphorylates d-glyceraldehyde to f...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>R-DRE-70510</td>\n", + " <td>Ldh tetramer oxidises lact to pyr</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>R-DRE-70573</td>\n", + " <td>Argininosuccinate <=> fumarate + arginine</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " 
<td>R-DRE-70596</td>\n", + " <td>Got2 dimer deaminates l-asp</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>R-DRE-70613</td>\n", + " <td>Oxaloacetate + glutamate <=> aspartate + alpha...</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>R-DRE-70979</td>\n", + " <td>(s)-malate + nad+ <=> oxaloacetate + nadh + h+</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>R-DRE-71163</td>\n", + " <td>P-hydroxyphenylpyruvate + o2 => homogentisate ...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>R-DRE-71260</td>\n", + " <td>Aldh9a1 tetramer dehydrogenates teabl to form ...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>R-DRE-71783</td>\n", + " <td>Oxaloacetate + nadh + h+ <=> (s)-malate + nad+</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>R-DRE-71849</td>\n", + " <td>Ldh tetramer reduces pyr to lact</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>R-DRE-74241</td>\n", + " <td>Ada catalyzes the deamination of (deoxy)adenosine</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50</th>\n", + " <td>R-DRE-74248</td>\n", + " <td>(d)gmp or (d)imp + h2o => (2-deoxy)guanosine o...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>51</th>\n", + " <td>R-DRE-74372</td>\n", + " <td>Uracil + (deoxy)ribose 1-phosphate <=> (deoxy)...</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>52</th>\n", + " <td>R-DRE-74376</td>\n", + " 
<td>(deoxy)uridine + orthophosphate <=> uracil + (...</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>53</th>\n", + " <td>R-DRE-75848</td>\n", + " <td>Acly tetramer transforms cit to ac-coa</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>R-DRE-75889</td>\n", + " <td>Dhap is converted to g3p by gpd1/gpd1l</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " reaction_id reaction_name \\\n", + "0 R-DRE-109278 Nt5e:zn2+ hydrolyses amp,damp,gmp, imp \n", + "1 R-DRE-109291 Cmp or tmp or ump + h2o => cytidine, thymidine... \n", + "2 R-DRE-109415 Amp + h2o => adenosine + orthophosphate [nt5c1b] \n", + "3 R-DRE-109624 (2-deoxy)adenosine + atp => (d)amp + adp (adk) \n", + "4 R-DRE-1237160 Mta is cleaved and phosphorylated \n", + "5 R-DRE-1247910 Cndp2:2mn2+ dimer hydrolyses cysgly \n", + "6 R-DRE-139970 Fmo3:fad n-oxidises tma to tmao \n", + "7 R-DRE-1482976 Cdp-dag is converted to pi by cdipt \n", + "8 R-DRE-1614583 Pxlp-k212-cth cleaves l-cystathionine \n", + "9 R-DRE-174401 Ahcy:nad+ tetramer hydrolyses adohcy \n", + "10 R-DRE-1855154 I1p is dephosphorylated to ins by impa1/2 in t... \n", + "11 R-DRE-1855210 I3p is dephosphorylated to ins by impa1/2 in t... \n", + "12 R-DRE-1855211 I4p is dephosphorylated to ins by impa1/2 in t... \n", + "13 R-DRE-188467 Gly-3-p+fad->dhap+fadh2 (catalyzed by mitochon... \n", + "14 R-DRE-197250 Nampt transfers prib to nam to form namn \n", + "15 R-DRE-198813 Gsto dimers reduce deha to asch- \n", + "16 R-DRE-200318 Creatine + atp => phosphocreatine + adp [ckb,ckm] \n", + "17 R-DRE-200326 Creatine + atp => phosphocreatine + adp [ck oc... \n", + "18 R-DRE-202127 Enos synthesizes no \n", + "19 R-DRE-2162066 Carbovir + imp => carbovir monophosphate + ino... 
\n", + "20 R-DRE-2993447 Hlcs biotinylates 6x(pcca:pccb) \n", + "21 R-DRE-2993799 Hlcs biotinylates 6xmccc1:6xmccc2 \n", + "22 R-DRE-3301943 Gstk1 dimer transfers gs from gsh to cdnb \n", + "23 R-DRE-350604 Agmatine + h2o <=> putrescine + urea \n", + "24 R-DRE-351215 Putrescine + dc-adenosyl methionine => spermi... \n", + "25 R-DRE-372519 Accho is hydrolyzed to cho and acetate by ache \n", + "26 R-DRE-372819 Oxaloacetate + gtp => phosphoenolpyruvate + gd... \n", + "27 R-DRE-416530 Ffar1:ffar1 ligands activates gq \n", + "28 R-DRE-5652172 Akr1b1 reduces glc to d-sorbitol \n", + "29 R-DRE-5693373 Ddah1,2 hydrolyses adma to dma and l-cit \n", + "30 R-DRE-5693724 Esd dimer hydrolyses s-fgsh to gsh \n", + "31 R-DRE-6783221 Hagh hydrolyses (r)-s-lgsh to gsh and lact \n", + "32 R-DRE-6783880 Pipox oxidises ppca to p6c \n", + "33 R-DRE-6784393 Pxlp-k279-got2 dimer transaminates 4-oh-l-glut... \n", + "34 R-DRE-6797653 Dmgdh:fad oxidatively demethylates dmgly to sarc \n", + "35 R-DRE-6797955 Aldh7a1 oxidises betald to bet \n", + "36 R-DRE-6798317 Gnmt tetramer transfers methyl group from adom... \n", + "37 R-DRE-6807826 Ldhal6b reduces pyr to lact \n", + "38 R-DRE-70342 Aldob tetramer cleaves fru-1-p to ga and dhap \n", + "39 R-DRE-70349 Dak dimer phosphorylates d-glyceraldehyde to f... \n", + "40 R-DRE-70510 Ldh tetramer oxidises lact to pyr \n", + "41 R-DRE-70573 Argininosuccinate <=> fumarate + arginine \n", + "42 R-DRE-70596 Got2 dimer deaminates l-asp \n", + "43 R-DRE-70613 Oxaloacetate + glutamate <=> aspartate + alpha... \n", + "44 R-DRE-70979 (s)-malate + nad+ <=> oxaloacetate + nadh + h+ \n", + "45 R-DRE-71163 P-hydroxyphenylpyruvate + o2 => homogentisate ... \n", + "46 R-DRE-71260 Aldh9a1 tetramer dehydrogenates teabl to form ... 
\n", + "47 R-DRE-71783 Oxaloacetate + nadh + h+ <=> (s)-malate + nad+ \n", + "48 R-DRE-71849 Ldh tetramer reduces pyr to lact \n", + "49 R-DRE-74241 Ada catalyzes the deamination of (deoxy)adenosine \n", + "50 R-DRE-74248 (d)gmp or (d)imp + h2o => (2-deoxy)guanosine o... \n", + "51 R-DRE-74372 Uracil + (deoxy)ribose 1-phosphate <=> (deoxy)... \n", + "52 R-DRE-74376 (deoxy)uridine + orthophosphate <=> uracil + (... \n", + "53 R-DRE-75848 Acly tetramer transforms cit to ac-coa \n", + "54 R-DRE-75889 Dhap is converted to g3p by gpd1/gpd1l \n", + "\n", + " num_genes num_proteins num_compounds \n", + "0 1 1 3 \n", + "1 1 1 2 \n", + "2 1 1 1 \n", + "3 1 1 1 \n", + "4 1 1 1 \n", + "5 1 1 1 \n", + "6 3 1 2 \n", + "7 1 1 1 \n", + "8 1 1 1 \n", + "9 1 1 1 \n", + "10 2 1 1 \n", + "11 2 1 1 \n", + "12 2 1 1 \n", + "13 1 1 1 \n", + "14 1 1 1 \n", + "15 2 2 1 \n", + "16 2 2 1 \n", + "17 1 1 1 \n", + "18 4 2 1 \n", + "19 1 1 1 \n", + "20 3 1 1 \n", + "21 3 1 1 \n", + "22 1 1 1 \n", + "23 1 1 1 \n", + "24 1 1 2 \n", + "25 2 2 2 \n", + "26 1 1 1 \n", + "27 5 1 1 \n", + "28 2 1 1 \n", + "29 1 1 1 \n", + "30 1 1 1 \n", + "31 1 1 1 \n", + "32 1 1 1 \n", + "33 2 2 1 \n", + "34 1 1 2 \n", + "35 1 1 1 \n", + "36 1 1 1 \n", + "37 1 1 1 \n", + "38 1 1 1 \n", + "39 1 1 1 \n", + "40 2 2 1 \n", + "41 1 1 1 \n", + "42 2 2 1 \n", + "43 2 2 1 \n", + "44 1 1 1 \n", + "45 1 1 1 \n", + "46 1 1 1 \n", + "47 1 1 1 \n", + "48 2 2 1 \n", + "49 1 1 2 \n", + "50 1 1 2 \n", + "51 2 2 2 \n", + "52 2 2 2 \n", + "53 3 1 1 \n", + "54 2 1 1 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reactions = m.get_nodes(types=REACTIONS)\n", + "\n", + "data = []\n", + "for reaction_id, reaction_data in reactions:\n", + " reaction_name = reaction_data['display_name']\n", + " genes = m.get_connected(reaction_id, dest_type=GENES, observed=True)\n", + " proteins = m.get_connected(reaction_id, dest_type=PROTEINS, observed=True)\n", + " compounds = 
m.get_connected(reaction_id, dest_type=COMPOUNDS, observed=True)\n", + " \n", + " if len(genes) > 0 and len(proteins) > 0 and len(compounds) > 0:\n", + " row = [reaction_id, reaction_name, len(genes), len(proteins), len(compounds)]\n", + " data.append(row)\n", + "\n", + "df = pd.DataFrame(data, columns=['reaction_id', 'reaction_name', 'num_genes', 'num_proteins', 'num_compounds'])\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find everything connected to protein 'F1QAA7'" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>ENSDARG00000037781</th>\n", + " <td>Acss2</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>456215</th>\n", + " <td>Adenosine 5-monophosphate(2-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33019</th>\n", + " <td>Diphosphate(3-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57288</th>\n", + " <td>Acetyl-coa(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>57287</th>\n", + " <td>Coenzyme a(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30616</th>\n", + " <td>Atp(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15366</th>\n", + " <td>Acetic acid</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>R-DRE-71735</th>\n", + " <td>Acetate + coa + atp => acetyl-coa + amp + pyro...</td>\n", + " <td>reactions</td>\n", + " <td>None</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>R-DRE-71384</th>\n", + " <td>Ethanol oxidation</td>\n", + " <td>pathways</td>\n", + " <td>None</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " display_name \\\n", + "entity_id \n", + "ENSDARG00000037781 Acss2 \n", + "456215 Adenosine 5-monophosphate(2-) \n", + "33019 Diphosphate(3-) \n", + "57288 Acetyl-coa(4-) \n", + "57287 Coenzyme a(4-) \n", + "30616 Atp(4-) \n", + "15366 Acetic acid \n", + "R-DRE-71735 Acetate + coa + atp => acetyl-coa + amp + pyro... 
\n", + "R-DRE-71384 Ethanol oxidation \n", + "\n", + " data_type observed source_id \n", + "entity_id \n", + "ENSDARG00000037781 genes True F1QAA7 \n", + "456215 compounds False F1QAA7 \n", + "33019 compounds False F1QAA7 \n", + "57288 compounds False F1QAA7 \n", + "57287 compounds False F1QAA7 \n", + "30616 compounds False F1QAA7 \n", + "15366 compounds False F1QAA7 \n", + "R-DRE-71735 reactions None F1QAA7 \n", + "R-DRE-71384 pathways None F1QAA7 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = 'F1QAA7'\n", + "m.get_connected(query_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find all compounds connected to protein 'F1QAA7' (whether or not they were observed in the data)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>456215</th>\n", + " <td>Adenosine 5-monophosphate(2-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33019</th>\n", + " <td>Diphosphate(3-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57288</th>\n", + " <td>Acetyl-coa(4-)</td>\n", + " 
<td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57287</th>\n", + " <td>Coenzyme a(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30616</th>\n", + " <td>Atp(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15366</th>\n", + " <td>Acetic acid</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>F1QAA7</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type observed source_id\n", + "entity_id \n", + "456215 Adenosine 5-monophosphate(2-) compounds False F1QAA7\n", + "33019 Diphosphate(3-) compounds False F1QAA7\n", + "57288 Acetyl-coa(4-) compounds False F1QAA7\n", + "57287 Coenzyme a(4-) compounds False F1QAA7\n", + "30616 Atp(4-) compounds False F1QAA7\n", + "15366 Acetic acid compounds False F1QAA7" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = 'F1QAA7'\n", + "m.get_connected(query_id, dest_type=COMPOUNDS)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find observed genes and proteins connected to compound '33019'" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " 
<tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>ENSDARG00000058162</th>\n", + " <td>Pcyt1ba</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000011233</th>\n", + " <td>Pcyt1aa</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000004517</th>\n", + " <td>Ppat</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000039934</th>\n", + " <td>Hlcs</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000061994</th>\n", + " <td>Acacb</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>B0V0X1</th>\n", + " <td>B0V0X1</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>B8JLW8</th>\n", + " <td>B8JLW8</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>F1QYS7</th>\n", + " <td>F1QYS7</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>B0S5C4</th>\n", + " <td>B0S5C4</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q802U9</th>\n", + " <td>Q802U9</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>33019</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>87 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type observed source_id\n", + "entity_id \n", + "ENSDARG00000058162 Pcyt1ba genes True 
33019\n", + "ENSDARG00000011233 Pcyt1aa genes True 33019\n", + "ENSDARG00000004517 Ppat genes True 33019\n", + "ENSDARG00000039934 Hlcs genes True 33019\n", + "ENSDARG00000061994 Acacb genes True 33019\n", + "... ... ... ... ...\n", + "B0V0X1 B0V0X1 proteins True 33019\n", + "B8JLW8 B8JLW8 proteins True 33019\n", + "F1QYS7 F1QYS7 proteins True 33019\n", + "B0S5C4 B0S5C4 proteins True 33019\n", + "Q802U9 Q802U9 proteins True 33019\n", + "\n", + "[87 rows x 4 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = '33019'\n", + "genes = m.get_connected(query_id, dest_type=[GENES, PROTEINS], observed=True)\n", + "genes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find pathways connected to gene 'ENSDARG00000087927'" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>R-DRE-2393930</th>\n", + " <td>Phosphate bond hydrolysis by nudt proteins</td>\n", + " <td>pathways</td>\n", + " <td>None</td>\n", + " <td>ENSDARG00000087927</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type observed \\\n", + "entity_id \n", + 
"R-DRE-2393930 Phosphate bond hydrolysis by nudt proteins pathways None \n", + "\n", + " source_id \n", + "entity_id \n", + "R-DRE-2393930 ENSDARG00000087927 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = 'ENSDARG00000087927'\n", + "m.get_connected(query_id, dest_type=PATHWAYS)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find genes, proteins, compounds and pathways connected to reaction 'R-DRE-2395818'" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>ENSDARG00000030573</th>\n", + " <td>Nudt1</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q7ZWC3</th>\n", + " <td>Q7ZWC3</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15377</th>\n", + " <td>Water</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18420</th>\n", + " <td>Magnesium(2+)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + 
" <th>63212</th>\n", + " <td>2-hydroxy-damp(2-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>77897</th>\n", + " <td>2-hydroxy-datp(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15378</th>\n", + " <td>Hydron</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33019</th>\n", + " <td>Diphosphate(3-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " <tr>\n", + " <th>R-DRE-2393930</th>\n", + " <td>Phosphate bond hydrolysis by nudt proteins</td>\n", + " <td>pathways</td>\n", + " <td>None</td>\n", + " <td>R-DRE-2395818</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type \\\n", + "entity_id \n", + "ENSDARG00000030573 Nudt1 genes \n", + "Q7ZWC3 Q7ZWC3 proteins \n", + "15377 Water compounds \n", + "18420 Magnesium(2+) compounds \n", + "63212 2-hydroxy-damp(2-) compounds \n", + "77897 2-hydroxy-datp(4-) compounds \n", + "15378 Hydron compounds \n", + "33019 Diphosphate(3-) compounds \n", + "R-DRE-2393930 Phosphate bond hydrolysis by nudt proteins pathways \n", + "\n", + " observed source_id \n", + "entity_id \n", + "ENSDARG00000030573 True R-DRE-2395818 \n", + "Q7ZWC3 False R-DRE-2395818 \n", + "15377 False R-DRE-2395818 \n", + "18420 False R-DRE-2395818 \n", + "63212 False R-DRE-2395818 \n", + "77897 False R-DRE-2395818 \n", + "15378 False R-DRE-2395818 \n", + "33019 False R-DRE-2395818 \n", + "R-DRE-2393930 None R-DRE-2395818 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.get_connected('R-DRE-2395818')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Find observed genes, proteins and compounds involved in the 
pathway 'R-DRE-2393930'" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>ENSDARG00000030573</th>\n", + " <td>Nudt1</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000019503</th>\n", + " <td>Zgc:103759</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000078073</th>\n", + " <td>Nudt5</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000087927</th>\n", + " <td>Nudt9</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000041576</th>\n", + " <td>Nudt18</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ENSDARG00000026090</th>\n", + " <td>Adprm</td>\n", + " <td>genes</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q6IQ66</th>\n", + " <td>Q6IQ66</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " <tr>\n", + 
" <th>F1QL34</th>\n", + " <td>F1QL34</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>R-DRE-2393930</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type observed source_id\n", + "entity_id \n", + "ENSDARG00000030573 Nudt1 genes True R-DRE-2393930\n", + "ENSDARG00000019503 Zgc:103759 genes True R-DRE-2393930\n", + "ENSDARG00000078073 Nudt5 genes True R-DRE-2393930\n", + "ENSDARG00000087927 Nudt9 genes True R-DRE-2393930\n", + "ENSDARG00000041576 Nudt18 genes True R-DRE-2393930\n", + "ENSDARG00000026090 Adprm genes True R-DRE-2393930\n", + "Q6IQ66 Q6IQ66 proteins True R-DRE-2393930\n", + "F1QL34 F1QL34 proteins True R-DRE-2393930" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.get_connected('R-DRE-2393930', dest_type=[GENES, PROTEINS, COMPOUNDS], observed=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}