{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os, sys"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:40:16.457 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n",
"2022-03-24 14:40:16.458 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n",
"2022-03-24 14:40:16.459 | INFO | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n"
]
}
],
"source": [
"sys.path.append('..')\n",
"\n",
"from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n",
"from pyMultiOmics.constants import GENES, PROTEINS, COMPOUNDS, DANIO_RERIO, REACTIONS, PATHWAYS\n",
"from pyMultiOmics.mapping import Mapper\n",
"from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demonstration of pyMultiOmics mapping"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the processed Zebrafish data from [1]\n",
"\n",
"[1] [Rabinowitz, Jeremy S., et al. \"Transcriptomic, proteomic, and metabolomic landscape of positional memory in the caudal fin of zebrafish.\" Proceedings of the National Academy of Sciences 114.5 (2017): E717-E726.](https://www.pnas.org/content/114/5/E717.short)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:40:25.643 | INFO | pyMultiOmics.common:download_file:59 - Downloading zebrafish_data.zip\n",
"1.75kKB [00:00, 22.0kKB/s] \n",
"2022-03-24 14:40:25.771 | INFO | pyMultiOmics.common:extract_zip_file:71 - Extracting zebrafish_data.zip\n",
"100%|██████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 349.96it/s]\n",
"2022-03-24 14:40:25.823 | INFO | pyMultiOmics.common:extract_zip_file:77 - Deleting zebrafish_data.zip\n"
]
}
],
"source": [
"DATA_URL = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/zebrafish_data.zip'\n",
"DATA_FOLDER = os.path.abspath('zebrafish_data')\n",
"\n",
"# The archive is expected to unpack into ./zebrafish_data (the folder the\n",
"# CSV files are read from below). Skip the download when that folder already\n",
"# exists so that re-running the notebook does not need network access.\n",
"if not os.path.isdir(DATA_FOLDER):\n",
"    out_file = download_file(DATA_URL)\n",
"    extract_zip_file(out_file)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/joewandy/Work/git/pyMultiOmics/notebooks/zebrafish_data'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DATA_FOLDER"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read transcriptomics data (identified by their gene ids)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"gene_data = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_data_combined.csv'), index_col='Identifier')\n",
"gene_design = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>US-1584693</th>\n",
" <th>US-1584700</th>\n",
" <th>US-1584706</th>\n",
" <th>US-1584712</th>\n",
" <th>US-1584722</th>\n",
" <th>US-1584724</th>\n",
" <th>US-1584725</th>\n",
" <th>US-1584732</th>\n",
" <th>US-1584738</th>\n",
" <th>US-1584744</th>\n",
" <th>...</th>\n",
" <th>US-1584753</th>\n",
" <th>US-1584754</th>\n",
" <th>US-1584758</th>\n",
" <th>US-1584765</th>\n",
" <th>FC_distal_vs_proximal</th>\n",
" <th>padj_distal_vs_proximal</th>\n",
" <th>FC_distal_vs_middle</th>\n",
" <th>padj_distal_vs_middle</th>\n",
" <th>FC_middle_vs_proximal</th>\n",
" <th>padj_middle_vs_proximal</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Identifier</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ENSDARG00000000001</th>\n",
" <td>51</td>\n",
" <td>40</td>\n",
" <td>69</td>\n",
" <td>78</td>\n",
" <td>89</td>\n",
" <td>47</td>\n",
" <td>88</td>\n",
" <td>86</td>\n",
" <td>110</td>\n",
" <td>55</td>\n",
" <td>...</td>\n",
" <td>58</td>\n",
" <td>104</td>\n",
" <td>43</td>\n",
" <td>72</td>\n",
" <td>0.869331</td>\n",
" <td>0.000008</td>\n",
" <td>0.748943</td>\n",
" <td>4.380000e-05</td>\n",
" <td>0.114026</td>\n",
" <td>0.630834</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000000002</th>\n",
" <td>283</td>\n",
" <td>129</td>\n",
" <td>164</td>\n",
" <td>269</td>\n",
" <td>211</td>\n",
" <td>171</td>\n",
" <td>146</td>\n",
" <td>256</td>\n",
" <td>283</td>\n",
" <td>150</td>\n",
" <td>...</td>\n",
" <td>142</td>\n",
" <td>272</td>\n",
" <td>260</td>\n",
" <td>256</td>\n",
" <td>0.287823</td>\n",
" <td>0.031298</td>\n",
" <td>1.005337</td>\n",
" <td>1.310000e-13</td>\n",
" <td>-0.724987</td>\n",
" <td>0.000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000000018</th>\n",
" <td>545</td>\n",
" <td>503</td>\n",
" <td>547</td>\n",
" <td>387</td>\n",
" <td>332</td>\n",
" <td>559</td>\n",
" <td>623</td>\n",
" <td>499</td>\n",
" <td>436</td>\n",
" <td>488</td>\n",
" <td>...</td>\n",
" <td>462</td>\n",
" <td>287</td>\n",
" <td>495</td>\n",
" <td>299</td>\n",
" <td>-0.437271</td>\n",
" <td>0.000389</td>\n",
" <td>-0.404770</td>\n",
" <td>6.868400e-04</td>\n",
" <td>-0.040193</td>\n",
" <td>0.760679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000000019</th>\n",
" <td>437</td>\n",
" <td>469</td>\n",
" <td>538</td>\n",
" <td>557</td>\n",
" <td>550</td>\n",
" <td>404</td>\n",
" <td>544</td>\n",
" <td>443</td>\n",
" <td>623</td>\n",
" <td>502</td>\n",
" <td>...</td>\n",
" <td>470</td>\n",
" <td>460</td>\n",
" <td>329</td>\n",
" <td>480</td>\n",
" <td>0.521291</td>\n",
" <td>0.000015</td>\n",
" <td>0.271082</td>\n",
" <td>1.936266e-02</td>\n",
" <td>0.242435</td>\n",
" <td>0.041606</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000000068</th>\n",
" <td>266</td>\n",
" <td>249</td>\n",
" <td>247</td>\n",
" <td>236</td>\n",
" <td>195</td>\n",
" <td>247</td>\n",
" <td>283</td>\n",
" <td>259</td>\n",
" <td>299</td>\n",
" <td>232</td>\n",
" <td>...</td>\n",
" <td>231</td>\n",
" <td>236</td>\n",
" <td>274</td>\n",
" <td>241</td>\n",
" <td>0.064820</td>\n",
" <td>0.595522</td>\n",
" <td>0.142243</td>\n",
" <td>2.579239e-01</td>\n",
" <td>-0.084764</td>\n",
" <td>0.528336</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" US-1584693 US-1584700 US-1584706 US-1584712 \\\n",
"Identifier \n",
"ENSDARG00000000001 51 40 69 78 \n",
"ENSDARG00000000002 283 129 164 269 \n",
"ENSDARG00000000018 545 503 547 387 \n",
"ENSDARG00000000019 437 469 538 557 \n",
"ENSDARG00000000068 266 249 247 236 \n",
"\n",
" US-1584722 US-1584724 US-1584725 US-1584732 \\\n",
"Identifier \n",
"ENSDARG00000000001 89 47 88 86 \n",
"ENSDARG00000000002 211 171 146 256 \n",
"ENSDARG00000000018 332 559 623 499 \n",
"ENSDARG00000000019 550 404 544 443 \n",
"ENSDARG00000000068 195 247 283 259 \n",
"\n",
" US-1584738 US-1584744 ... US-1584753 US-1584754 \\\n",
"Identifier ... \n",
"ENSDARG00000000001 110 55 ... 58 104 \n",
"ENSDARG00000000002 283 150 ... 142 272 \n",
"ENSDARG00000000018 436 488 ... 462 287 \n",
"ENSDARG00000000019 623 502 ... 470 460 \n",
"ENSDARG00000000068 299 232 ... 231 236 \n",
"\n",
" US-1584758 US-1584765 FC_distal_vs_proximal \\\n",
"Identifier \n",
"ENSDARG00000000001 43 72 0.869331 \n",
"ENSDARG00000000002 260 256 0.287823 \n",
"ENSDARG00000000018 495 299 -0.437271 \n",
"ENSDARG00000000019 329 480 0.521291 \n",
"ENSDARG00000000068 274 241 0.064820 \n",
"\n",
" padj_distal_vs_proximal FC_distal_vs_middle \\\n",
"Identifier \n",
"ENSDARG00000000001 0.000008 0.748943 \n",
"ENSDARG00000000002 0.031298 1.005337 \n",
"ENSDARG00000000018 0.000389 -0.404770 \n",
"ENSDARG00000000019 0.000015 0.271082 \n",
"ENSDARG00000000068 0.595522 0.142243 \n",
"\n",
" padj_distal_vs_middle FC_middle_vs_proximal \\\n",
"Identifier \n",
"ENSDARG00000000001 4.380000e-05 0.114026 \n",
"ENSDARG00000000002 1.310000e-13 -0.724987 \n",
"ENSDARG00000000018 6.868400e-04 -0.040193 \n",
"ENSDARG00000000019 1.936266e-02 0.242435 \n",
"ENSDARG00000000068 2.579239e-01 -0.084764 \n",
"\n",
" padj_middle_vs_proximal \n",
"Identifier \n",
"ENSDARG00000000001 0.630834 \n",
"ENSDARG00000000002 0.000001 \n",
"ENSDARG00000000018 0.760679 \n",
"ENSDARG00000000019 0.041606 \n",
"ENSDARG00000000068 0.528336 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gene_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>group</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>US-1584752</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584732</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584724</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584693</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584758</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584725</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584706</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584700</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584744</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584753</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584738</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584722</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584712</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584754</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>US-1584765</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" group\n",
"sample \n",
"US-1584752 Proximal\n",
"US-1584732 Proximal\n",
"US-1584724 Proximal\n",
"US-1584693 Proximal\n",
"US-1584758 Proximal\n",
"US-1584725 Middle\n",
"US-1584706 Middle\n",
"US-1584700 Middle\n",
"US-1584744 Middle\n",
"US-1584753 Middle\n",
"US-1584738 Distal\n",
"US-1584722 Distal\n",
"US-1584712 Distal\n",
"US-1584754 Distal\n",
"US-1584765 Distal"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gene_design"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read proteomics data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"protein_data = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Uniprot')\n",
"protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Distal#3_01</th>\n",
" <th>Distal#3_02</th>\n",
" <th>Distal#3_03</th>\n",
" <th>Distal#3_04</th>\n",
" <th>Middle#3_01</th>\n",
" <th>Middle#3_02</th>\n",
" <th>Middle#3_03</th>\n",
" <th>Middle#3_04</th>\n",
" <th>Proximal#3_01</th>\n",
" <th>Proximal#3_02</th>\n",
" <th>Proximal#3_03</th>\n",
" <th>Proximal#3_04</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Uniprot</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A0A0A0MPL4</th>\n",
" <td>9.955264e+05</td>\n",
" <td>4.946580e+06</td>\n",
" <td>1.377194e+06</td>\n",
" <td>2.208140e+06</td>\n",
" <td>2.907807e+06</td>\n",
" <td>4.231976e+06</td>\n",
" <td>1.560849e+06</td>\n",
" <td>2.852904e+06</td>\n",
" <td>1781795.086</td>\n",
" <td>2.668135e+06</td>\n",
" <td>3.079148e+06</td>\n",
" <td>2.840473e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0A0B4J1A5</th>\n",
" <td>2.982519e+06</td>\n",
" <td>8.816655e+06</td>\n",
" <td>7.668431e+06</td>\n",
" <td>4.632309e+06</td>\n",
" <td>7.672153e+06</td>\n",
" <td>7.776017e+06</td>\n",
" <td>6.633781e+06</td>\n",
" <td>8.242783e+06</td>\n",
" <td>5475654.544</td>\n",
" <td>5.703832e+06</td>\n",
" <td>8.294364e+06</td>\n",
" <td>1.334874e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0A0B4J1A7</th>\n",
" <td>1.553049e+07</td>\n",
" <td>1.037155e+06</td>\n",
" <td>1.856137e+07</td>\n",
" <td>1.767859e+07</td>\n",
" <td>1.375736e+07</td>\n",
" <td>1.747998e+07</td>\n",
" <td>1.517507e+07</td>\n",
" <td>2.394465e+07</td>\n",
" <td>3157387.719</td>\n",
" <td>1.794775e+07</td>\n",
" <td>2.300430e+07</td>\n",
" <td>2.063800e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0AUQ3</th>\n",
" <td>2.012699e+06</td>\n",
" <td>3.088982e+06</td>\n",
" <td>2.455865e+06</td>\n",
" <td>9.448331e+05</td>\n",
" <td>2.866780e+06</td>\n",
" <td>2.661669e+06</td>\n",
" <td>2.100352e+06</td>\n",
" <td>2.133662e+06</td>\n",
" <td>1738244.989</td>\n",
" <td>2.629396e+06</td>\n",
" <td>2.900560e+06</td>\n",
" <td>2.416018e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0AUR9</th>\n",
" <td>3.640487e+06</td>\n",
" <td>2.588477e+07</td>\n",
" <td>3.415989e+07</td>\n",
" <td>2.868569e+06</td>\n",
" <td>1.971142e+06</td>\n",
" <td>2.472776e+06</td>\n",
" <td>5.615177e+06</td>\n",
" <td>1.303356e+06</td>\n",
" <td>3263299.566</td>\n",
" <td>6.866769e+06</td>\n",
" <td>2.465929e+06</td>\n",
" <td>4.515643e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Distal#3_01 Distal#3_02 Distal#3_03 Distal#3_04 \\\n",
"Uniprot \n",
"A0A0A0MPL4 9.955264e+05 4.946580e+06 1.377194e+06 2.208140e+06 \n",
"A0A0B4J1A5 2.982519e+06 8.816655e+06 7.668431e+06 4.632309e+06 \n",
"A0A0B4J1A7 1.553049e+07 1.037155e+06 1.856137e+07 1.767859e+07 \n",
"A0AUQ3 2.012699e+06 3.088982e+06 2.455865e+06 9.448331e+05 \n",
"A0AUR9 3.640487e+06 2.588477e+07 3.415989e+07 2.868569e+06 \n",
"\n",
" Middle#3_01 Middle#3_02 Middle#3_03 Middle#3_04 \\\n",
"Uniprot \n",
"A0A0A0MPL4 2.907807e+06 4.231976e+06 1.560849e+06 2.852904e+06 \n",
"A0A0B4J1A5 7.672153e+06 7.776017e+06 6.633781e+06 8.242783e+06 \n",
"A0A0B4J1A7 1.375736e+07 1.747998e+07 1.517507e+07 2.394465e+07 \n",
"A0AUQ3 2.866780e+06 2.661669e+06 2.100352e+06 2.133662e+06 \n",
"A0AUR9 1.971142e+06 2.472776e+06 5.615177e+06 1.303356e+06 \n",
"\n",
" Proximal#3_01 Proximal#3_02 Proximal#3_03 Proximal#3_04 \n",
"Uniprot \n",
"A0A0A0MPL4 1781795.086 2.668135e+06 3.079148e+06 2.840473e+06 \n",
"A0A0B4J1A5 5475654.544 5.703832e+06 8.294364e+06 1.334874e+07 \n",
"A0A0B4J1A7 3157387.719 1.794775e+07 2.300430e+07 2.063800e+07 \n",
"A0AUQ3 1738244.989 2.629396e+06 2.900560e+06 2.416018e+06 \n",
"A0AUR9 3263299.566 6.866769e+06 2.465929e+06 4.515643e+06 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>group</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Distal#3_01</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Distal#3_02</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Distal#3_03</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Distal#3_04</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Middle#3_01</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Middle#3_02</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Middle#3_03</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Middle#3_04</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Proximal#3_01</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Proximal#3_02</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Proximal#3_03</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Proximal#3_04</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" group\n",
"sample \n",
"Distal#3_01 Distal\n",
"Distal#3_02 Distal\n",
"Distal#3_03 Distal\n",
"Distal#3_04 Distal\n",
"Middle#3_01 Middle\n",
"Middle#3_02 Middle\n",
"Middle#3_03 Middle\n",
"Middle#3_04 Middle\n",
"Proximal#3_01 Proximal\n",
"Proximal#3_02 Proximal\n",
"Proximal#3_03 Proximal\n",
"Proximal#3_04 Proximal"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_design"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read metabolomics data"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"compound_data = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data_kegg.csv'), index_col='Identifier')\n",
"compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>distal_M1</th>\n",
" <th>distal_M2</th>\n",
" <th>distal_M3</th>\n",
" <th>distal_F1</th>\n",
" <th>distal_F2</th>\n",
" <th>distal_F3</th>\n",
" <th>middle_M1</th>\n",
" <th>middle_M2</th>\n",
" <th>middle_M3</th>\n",
" <th>middle_F1</th>\n",
" <th>middle_F2</th>\n",
" <th>middle_F3</th>\n",
" <th>proximal_M1</th>\n",
" <th>proximal_M2</th>\n",
" <th>proximal_M3</th>\n",
" <th>proximal_F1</th>\n",
" <th>proximal_F2</th>\n",
" <th>proximal_F3</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Identifier</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>C00565</th>\n",
" <td>75170.0</td>\n",
" <td>57052</td>\n",
" <td>39170.0</td>\n",
" <td>84057</td>\n",
" <td>38608.0</td>\n",
" <td>64126.0</td>\n",
" <td>50214.0</td>\n",
" <td>75680</td>\n",
" <td>165178</td>\n",
" <td>121856</td>\n",
" <td>77061</td>\n",
" <td>98015.0</td>\n",
" <td>113765</td>\n",
" <td>96098</td>\n",
" <td>84198</td>\n",
" <td>117644</td>\n",
" <td>169459</td>\n",
" <td>169669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C00037</th>\n",
" <td>64511.0</td>\n",
" <td>33658</td>\n",
" <td>23565.0</td>\n",
" <td>52102</td>\n",
" <td>49508.0</td>\n",
" <td>37498.0</td>\n",
" <td>30417.0</td>\n",
" <td>55728</td>\n",
" <td>88519</td>\n",
" <td>103871</td>\n",
" <td>45974</td>\n",
" <td>73101.0</td>\n",
" <td>72725</td>\n",
" <td>66008</td>\n",
" <td>54220</td>\n",
" <td>95341</td>\n",
" <td>110192</td>\n",
" <td>291598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C01104</th>\n",
" <td>5787534.0</td>\n",
" <td>4351239</td>\n",
" <td>4401036.0</td>\n",
" <td>8187282</td>\n",
" <td>8431125.0</td>\n",
" <td>5082056.0</td>\n",
" <td>5138937.0</td>\n",
" <td>7341351</td>\n",
" <td>7837293</td>\n",
" <td>9256269</td>\n",
" <td>9934066</td>\n",
" <td>10243285.0</td>\n",
" <td>7344406</td>\n",
" <td>5524811</td>\n",
" <td>4809250</td>\n",
" <td>9279874</td>\n",
" <td>9047339</td>\n",
" <td>9211255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C00134</th>\n",
" <td>3430897.0</td>\n",
" <td>1877785</td>\n",
" <td>1225710.0</td>\n",
" <td>2326620</td>\n",
" <td>2421267.0</td>\n",
" <td>2595529.0</td>\n",
" <td>2003627.0</td>\n",
" <td>2120053</td>\n",
" <td>2269318</td>\n",
" <td>3220850</td>\n",
" <td>4596854</td>\n",
" <td>3155377.0</td>\n",
" <td>3760854</td>\n",
" <td>2658833</td>\n",
" <td>2488025</td>\n",
" <td>2506550</td>\n",
" <td>4000703</td>\n",
" <td>3292566</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C00213</th>\n",
" <td>112845.0</td>\n",
" <td>129977</td>\n",
" <td>122292.0</td>\n",
" <td>63219</td>\n",
" <td>50113.0</td>\n",
" <td>100343.0</td>\n",
" <td>156651.0</td>\n",
" <td>176682</td>\n",
" <td>379322</td>\n",
" <td>160906</td>\n",
" <td>56802</td>\n",
" <td>107161.0</td>\n",
" <td>235982</td>\n",
" <td>181200</td>\n",
" <td>142994</td>\n",
" <td>116132</td>\n",
" <td>94589</td>\n",
" <td>167280</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" distal_M1 distal_M2 distal_M3 distal_F1 distal_F2 distal_F3 \\\n",
"Identifier \n",
"C00565 75170.0 57052 39170.0 84057 38608.0 64126.0 \n",
"C00037 64511.0 33658 23565.0 52102 49508.0 37498.0 \n",
"C01104 5787534.0 4351239 4401036.0 8187282 8431125.0 5082056.0 \n",
"C00134 3430897.0 1877785 1225710.0 2326620 2421267.0 2595529.0 \n",
"C00213 112845.0 129977 122292.0 63219 50113.0 100343.0 \n",
"\n",
" middle_M1 middle_M2 middle_M3 middle_F1 middle_F2 middle_F3 \\\n",
"Identifier \n",
"C00565 50214.0 75680 165178 121856 77061 98015.0 \n",
"C00037 30417.0 55728 88519 103871 45974 73101.0 \n",
"C01104 5138937.0 7341351 7837293 9256269 9934066 10243285.0 \n",
"C00134 2003627.0 2120053 2269318 3220850 4596854 3155377.0 \n",
"C00213 156651.0 176682 379322 160906 56802 107161.0 \n",
"\n",
" proximal_M1 proximal_M2 proximal_M3 proximal_F1 proximal_F2 \\\n",
"Identifier \n",
"C00565 113765 96098 84198 117644 169459 \n",
"C00037 72725 66008 54220 95341 110192 \n",
"C01104 7344406 5524811 4809250 9279874 9047339 \n",
"C00134 3760854 2658833 2488025 2506550 4000703 \n",
"C00213 235982 181200 142994 116132 94589 \n",
"\n",
" proximal_F3 \n",
"Identifier \n",
"C00565 169669 \n",
"C00037 291598 \n",
"C01104 9211255 \n",
"C00134 3292566 \n",
"C00213 167280 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compound_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>group</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>distal_M1</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>distal_M2</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>distal_M3</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>distal_F1</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>distal_F2</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>distal_F3</th>\n",
" <td>Distal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>middle_M1</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>middle_M2</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>middle_M3</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>middle_F1</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>middle_F2</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>middle_F3</th>\n",
" <td>Middle</td>\n",
" </tr>\n",
" <tr>\n",
" <th>proximal_M1</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>proximal_M2</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>proximal_M3</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>proximal_F1</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>proximal_F2</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>proximal_F3</th>\n",
" <td>Proximal</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" group\n",
"sample \n",
"distal_M1 Distal\n",
"distal_M2 Distal\n",
"distal_M3 Distal\n",
"distal_F1 Distal\n",
"distal_F2 Distal\n",
"distal_F3 Distal\n",
"middle_M1 Middle\n",
"middle_M2 Middle\n",
"middle_M3 Middle\n",
"middle_F1 Middle\n",
"middle_F2 Middle\n",
"middle_F3 Middle\n",
"proximal_M1 Proximal\n",
"proximal_M2 Proximal\n",
"proximal_M3 Proximal\n",
"proximal_F1 Proximal\n",
"proximal_F2 Proximal\n",
"proximal_F3 Proximal"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compound_design"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set_log_level_info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create omics data"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"genes data with (31953, 15) measurements"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transcript_data = SingleOmicsData(GENES, gene_data, gene_design)\n",
"transcript_data"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"proteins data with (3061, 12) measurements"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bind the wrapped object to a new name instead of overwriting `protein_data`,\n",
"# so that re-running this cell still passes the raw DataFrame to SingleOmicsData.\n",
"proteomics_data = SingleOmicsData(PROTEINS, protein_data, protein_design)\n",
"proteomics_data"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"compounds data with (130, 18) measurements"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same as above: keep the raw `compound_data` DataFrame intact so the cell is idempotent.\n",
"metabolomics_data = SingleOmicsData(COMPOUNDS, compound_data, compound_design)\n",
"metabolomics_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"publication = 'Rabinowitz, Jeremy S., et al. \"Transcriptomic, proteomic, and metabolomic landscape of positional memory in the caudal fin of zebrafish.\" Proceedings of the National Academy of Sciences 114.5 (2017): E717-E726.'\n",
"url = 'https://www.pnas.org/content/114/5/E717.short'"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Multi-omics data container\n",
"- publication: Rabinowitz, Jeremy S., et al. \"Transcriptomic, proteomic, and metabolomic landscape of positional memory in the caudal fin of zebrafish.\" Proceedings of the National Academy of Sciences 114.5 (2017): E717-E726.\n",
"- URL: https://www.pnas.org/content/114/5/E717.short\n",
"- Views: 3 modalities\n",
"\t - genes data with (31953, 15) measurements\n",
"\t - proteins data with (3061, 12) measurements\n",
"\t - compounds data with (130, 18) measurements"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Combine the three single-omics views into one container.\n",
"mo = MultiOmicsData(publication=publication, url=url)\n",
"mo.add_data([transcript_data, proteomics_data, metabolomics_data])\n",
"mo"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a mapping object"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The mapping object uses Reactome to map the different biological entities in the data:\n",
"- Transcripts (or genes) are connected to the proteins they encode\n",
"- Proteins and compounds are connected to reactions they're involved in\n",
"- Reactions are connected to pathways"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:40:26.569 | INFO | pyMultiOmics.functions:remove_dupes:385 - Removing 2 rows with duplicate identifiers\n",
"2022-03-24 14:40:26.570 | INFO | pyMultiOmics.functions:reactome_mapping:78 - There are 128 observed compound ids\n",
"2022-03-24 14:40:26.571 | INFO | pyMultiOmics.functions:reactome_mapping:81 - Mapping genes -> proteins\n",
"2022-03-24 14:40:34.971 | INFO | pyMultiOmics.functions:reactome_mapping:86 - Mapping proteins -> reactions\n",
"2022-03-24 14:40:46.041 | INFO | pyMultiOmics.functions:reactome_mapping:94 - Mapping compounds -> reactions\n",
"2022-03-24 14:40:49.516 | INFO | pyMultiOmics.functions:reactome_mapping:100 - Mapping reactions -> pathways\n",
"2022-03-24 14:40:50.558 | INFO | pyMultiOmics.functions:reactome_mapping:111 - Mapping reactions -> proteins\n",
"2022-03-24 14:40:57.492 | INFO | pyMultiOmics.functions:reactome_mapping:118 - Mapping reactions -> compounds\n",
"2022-03-24 14:41:01.232 | INFO | pyMultiOmics.functions:reactome_mapping:130 - Mapping proteins -> genes\n",
"2022-03-24 14:41:18.641 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: genes\n",
"2022-03-24 14:41:19.903 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: proteins\n",
"2022-03-24 14:41:20.231 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: compounds\n",
"2022-03-24 14:41:20.289 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: reactions\n",
"2022-03-24 14:41:20.463 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: pathways\n",
"2022-03-24 14:41:20.470 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: gene_proteins\n",
"2022-03-24 14:41:20.647 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: protein_reactions\n",
"2022-03-24 14:41:21.793 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: compound_reactions\n",
"2022-03-24 14:41:22.134 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: reaction_pathways\n",
"2022-03-24 14:41:22.366 | INFO | pyMultiOmics.mapping:build:51 - Created a multi-omics network with 21264 nodes and 22682 edges\n",
"2022-03-24 14:41:22.385 | INFO | pyMultiOmics.mapping:build:53 - node_counts = {'genes': 9371, 'proteins': 8221, 'compounds': 1794, 'reactions': 1629, 'pathways': 249}\n"
]
},
{
"data": {
"text/plain": [
"<pyMultiOmics.mapping.Mapper at 0x174b0a440>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m = Mapper(mo, DANIO_RERIO, metabolic_pathway_only=True)\n",
"m.build()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pyMultiOmics.mapping.Mapper at 0x174b0a440>"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying mapping object"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below shows some example queries we can perform with the mapping object"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find reactions that are connected to some observed genes, proteins and compounds in the data"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>reaction_id</th>\n",
" <th>reaction_name</th>\n",
" <th>num_genes</th>\n",
" <th>num_proteins</th>\n",
" <th>num_compounds</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>R-DRE-109278</td>\n",
" <td>Nt5e:zn2+ hydrolyses amp,damp,gmp, imp</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>R-DRE-109291</td>\n",
" <td>Cmp or tmp or ump + h2o => cytidine, thymidine...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>R-DRE-109415</td>\n",
" <td>Amp + h2o => adenosine + orthophosphate [nt5c1b]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>R-DRE-109624</td>\n",
" <td>(2-deoxy)adenosine + atp => (d)amp + adp (adk)</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>R-DRE-1237160</td>\n",
" <td>Mta is cleaved and phosphorylated</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>R-DRE-1247910</td>\n",
" <td>Cndp2:2mn2+ dimer hydrolyses cysgly</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>R-DRE-139970</td>\n",
" <td>Fmo3:fad n-oxidises tma to tmao</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>R-DRE-1482976</td>\n",
" <td>Cdp-dag is converted to pi by cdipt</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>R-DRE-1614583</td>\n",
" <td>Pxlp-k212-cth cleaves l-cystathionine</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>R-DRE-174401</td>\n",
" <td>Ahcy:nad+ tetramer hydrolyses adohcy</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>R-DRE-1855154</td>\n",
" <td>I1p is dephosphorylated to ins by impa1/2 in t...</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>R-DRE-1855210</td>\n",
" <td>I3p is dephosphorylated to ins by impa1/2 in t...</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>R-DRE-1855211</td>\n",
" <td>I4p is dephosphorylated to ins by impa1/2 in t...</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>R-DRE-188467</td>\n",
" <td>Gly-3-p+fad->dhap+fadh2 (catalyzed by mitochon...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>R-DRE-197250</td>\n",
" <td>Nampt transfers prib to nam to form namn</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>R-DRE-198813</td>\n",
" <td>Gsto dimers reduce deha to asch-</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>R-DRE-200318</td>\n",
" <td>Creatine + atp => phosphocreatine + adp [ckb,ckm]</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>R-DRE-200326</td>\n",
" <td>Creatine + atp => phosphocreatine + adp [ck oc...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>R-DRE-202127</td>\n",
" <td>Enos synthesizes no</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>R-DRE-2162066</td>\n",
" <td>Carbovir + imp => carbovir monophosphate + ino...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>R-DRE-2993447</td>\n",
" <td>Hlcs biotinylates 6x(pcca:pccb)</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>R-DRE-2993799</td>\n",
" <td>Hlcs biotinylates 6xmccc1:6xmccc2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>R-DRE-3301943</td>\n",
" <td>Gstk1 dimer transfers gs from gsh to cdnb</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>R-DRE-350604</td>\n",
" <td>Agmatine + h2o <=> putrescine + urea</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>R-DRE-351215</td>\n",
" <td>Putrescine + dc-adenosyl methionine => spermi...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>R-DRE-372519</td>\n",
" <td>Accho is hydrolyzed to cho and acetate by ache</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>R-DRE-372819</td>\n",
" <td>Oxaloacetate + gtp => phosphoenolpyruvate + gd...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>R-DRE-416530</td>\n",
" <td>Ffar1:ffar1 ligands activates gq</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>R-DRE-5652172</td>\n",
" <td>Akr1b1 reduces glc to d-sorbitol</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>R-DRE-5693373</td>\n",
" <td>Ddah1,2 hydrolyses adma to dma and l-cit</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>R-DRE-5693724</td>\n",
" <td>Esd dimer hydrolyses s-fgsh to gsh</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>R-DRE-6783221</td>\n",
" <td>Hagh hydrolyses (r)-s-lgsh to gsh and lact</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>R-DRE-6783880</td>\n",
" <td>Pipox oxidises ppca to p6c</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>R-DRE-6784393</td>\n",
" <td>Pxlp-k279-got2 dimer transaminates 4-oh-l-glut...</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>R-DRE-6797653</td>\n",
" <td>Dmgdh:fad oxidatively demethylates dmgly to sarc</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>R-DRE-6797955</td>\n",
" <td>Aldh7a1 oxidises betald to bet</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>R-DRE-6798317</td>\n",
" <td>Gnmt tetramer transfers methyl group from adom...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>R-DRE-6807826</td>\n",
" <td>Ldhal6b reduces pyr to lact</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>R-DRE-70342</td>\n",
" <td>Aldob tetramer cleaves fru-1-p to ga and dhap</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>R-DRE-70349</td>\n",
" <td>Dak dimer phosphorylates d-glyceraldehyde to f...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>R-DRE-70510</td>\n",
" <td>Ldh tetramer oxidises lact to pyr</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>R-DRE-70573</td>\n",
" <td>Argininosuccinate <=> fumarate + arginine</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>R-DRE-70596</td>\n",
" <td>Got2 dimer deaminates l-asp</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>R-DRE-70613</td>\n",
" <td>Oxaloacetate + glutamate <=> aspartate + alpha...</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>R-DRE-70979</td>\n",
" <td>(s)-malate + nad+ <=> oxaloacetate + nadh + h+</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>R-DRE-71163</td>\n",
" <td>P-hydroxyphenylpyruvate + o2 => homogentisate ...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>R-DRE-71260</td>\n",
" <td>Aldh9a1 tetramer dehydrogenates teabl to form ...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>R-DRE-71783</td>\n",
" <td>Oxaloacetate + nadh + h+ <=> (s)-malate + nad+</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>R-DRE-71849</td>\n",
" <td>Ldh tetramer reduces pyr to lact</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>R-DRE-74241</td>\n",
" <td>Ada catalyzes the deamination of (deoxy)adenosine</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>R-DRE-74248</td>\n",
" <td>(d)gmp or (d)imp + h2o => (2-deoxy)guanosine o...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>R-DRE-74372</td>\n",
" <td>Uracil + (deoxy)ribose 1-phosphate <=> (deoxy)...</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>R-DRE-74376</td>\n",
" <td>(deoxy)uridine + orthophosphate <=> uracil + (...</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>R-DRE-75848</td>\n",
" <td>Acly tetramer transforms cit to ac-coa</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>R-DRE-75889</td>\n",
" <td>Dhap is converted to g3p by gpd1/gpd1l</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" reaction_id reaction_name \\\n",
"0 R-DRE-109278 Nt5e:zn2+ hydrolyses amp,damp,gmp, imp \n",
"1 R-DRE-109291 Cmp or tmp or ump + h2o => cytidine, thymidine... \n",
"2 R-DRE-109415 Amp + h2o => adenosine + orthophosphate [nt5c1b] \n",
"3 R-DRE-109624 (2-deoxy)adenosine + atp => (d)amp + adp (adk) \n",
"4 R-DRE-1237160 Mta is cleaved and phosphorylated \n",
"5 R-DRE-1247910 Cndp2:2mn2+ dimer hydrolyses cysgly \n",
"6 R-DRE-139970 Fmo3:fad n-oxidises tma to tmao \n",
"7 R-DRE-1482976 Cdp-dag is converted to pi by cdipt \n",
"8 R-DRE-1614583 Pxlp-k212-cth cleaves l-cystathionine \n",
"9 R-DRE-174401 Ahcy:nad+ tetramer hydrolyses adohcy \n",
"10 R-DRE-1855154 I1p is dephosphorylated to ins by impa1/2 in t... \n",
"11 R-DRE-1855210 I3p is dephosphorylated to ins by impa1/2 in t... \n",
"12 R-DRE-1855211 I4p is dephosphorylated to ins by impa1/2 in t... \n",
"13 R-DRE-188467 Gly-3-p+fad->dhap+fadh2 (catalyzed by mitochon... \n",
"14 R-DRE-197250 Nampt transfers prib to nam to form namn \n",
"15 R-DRE-198813 Gsto dimers reduce deha to asch- \n",
"16 R-DRE-200318 Creatine + atp => phosphocreatine + adp [ckb,ckm] \n",
"17 R-DRE-200326 Creatine + atp => phosphocreatine + adp [ck oc... \n",
"18 R-DRE-202127 Enos synthesizes no \n",
"19 R-DRE-2162066 Carbovir + imp => carbovir monophosphate + ino... \n",
"20 R-DRE-2993447 Hlcs biotinylates 6x(pcca:pccb) \n",
"21 R-DRE-2993799 Hlcs biotinylates 6xmccc1:6xmccc2 \n",
"22 R-DRE-3301943 Gstk1 dimer transfers gs from gsh to cdnb \n",
"23 R-DRE-350604 Agmatine + h2o <=> putrescine + urea \n",
"24 R-DRE-351215 Putrescine + dc-adenosyl methionine => spermi... \n",
"25 R-DRE-372519 Accho is hydrolyzed to cho and acetate by ache \n",
"26 R-DRE-372819 Oxaloacetate + gtp => phosphoenolpyruvate + gd... \n",
"27 R-DRE-416530 Ffar1:ffar1 ligands activates gq \n",
"28 R-DRE-5652172 Akr1b1 reduces glc to d-sorbitol \n",
"29 R-DRE-5693373 Ddah1,2 hydrolyses adma to dma and l-cit \n",
"30 R-DRE-5693724 Esd dimer hydrolyses s-fgsh to gsh \n",
"31 R-DRE-6783221 Hagh hydrolyses (r)-s-lgsh to gsh and lact \n",
"32 R-DRE-6783880 Pipox oxidises ppca to p6c \n",
"33 R-DRE-6784393 Pxlp-k279-got2 dimer transaminates 4-oh-l-glut... \n",
"34 R-DRE-6797653 Dmgdh:fad oxidatively demethylates dmgly to sarc \n",
"35 R-DRE-6797955 Aldh7a1 oxidises betald to bet \n",
"36 R-DRE-6798317 Gnmt tetramer transfers methyl group from adom... \n",
"37 R-DRE-6807826 Ldhal6b reduces pyr to lact \n",
"38 R-DRE-70342 Aldob tetramer cleaves fru-1-p to ga and dhap \n",
"39 R-DRE-70349 Dak dimer phosphorylates d-glyceraldehyde to f... \n",
"40 R-DRE-70510 Ldh tetramer oxidises lact to pyr \n",
"41 R-DRE-70573 Argininosuccinate <=> fumarate + arginine \n",
"42 R-DRE-70596 Got2 dimer deaminates l-asp \n",
"43 R-DRE-70613 Oxaloacetate + glutamate <=> aspartate + alpha... \n",
"44 R-DRE-70979 (s)-malate + nad+ <=> oxaloacetate + nadh + h+ \n",
"45 R-DRE-71163 P-hydroxyphenylpyruvate + o2 => homogentisate ... \n",
"46 R-DRE-71260 Aldh9a1 tetramer dehydrogenates teabl to form ... \n",
"47 R-DRE-71783 Oxaloacetate + nadh + h+ <=> (s)-malate + nad+ \n",
"48 R-DRE-71849 Ldh tetramer reduces pyr to lact \n",
"49 R-DRE-74241 Ada catalyzes the deamination of (deoxy)adenosine \n",
"50 R-DRE-74248 (d)gmp or (d)imp + h2o => (2-deoxy)guanosine o... \n",
"51 R-DRE-74372 Uracil + (deoxy)ribose 1-phosphate <=> (deoxy)... \n",
"52 R-DRE-74376 (deoxy)uridine + orthophosphate <=> uracil + (... \n",
"53 R-DRE-75848 Acly tetramer transforms cit to ac-coa \n",
"54 R-DRE-75889 Dhap is converted to g3p by gpd1/gpd1l \n",
"\n",
" num_genes num_proteins num_compounds \n",
"0 1 1 3 \n",
"1 1 1 2 \n",
"2 1 1 1 \n",
"3 1 1 1 \n",
"4 1 1 1 \n",
"5 1 1 1 \n",
"6 3 1 2 \n",
"7 1 1 1 \n",
"8 1 1 1 \n",
"9 1 1 1 \n",
"10 2 1 1 \n",
"11 2 1 1 \n",
"12 2 1 1 \n",
"13 1 1 1 \n",
"14 1 1 1 \n",
"15 2 2 1 \n",
"16 2 2 1 \n",
"17 1 1 1 \n",
"18 4 2 1 \n",
"19 1 1 1 \n",
"20 3 1 1 \n",
"21 3 1 1 \n",
"22 1 1 1 \n",
"23 1 1 1 \n",
"24 1 1 2 \n",
"25 2 2 2 \n",
"26 1 1 1 \n",
"27 5 1 1 \n",
"28 2 1 1 \n",
"29 1 1 1 \n",
"30 1 1 1 \n",
"31 1 1 1 \n",
"32 1 1 1 \n",
"33 2 2 1 \n",
"34 1 1 2 \n",
"35 1 1 1 \n",
"36 1 1 1 \n",
"37 1 1 1 \n",
"38 1 1 1 \n",
"39 1 1 1 \n",
"40 2 2 1 \n",
"41 1 1 1 \n",
"42 2 2 1 \n",
"43 2 2 1 \n",
"44 1 1 1 \n",
"45 1 1 1 \n",
"46 1 1 1 \n",
"47 1 1 1 \n",
"48 2 2 1 \n",
"49 1 1 2 \n",
"50 1 1 2 \n",
"51 2 2 2 \n",
"52 2 2 2 \n",
"53 3 1 1 \n",
"54 2 1 1 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reactions = m.get_nodes(types=REACTIONS)\n",
"\n",
"data = []\n",
"for reaction_id, reaction_data in reactions:\n",
" reaction_name = reaction_data['display_name']\n",
" genes = m.get_connected(reaction_id, dest_type=GENES, observed=True)\n",
" proteins = m.get_connected(reaction_id, dest_type=PROTEINS, observed=True)\n",
" compounds = m.get_connected(reaction_id, dest_type=COMPOUNDS, observed=True)\n",
" \n",
" if len(genes) > 0 and len(proteins) > 0 and len(compounds) > 0:\n",
" row = [reaction_id, reaction_name, len(genes), len(proteins), len(compounds)]\n",
" data.append(row)\n",
"\n",
"df = pd.DataFrame(data, columns=['reaction_id', 'reaction_name', 'num_genes', 'num_proteins', 'num_compounds'])\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find everything connected to protein 'F1QAA7'"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ENSDARG00000037781</th>\n",
" <td>Acss2</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>456215</th>\n",
" <td>Adenosine 5-monophosphate(2-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33019</th>\n",
" <td>Diphosphate(3-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57288</th>\n",
" <td>Acetyl-coa(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57287</th>\n",
" <td>Coenzyme a(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30616</th>\n",
" <td>Atp(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15366</th>\n",
" <td>Acetic acid</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R-DRE-71735</th>\n",
" <td>Acetate + coa + atp => acetyl-coa + amp + pyro...</td>\n",
" <td>reactions</td>\n",
" <td>None</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R-DRE-71384</th>\n",
" <td>Ethanol oxidation</td>\n",
" <td>pathways</td>\n",
" <td>None</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name \\\n",
"entity_id \n",
"ENSDARG00000037781 Acss2 \n",
"456215 Adenosine 5-monophosphate(2-) \n",
"33019 Diphosphate(3-) \n",
"57288 Acetyl-coa(4-) \n",
"57287 Coenzyme a(4-) \n",
"30616 Atp(4-) \n",
"15366 Acetic acid \n",
"R-DRE-71735 Acetate + coa + atp => acetyl-coa + amp + pyro... \n",
"R-DRE-71384 Ethanol oxidation \n",
"\n",
" data_type observed source_id \n",
"entity_id \n",
"ENSDARG00000037781 genes True F1QAA7 \n",
"456215 compounds False F1QAA7 \n",
"33019 compounds False F1QAA7 \n",
"57288 compounds False F1QAA7 \n",
"57287 compounds False F1QAA7 \n",
"30616 compounds False F1QAA7 \n",
"15366 compounds False F1QAA7 \n",
"R-DRE-71735 reactions None F1QAA7 \n",
"R-DRE-71384 pathways None F1QAA7 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = 'F1QAA7'\n",
"m.get_connected(query_id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find compounds in the data connected to protein 'F1QAA7'"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>456215</th>\n",
" <td>Adenosine 5-monophosphate(2-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33019</th>\n",
" <td>Diphosphate(3-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57288</th>\n",
" <td>Acetyl-coa(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57287</th>\n",
" <td>Coenzyme a(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30616</th>\n",
" <td>Atp(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15366</th>\n",
" <td>Acetic acid</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>F1QAA7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed source_id\n",
"entity_id \n",
"456215 Adenosine 5-monophosphate(2-) compounds False F1QAA7\n",
"33019 Diphosphate(3-) compounds False F1QAA7\n",
"57288 Acetyl-coa(4-) compounds False F1QAA7\n",
"57287 Coenzyme a(4-) compounds False F1QAA7\n",
"30616 Atp(4-) compounds False F1QAA7\n",
"15366 Acetic acid compounds False F1QAA7"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = 'F1QAA7'\n",
"m.get_connected(query_id, dest_type=COMPOUNDS)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find observed genes and proteins connected to compound '33019'"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ENSDARG00000058162</th>\n",
" <td>Pcyt1ba</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000011233</th>\n",
" <td>Pcyt1aa</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000004517</th>\n",
" <td>Ppat</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000039934</th>\n",
" <td>Hlcs</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000061994</th>\n",
" <td>Acacb</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B0V0X1</th>\n",
" <td>B0V0X1</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B8JLW8</th>\n",
" <td>B8JLW8</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F1QYS7</th>\n",
" <td>F1QYS7</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B0S5C4</th>\n",
" <td>B0S5C4</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q802U9</th>\n",
" <td>Q802U9</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>33019</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>87 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed source_id\n",
"entity_id \n",
"ENSDARG00000058162 Pcyt1ba genes True 33019\n",
"ENSDARG00000011233 Pcyt1aa genes True 33019\n",
"ENSDARG00000004517 Ppat genes True 33019\n",
"ENSDARG00000039934 Hlcs genes True 33019\n",
"ENSDARG00000061994 Acacb genes True 33019\n",
"... ... ... ... ...\n",
"B0V0X1 B0V0X1 proteins True 33019\n",
"B8JLW8 B8JLW8 proteins True 33019\n",
"F1QYS7 F1QYS7 proteins True 33019\n",
"B0S5C4 B0S5C4 proteins True 33019\n",
"Q802U9 Q802U9 proteins True 33019\n",
"\n",
"[87 rows x 4 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = '33019'\n",
"genes = m.get_connected(query_id, dest_type=[GENES, PROTEINS], observed=True)\n",
"genes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find pathways connected to genes 'ENSDARG00000087927'"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>R-DRE-2393930</th>\n",
" <td>Phosphate bond hydrolysis by nudt proteins</td>\n",
" <td>pathways</td>\n",
" <td>None</td>\n",
" <td>ENSDARG00000087927</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed \\\n",
"entity_id \n",
"R-DRE-2393930 Phosphate bond hydrolysis by nudt proteins pathways None \n",
"\n",
" source_id \n",
"entity_id \n",
"R-DRE-2393930 ENSDARG00000087927 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = 'ENSDARG00000087927'\n",
"m.get_connected(query_id, dest_type=PATHWAYS)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find genes, proteins, compounds and pathways connected to reaction 'R-DRE-2395818'"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ENSDARG00000030573</th>\n",
" <td>Nudt1</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q7ZWC3</th>\n",
" <td>Q7ZWC3</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15377</th>\n",
" <td>Water</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18420</th>\n",
" <td>Magnesium(2+)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63212</th>\n",
" <td>2-hydroxy-damp(2-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77897</th>\n",
" <td>2-hydroxy-datp(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15378</th>\n",
" <td>Hydron</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33019</th>\n",
" <td>Diphosphate(3-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R-DRE-2393930</th>\n",
" <td>Phosphate bond hydrolysis by nudt proteins</td>\n",
" <td>pathways</td>\n",
" <td>None</td>\n",
" <td>R-DRE-2395818</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name data_type \\\n",
"entity_id \n",
"ENSDARG00000030573 Nudt1 genes \n",
"Q7ZWC3 Q7ZWC3 proteins \n",
"15377 Water compounds \n",
"18420 Magnesium(2+) compounds \n",
"63212 2-hydroxy-damp(2-) compounds \n",
"77897 2-hydroxy-datp(4-) compounds \n",
"15378 Hydron compounds \n",
"33019 Diphosphate(3-) compounds \n",
"R-DRE-2393930 Phosphate bond hydrolysis by nudt proteins pathways \n",
"\n",
" observed source_id \n",
"entity_id \n",
"ENSDARG00000030573 True R-DRE-2395818 \n",
"Q7ZWC3 False R-DRE-2395818 \n",
"15377 False R-DRE-2395818 \n",
"18420 False R-DRE-2395818 \n",
"63212 False R-DRE-2395818 \n",
"77897 False R-DRE-2395818 \n",
"15378 False R-DRE-2395818 \n",
"33019 False R-DRE-2395818 \n",
"R-DRE-2393930 None R-DRE-2395818 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m.get_connected('R-DRE-2395818')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Find observed genes, proteins, compounds and reactions involved in the pathway 'R-DRE-2393930'"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ENSDARG00000030573</th>\n",
" <td>Nudt1</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000019503</th>\n",
" <td>Zgc:103759</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000078073</th>\n",
" <td>Nudt5</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000087927</th>\n",
" <td>Nudt9</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000041576</th>\n",
" <td>Nudt18</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ENSDARG00000026090</th>\n",
" <td>Adprm</td>\n",
" <td>genes</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q6IQ66</th>\n",
" <td>Q6IQ66</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F1QL34</th>\n",
" <td>F1QL34</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>R-DRE-2393930</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed source_id\n",
"entity_id \n",
"ENSDARG00000030573 Nudt1 genes True R-DRE-2393930\n",
"ENSDARG00000019503 Zgc:103759 genes True R-DRE-2393930\n",
"ENSDARG00000078073 Nudt5 genes True R-DRE-2393930\n",
"ENSDARG00000087927 Nudt9 genes True R-DRE-2393930\n",
"ENSDARG00000041576 Nudt18 genes True R-DRE-2393930\n",
"ENSDARG00000026090 Adprm genes True R-DRE-2393930\n",
"Q6IQ66 Q6IQ66 proteins True R-DRE-2393930\n",
"F1QL34 F1QL34 proteins True R-DRE-2393930"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m.get_connected('R-DRE-2393930', dest_type=[GENES, PROTEINS, COMPOUNDS], observed=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}