pyMultiOmics / Git / Diff of /notebooks/mapping

Models:
AlyssaS/
pyMultiOmics
Downloads: 1
Diff of /notebooks/mapping_covid.ipynb [000000] .. [7d5693]
Switch to side-by-side view

--- a
+++ b/notebooks/mapping_covid.ipynb
@@ -0,0 +1,1795 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, sys"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pylab as plt\n",
+    "import matplotlib\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-03-24 14:40:01.676 | WARNING  | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n",
+      "2022-03-24 14:40:01.676 | WARNING  | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n",
+      "2022-03-24 14:40:01.677 | INFO     | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n"
+     ]
+    }
+   ],
+   "source": [
+    "sys.path.append('..')\n",
+    "\n",
+    "from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n",
+    "from pyMultiOmics.constants import HOMO_SAPIENS, PROTEINS, COMPOUNDS, REACTIONS\n",
+    "from pyMultiOmics.mapping import Mapper\n",
+    "from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Demonstration of pyMultiOmics mapping"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load the processed COVID-19 data from [1]\n",
+    "\n",
+    "[1] [Shen, Bo, et al. \"Proteomic and metabolomic characterization of COVID-19 patient sera.\" Cell 182.1 (2020): 59-72.](https://www.sciencedirect.com/science/article/pii/S0092867420306279)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-03-24 14:40:02.949 | INFO     | pyMultiOmics.common:download_file:59 - Downloading covid19_dualomics_data.zip\n",
+      "551KB [00:00, 8.89kKB/s]                                                                                                \n",
+      "2022-03-24 14:40:03.061 | INFO     | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_dualomics_data.zip\n",
+      "100%|█████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 1191.03it/s]\n",
+      "2022-03-24 14:40:03.075 | INFO     | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_dualomics_data.zip\n"
+     ]
+    }
+   ],
+   "source": [
+    "url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_dualomics_data.zip'\n",
+    "out_file = download_file(url)\n",
+    "extract_zip_file(out_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_dualomics_data'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "DATA_FOLDER = os.path.abspath(os.path.join('covid19_dualomics_data'))\n",
+    "DATA_FOLDER"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read the proteomics data: the protein measurement table and its sample design table (one group label per sample)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Identifier')\n",
+    "protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>h_F1_131N</th>\n",
+       "      <th>h_F1_131C</th>\n",
+       "      <th>h_F1_132C</th>\n",
+       "      <th>h_F2_131N</th>\n",
+       "      <th>h_F2_131C</th>\n",
+       "      <th>h_F2_132C</th>\n",
+       "      <th>h_F3_131N</th>\n",
+       "      <th>h_F3_131C</th>\n",
+       "      <th>h_F3_132C</th>\n",
+       "      <th>h_F4_131N</th>\n",
+       "      <th>...</th>\n",
+       "      <th>s_F3_128N</th>\n",
+       "      <th>s_F3_128C</th>\n",
+       "      <th>s_F3_129C</th>\n",
+       "      <th>s_F4_128N</th>\n",
+       "      <th>s_F4_128C</th>\n",
+       "      <th>s_F5_128N</th>\n",
+       "      <th>s_F5_128C</th>\n",
+       "      <th>s_F6_128N</th>\n",
+       "      <th>s_F6_128C</th>\n",
+       "      <th>s_F6_133N</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Identifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>P04114</th>\n",
+       "      <td>0.750</td>\n",
+       "      <td>0.853</td>\n",
+       "      <td>0.822</td>\n",
+       "      <td>1.191</td>\n",
+       "      <td>1.175</td>\n",
+       "      <td>1.078</td>\n",
+       "      <td>0.693</td>\n",
+       "      <td>0.947</td>\n",
+       "      <td>0.931</td>\n",
+       "      <td>1.057</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.044</td>\n",
+       "      <td>1.305</td>\n",
+       "      <td>1.657</td>\n",
+       "      <td>1.323</td>\n",
+       "      <td>1.624</td>\n",
+       "      <td>1.170</td>\n",
+       "      <td>0.981</td>\n",
+       "      <td>0.791</td>\n",
+       "      <td>1.029</td>\n",
+       "      <td>1.195</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P01024</th>\n",
+       "      <td>0.782</td>\n",
+       "      <td>1.057</td>\n",
+       "      <td>0.994</td>\n",
+       "      <td>0.864</td>\n",
+       "      <td>0.917</td>\n",
+       "      <td>0.790</td>\n",
+       "      <td>0.823</td>\n",
+       "      <td>1.152</td>\n",
+       "      <td>0.816</td>\n",
+       "      <td>0.920</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.100</td>\n",
+       "      <td>0.986</td>\n",
+       "      <td>1.114</td>\n",
+       "      <td>1.210</td>\n",
+       "      <td>1.289</td>\n",
+       "      <td>1.104</td>\n",
+       "      <td>1.111</td>\n",
+       "      <td>1.007</td>\n",
+       "      <td>1.159</td>\n",
+       "      <td>0.979</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P02768</th>\n",
+       "      <td>1.183</td>\n",
+       "      <td>1.101</td>\n",
+       "      <td>1.045</td>\n",
+       "      <td>1.086</td>\n",
+       "      <td>1.041</td>\n",
+       "      <td>1.187</td>\n",
+       "      <td>1.234</td>\n",
+       "      <td>1.079</td>\n",
+       "      <td>1.011</td>\n",
+       "      <td>1.099</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.786</td>\n",
+       "      <td>0.706</td>\n",
+       "      <td>0.947</td>\n",
+       "      <td>0.831</td>\n",
+       "      <td>0.717</td>\n",
+       "      <td>0.795</td>\n",
+       "      <td>0.776</td>\n",
+       "      <td>0.938</td>\n",
+       "      <td>0.903</td>\n",
+       "      <td>0.743</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P01023</th>\n",
+       "      <td>1.066</td>\n",
+       "      <td>1.278</td>\n",
+       "      <td>0.959</td>\n",
+       "      <td>0.811</td>\n",
+       "      <td>0.789</td>\n",
+       "      <td>0.931</td>\n",
+       "      <td>0.971</td>\n",
+       "      <td>0.769</td>\n",
+       "      <td>1.011</td>\n",
+       "      <td>0.866</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.817</td>\n",
+       "      <td>0.728</td>\n",
+       "      <td>0.861</td>\n",
+       "      <td>0.798</td>\n",
+       "      <td>0.751</td>\n",
+       "      <td>0.917</td>\n",
+       "      <td>0.809</td>\n",
+       "      <td>0.780</td>\n",
+       "      <td>1.195</td>\n",
+       "      <td>0.706</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P02751</th>\n",
+       "      <td>1.085</td>\n",
+       "      <td>0.947</td>\n",
+       "      <td>0.993</td>\n",
+       "      <td>1.343</td>\n",
+       "      <td>1.130</td>\n",
+       "      <td>0.778</td>\n",
+       "      <td>0.731</td>\n",
+       "      <td>1.084</td>\n",
+       "      <td>1.107</td>\n",
+       "      <td>0.909</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.566</td>\n",
+       "      <td>0.854</td>\n",
+       "      <td>1.109</td>\n",
+       "      <td>0.630</td>\n",
+       "      <td>0.850</td>\n",
+       "      <td>0.661</td>\n",
+       "      <td>0.848</td>\n",
+       "      <td>0.829</td>\n",
+       "      <td>0.760</td>\n",
+       "      <td>0.811</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 70 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            h_F1_131N  h_F1_131C  h_F1_132C  h_F2_131N  h_F2_131C  h_F2_132C  \\\n",
+       "Identifier                                                                     \n",
+       "P04114          0.750      0.853      0.822      1.191      1.175      1.078   \n",
+       "P01024          0.782      1.057      0.994      0.864      0.917      0.790   \n",
+       "P02768          1.183      1.101      1.045      1.086      1.041      1.187   \n",
+       "P01023          1.066      1.278      0.959      0.811      0.789      0.931   \n",
+       "P02751          1.085      0.947      0.993      1.343      1.130      0.778   \n",
+       "\n",
+       "            h_F3_131N  h_F3_131C  h_F3_132C  h_F4_131N  ...  s_F3_128N  \\\n",
+       "Identifier                                              ...              \n",
+       "P04114          0.693      0.947      0.931      1.057  ...      1.044   \n",
+       "P01024          0.823      1.152      0.816      0.920  ...      1.100   \n",
+       "P02768          1.234      1.079      1.011      1.099  ...      0.786   \n",
+       "P01023          0.971      0.769      1.011      0.866  ...      0.817   \n",
+       "P02751          0.731      1.084      1.107      0.909  ...      0.566   \n",
+       "\n",
+       "            s_F3_128C  s_F3_129C  s_F4_128N  s_F4_128C  s_F5_128N  s_F5_128C  \\\n",
+       "Identifier                                                                     \n",
+       "P04114          1.305      1.657      1.323      1.624      1.170      0.981   \n",
+       "P01024          0.986      1.114      1.210      1.289      1.104      1.111   \n",
+       "P02768          0.706      0.947      0.831      0.717      0.795      0.776   \n",
+       "P01023          0.728      0.861      0.798      0.751      0.917      0.809   \n",
+       "P02751          0.854      1.109      0.630      0.850      0.661      0.848   \n",
+       "\n",
+       "            s_F6_128N  s_F6_128C  s_F6_133N  \n",
+       "Identifier                                   \n",
+       "P04114          0.791      1.029      1.195  \n",
+       "P01024          1.007      1.159      0.979  \n",
+       "P02768          0.938      0.903      0.743  \n",
+       "P01023          0.780      1.195      0.706  \n",
+       "P02751          0.829      0.760      0.811  \n",
+       "\n",
+       "[5 rows x 70 columns]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "protein_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>group</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sample</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>h_F1_131N</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_F1_131C</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_F1_132C</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_F2_131N</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_F2_131C</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_F5_128N</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_F5_128C</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_F6_128N</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_F6_128C</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_F6_133N</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>70 rows × 1 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             group\n",
+       "sample            \n",
+       "h_F1_131N  healthy\n",
+       "h_F1_131C  healthy\n",
+       "h_F1_132C  healthy\n",
+       "h_F2_131N  healthy\n",
+       "h_F2_131C  healthy\n",
+       "...            ...\n",
+       "s_F5_128N   severe\n",
+       "s_F5_128C   severe\n",
+       "s_F6_128N   severe\n",
+       "s_F6_128C   severe\n",
+       "s_F6_133N   severe\n",
+       "\n",
+       "[70 rows x 1 columns]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "protein_design"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read the metabolomics data: the compound measurement table and its sample design table (one group label per sample)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data.csv'), index_col='Identifier')\n",
+    "compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>h_jkdz1</th>\n",
+       "      <th>h_jkdz2</th>\n",
+       "      <th>h_jkdz3</th>\n",
+       "      <th>h_jkdz4</th>\n",
+       "      <th>h_jkdz5</th>\n",
+       "      <th>h_jkdz6</th>\n",
+       "      <th>h_jkdz7</th>\n",
+       "      <th>h_jkdz8</th>\n",
+       "      <th>h_jkdz9</th>\n",
+       "      <th>h_jkdz10</th>\n",
+       "      <th>...</th>\n",
+       "      <th>s_ZX12</th>\n",
+       "      <th>s_ZX13</th>\n",
+       "      <th>s_ZX14</th>\n",
+       "      <th>s_ZX15</th>\n",
+       "      <th>s_ZX16</th>\n",
+       "      <th>s_ZX17</th>\n",
+       "      <th>s_ZX18</th>\n",
+       "      <th>s_ZX19</th>\n",
+       "      <th>s_ZX20</th>\n",
+       "      <th>s_ZX21</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Identifier</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>C21482</th>\n",
+       "      <td>19413052.00</td>\n",
+       "      <td>6.381812e+06</td>\n",
+       "      <td>9.748316e+06</td>\n",
+       "      <td>5.326872e+06</td>\n",
+       "      <td>1.998072e+07</td>\n",
+       "      <td>3.580375e+06</td>\n",
+       "      <td>8.256121e+06</td>\n",
+       "      <td>8.079382e+06</td>\n",
+       "      <td>1.559659e+07</td>\n",
+       "      <td>1.520363e+07</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.904349e+06</td>\n",
+       "      <td>3.226016e+06</td>\n",
+       "      <td>7.378147e+05</td>\n",
+       "      <td>2.817698e+06</td>\n",
+       "      <td>3.329101e+06</td>\n",
+       "      <td>3206752.75</td>\n",
+       "      <td>1.466174e+06</td>\n",
+       "      <td>2.779301e+06</td>\n",
+       "      <td>2.117668e+06</td>\n",
+       "      <td>2.184310e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C18218</th>\n",
+       "      <td>2711915.25</td>\n",
+       "      <td>2.056393e+06</td>\n",
+       "      <td>1.445594e+06</td>\n",
+       "      <td>2.038765e+06</td>\n",
+       "      <td>2.536996e+06</td>\n",
+       "      <td>2.638198e+06</td>\n",
+       "      <td>2.285757e+06</td>\n",
+       "      <td>1.973140e+06</td>\n",
+       "      <td>2.015425e+06</td>\n",
+       "      <td>2.290842e+06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.409720e+06</td>\n",
+       "      <td>1.413307e+06</td>\n",
+       "      <td>3.218834e+06</td>\n",
+       "      <td>1.602131e+06</td>\n",
+       "      <td>1.317878e+06</td>\n",
+       "      <td>2930312.75</td>\n",
+       "      <td>1.168094e+06</td>\n",
+       "      <td>2.946776e+06</td>\n",
+       "      <td>1.417311e+06</td>\n",
+       "      <td>1.474166e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C05127</th>\n",
+       "      <td>87727.25</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.238706e+04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.597879e+05</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.055130e+04</td>\n",
+       "      <td>1.214114e+05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.382788e+05</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C01152</th>\n",
+       "      <td>58832828.00</td>\n",
+       "      <td>5.843934e+07</td>\n",
+       "      <td>5.552133e+07</td>\n",
+       "      <td>4.516214e+07</td>\n",
+       "      <td>5.478952e+07</td>\n",
+       "      <td>3.941259e+07</td>\n",
+       "      <td>2.987876e+07</td>\n",
+       "      <td>6.751726e+07</td>\n",
+       "      <td>4.666031e+07</td>\n",
+       "      <td>9.118524e+07</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.881314e+07</td>\n",
+       "      <td>3.164358e+07</td>\n",
+       "      <td>2.538767e+07</td>\n",
+       "      <td>3.307604e+07</td>\n",
+       "      <td>3.915698e+07</td>\n",
+       "      <td>24400592.00</td>\n",
+       "      <td>2.593375e+07</td>\n",
+       "      <td>6.413868e+07</td>\n",
+       "      <td>4.020588e+07</td>\n",
+       "      <td>4.904488e+07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C02918</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.815549e+05</td>\n",
+       "      <td>2.240392e+05</td>\n",
+       "      <td>1.609397e+05</td>\n",
+       "      <td>3.206194e+05</td>\n",
+       "      <td>7.176557e+05</td>\n",
+       "      <td>3.268182e+05</td>\n",
+       "      <td>5.135810e+05</td>\n",
+       "      <td>2.734582e+05</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.337245e+05</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.347152e+05</td>\n",
+       "      <td>3.532118e+04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>655827.25</td>\n",
+       "      <td>8.359706e+05</td>\n",
+       "      <td>4.034381e+06</td>\n",
+       "      <td>2.839358e+05</td>\n",
+       "      <td>8.062160e+04</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 96 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                h_jkdz1       h_jkdz2       h_jkdz3       h_jkdz4  \\\n",
+       "Identifier                                                          \n",
+       "C21482      19413052.00  6.381812e+06  9.748316e+06  5.326872e+06   \n",
+       "C18218       2711915.25  2.056393e+06  1.445594e+06  2.038765e+06   \n",
+       "C05127         87727.25           NaN  9.238706e+04           NaN   \n",
+       "C01152      58832828.00  5.843934e+07  5.552133e+07  4.516214e+07   \n",
+       "C02918              NaN  1.815549e+05  2.240392e+05  1.609397e+05   \n",
+       "\n",
+       "                 h_jkdz5       h_jkdz6       h_jkdz7       h_jkdz8  \\\n",
+       "Identifier                                                           \n",
+       "C21482      1.998072e+07  3.580375e+06  8.256121e+06  8.079382e+06   \n",
+       "C18218      2.536996e+06  2.638198e+06  2.285757e+06  1.973140e+06   \n",
+       "C05127      1.597879e+05           NaN           NaN           NaN   \n",
+       "C01152      5.478952e+07  3.941259e+07  2.987876e+07  6.751726e+07   \n",
+       "C02918      3.206194e+05  7.176557e+05  3.268182e+05  5.135810e+05   \n",
+       "\n",
+       "                 h_jkdz9      h_jkdz10  ...        s_ZX12        s_ZX13  \\\n",
+       "Identifier                              ...                               \n",
+       "C21482      1.559659e+07  1.520363e+07  ...  1.904349e+06  3.226016e+06   \n",
+       "C18218      2.015425e+06  2.290842e+06  ...  1.409720e+06  1.413307e+06   \n",
+       "C05127      9.055130e+04  1.214114e+05  ...           NaN           NaN   \n",
+       "C01152      4.666031e+07  9.118524e+07  ...  2.881314e+07  3.164358e+07   \n",
+       "C02918      2.734582e+05           NaN  ...  3.337245e+05           NaN   \n",
+       "\n",
+       "                  s_ZX14        s_ZX15        s_ZX16       s_ZX17  \\\n",
+       "Identifier                                                          \n",
+       "C21482      7.378147e+05  2.817698e+06  3.329101e+06   3206752.75   \n",
+       "C18218      3.218834e+06  1.602131e+06  1.317878e+06   2930312.75   \n",
+       "C05127               NaN           NaN           NaN          NaN   \n",
+       "C01152      2.538767e+07  3.307604e+07  3.915698e+07  24400592.00   \n",
+       "C02918      4.347152e+05  3.532118e+04           NaN    655827.25   \n",
+       "\n",
+       "                  s_ZX18        s_ZX19        s_ZX20        s_ZX21  \n",
+       "Identifier                                                          \n",
+       "C21482      1.466174e+06  2.779301e+06  2.117668e+06  2.184310e+06  \n",
+       "C18218      1.168094e+06  2.946776e+06  1.417311e+06  1.474166e+06  \n",
+       "C05127               NaN  1.382788e+05           NaN           NaN  \n",
+       "C01152      2.593375e+07  6.413868e+07  4.020588e+07  4.904488e+07  \n",
+       "C02918      8.359706e+05  4.034381e+06  2.839358e+05  8.062160e+04  \n",
+       "\n",
+       "[5 rows x 96 columns]"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compound_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>group</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sample</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>h_jkdz1</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_jkdz2</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_jkdz3</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_jkdz4</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h_jkdz5</th>\n",
+       "      <td>healthy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_ZX17</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_ZX18</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_ZX19</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_ZX20</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>s_ZX21</th>\n",
+       "      <td>severe</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>96 rows × 1 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           group\n",
+       "sample          \n",
+       "h_jkdz1  healthy\n",
+       "h_jkdz2  healthy\n",
+       "h_jkdz3  healthy\n",
+       "h_jkdz4  healthy\n",
+       "h_jkdz5  healthy\n",
+       "...          ...\n",
+       "s_ZX17    severe\n",
+       "s_ZX18    severe\n",
+       "s_ZX19    severe\n",
+       "s_ZX20    severe\n",
+       "s_ZX21    severe\n",
+       "\n",
+       "[96 rows x 1 columns]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compound_design"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "set_log_level_info()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a multi-omics container object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "proteins data with (791, 70) measurements"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n",
+    "protein_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "compounds data with (220, 96) measurements"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n",
+    "compound_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "publication = 'Proteomic and Metabolomic Characterization of COVID-19 Patient Sera'\n",
+    "url = 'https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Multi-omics data container\n",
+       "- publication: Proteomic and Metabolomic Characterization of COVID-19 Patient Sera\n",
+       "- URL: https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9\n",
+       "- Views: 2 modalities\n",
+       "\t - proteins data with (791, 70) measurements\n",
+       "\t - compounds data with (220, 96) measurements"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mo = MultiOmicsData(publication=publication, url=url)\n",
+    "mo.add_data([protein_data, compound_data])\n",
+    "mo"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a mapping object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The mapping object uses Reactome to map the different biological entities in the data:\n",
+    "- Transcripts (or genes) are connected to the proteins they encode\n",
+    "- Proteins and compounds are connected to the reactions they are involved in\n",
+    "- Reactions are connected to pathways"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-03-24 14:40:03.534 | INFO     | pyMultiOmics.functions:remove_dupes:385 - Removing 9 rows with duplicate identifiers\n",
+      "2022-03-24 14:40:03.535 | INFO     | pyMultiOmics.functions:reactome_mapping:78 - There are 211 observed compound ids\n",
+      "2022-03-24 14:40:03.535 | INFO     | pyMultiOmics.functions:reactome_mapping:81 - Mapping genes -> proteins\n",
+      "2022-03-24 14:40:06.215 | INFO     | pyMultiOmics.functions:reactome_mapping:86 - Mapping proteins -> reactions\n",
+      "2022-03-24 14:40:07.489 | INFO     | pyMultiOmics.functions:reactome_mapping:94 - Mapping compounds -> reactions\n",
+      "2022-03-24 14:40:11.539 | INFO     | pyMultiOmics.functions:reactome_mapping:100 - Mapping reactions -> pathways\n",
+      "2022-03-24 14:40:12.580 | INFO     | pyMultiOmics.functions:reactome_mapping:111 - Mapping reactions -> proteins\n",
+      "2022-03-24 14:40:18.631 | INFO     | pyMultiOmics.functions:reactome_mapping:118 - Mapping reactions -> compounds\n",
+      "2022-03-24 14:40:21.469 | INFO     | pyMultiOmics.functions:reactome_mapping:130 - Mapping proteins -> genes\n",
+      "2022-03-24 14:40:36.997 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: genes\n",
+      "2022-03-24 14:40:37.211 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: proteins\n",
+      "2022-03-24 14:40:37.901 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: compounds\n",
+      "2022-03-24 14:40:37.981 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: reactions\n",
+      "2022-03-24 14:40:38.092 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: pathways\n",
+      "2022-03-24 14:40:38.120 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: gene_proteins\n",
+      "2022-03-24 14:40:38.287 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: protein_reactions\n",
+      "2022-03-24 14:40:39.634 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: compound_reactions\n",
+      "2022-03-24 14:40:39.880 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: reaction_pathways\n",
+      "2022-03-24 14:40:40.005 | INFO     | pyMultiOmics.mapping:build:51 - Created a multi-omics network with 19645 nodes and 80442 edges\n",
+      "2022-03-24 14:40:40.499 | INFO     | pyMultiOmics.mapping:build:53 - node_counts = {'genes': 7054, 'proteins': 6590, 'compounds': 1078, 'reactions': 3926, 'pathways': 997}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<pyMultiOmics.mapping.Mapper at 0x1762619f0>"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m = Mapper(mo, HOMO_SAPIENS, metabolic_pathway_only=False)\n",
+    "m.build()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<pyMultiOmics.mapping.Mapper at 0x1762619f0>"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Query mapping object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
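+    "Below are some example queries we can perform with the mapping object. The first one lists every reaction that is connected to at least one observed protein and at least one observed compound."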
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>reaction_id</th>\n",
+       "      <th>reaction_name</th>\n",
+       "      <th>num_proteins</th>\n",
+       "      <th>num_compounds</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>R-HSA-114552</td>\n",
+       "      <td>Thrombin-activated pars activate g12/13</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>R-HSA-114558</td>\n",
+       "      <td>Thrombin-activated pars activate gq</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>R-HSA-1214188</td>\n",
+       "      <td>Prdm9 trimethylates histone h3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>R-HSA-1605591</td>\n",
+       "      <td>Glucosylceramidase cleaves the glucosidic bond...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>R-HSA-163432</td>\n",
+       "      <td>Cholesterol ester + h2o -&gt; cholesterol + fatty...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>223</th>\n",
+       "      <td>R-HSA-9710490</td>\n",
+       "      <td>The gsdme gene promoter is hypermethylated</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>224</th>\n",
+       "      <td>R-HSA-9733545</td>\n",
+       "      <td>Bile salts and acids bind alb</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>225</th>\n",
+       "      <td>R-HSA-9733960</td>\n",
+       "      <td>Bile salts and acids dissociate from alb</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>226</th>\n",
+       "      <td>R-HSA-977071</td>\n",
+       "      <td>Sialyltransferase i can add sialic acid to the...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>227</th>\n",
+       "      <td>R-HSA-977228</td>\n",
+       "      <td>Sialyltransferase i can add sialic acid to the...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>228 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       reaction_id                                      reaction_name  \\\n",
+       "0     R-HSA-114552            Thrombin-activated pars activate g12/13   \n",
+       "1     R-HSA-114558                Thrombin-activated pars activate gq   \n",
+       "2    R-HSA-1214188                     Prdm9 trimethylates histone h3   \n",
+       "3    R-HSA-1605591  Glucosylceramidase cleaves the glucosidic bond...   \n",
+       "4     R-HSA-163432  Cholesterol ester + h2o -> cholesterol + fatty...   \n",
+       "..             ...                                                ...   \n",
+       "223  R-HSA-9710490         The gsdme gene promoter is hypermethylated   \n",
+       "224  R-HSA-9733545                      Bile salts and acids bind alb   \n",
+       "225  R-HSA-9733960           Bile salts and acids dissociate from alb   \n",
+       "226   R-HSA-977071  Sialyltransferase i can add sialic acid to the...   \n",
+       "227   R-HSA-977228  Sialyltransferase i can add sialic acid to the...   \n",
+       "\n",
+       "     num_proteins  num_compounds  \n",
+       "0               1              1  \n",
+       "1               1              1  \n",
+       "2               3              1  \n",
+       "3               1              1  \n",
+       "4               1              1  \n",
+       "..            ...            ...  \n",
+       "223             3              1  \n",
+       "224             1              5  \n",
+       "225             1              5  \n",
+       "226             1              1  \n",
+       "227             1              1  \n",
+       "\n",
+       "[228 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "reactions = m.get_nodes(types=REACTIONS)\n",
+    "\n",
+    "data = []\n",
+    "for reaction_id, reaction_data in reactions:\n",
+    "    reaction_name = reaction_data['display_name']\n",
+    "    proteins = m.get_connected(reaction_id, dest_type=PROTEINS, observed=True)\n",
+    "    compounds = m.get_connected(reaction_id, dest_type=COMPOUNDS, observed=True)\n",
+    "    \n",
+    "    if len(proteins) > 0 and len(compounds) > 0:\n",
+    "        row = [reaction_id, reaction_name, len(proteins), len(compounds)]\n",
+    "        data.append(row)\n",
+    "\n",
+    "df = pd.DataFrame(data, columns=['reaction_id', 'reaction_name', 'num_proteins', 'num_compounds'])\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### List all entities connected to reaction R-HSA-194153"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>display_name</th>\n",
+       "      <th>data_type</th>\n",
+       "      <th>observed</th>\n",
+       "      <th>source_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>entity_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>ENSG00000108846</th>\n",
+       "      <td>Abcc3</td>\n",
+       "      <td>genes</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>O15438</th>\n",
+       "      <td>O15438</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28865</th>\n",
+       "      <td>Taurocholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30616</th>\n",
+       "      <td>Atp(4-)</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17687</th>\n",
+       "      <td>Glycocholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36274</th>\n",
+       "      <td>Glycochenodeoxycholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16525</th>\n",
+       "      <td>Taurochenodeoxycholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43474</th>\n",
+       "      <td>Hydrogenphosphate</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>456216</th>\n",
+       "      <td>Adp(3-)</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16359</th>\n",
+       "      <td>Cholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9407</th>\n",
+       "      <td>Taurochenodeoxycholate</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16755</th>\n",
+       "      <td>Chenodeoxycholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>R-HSA-159418</th>\n",
+       "      <td>Recycling of bile acids and salts</td>\n",
+       "      <td>pathways</td>\n",
+       "      <td>None</td>\n",
+       "      <td>R-HSA-194153</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                      display_name  data_type observed  \\\n",
+       "entity_id                                                                \n",
+       "ENSG00000108846                              Abcc3      genes    False   \n",
+       "O15438                                      O15438   proteins    False   \n",
+       "28865                             Taurocholic acid  compounds     True   \n",
+       "30616                                      Atp(4-)  compounds    False   \n",
+       "17687                             Glycocholic acid  compounds    False   \n",
+       "36274                   Glycochenodeoxycholic acid  compounds     True   \n",
+       "16525                   Taurochenodeoxycholic acid  compounds    False   \n",
+       "43474                            Hydrogenphosphate  compounds    False   \n",
+       "456216                                     Adp(3-)  compounds    False   \n",
+       "16359                                  Cholic acid  compounds     True   \n",
+       "9407                        Taurochenodeoxycholate  compounds     True   \n",
+       "16755                        Chenodeoxycholic acid  compounds     True   \n",
+       "R-HSA-159418     Recycling of bile acids and salts   pathways     None   \n",
+       "\n",
+       "                    source_id  \n",
+       "entity_id                      \n",
+       "ENSG00000108846  R-HSA-194153  \n",
+       "O15438           R-HSA-194153  \n",
+       "28865            R-HSA-194153  \n",
+       "30616            R-HSA-194153  \n",
+       "17687            R-HSA-194153  \n",
+       "36274            R-HSA-194153  \n",
+       "16525            R-HSA-194153  \n",
+       "43474            R-HSA-194153  \n",
+       "456216           R-HSA-194153  \n",
+       "16359            R-HSA-194153  \n",
+       "9407             R-HSA-194153  \n",
+       "16755            R-HSA-194153  \n",
+       "R-HSA-159418     R-HSA-194153  "
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_id = 'R-HSA-194153'\n",
+    "m.get_connected(query_id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Query the connections between proteins and compounds (through their shared reactions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>display_name</th>\n",
+       "      <th>data_type</th>\n",
+       "      <th>observed</th>\n",
+       "      <th>source_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>entity_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>18421</th>\n",
+       "      <td>Superoxide</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17033</th>\n",
+       "      <td>Biliverdin</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16990</th>\n",
+       "      <td>Bilirubin ixalpha</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17627</th>\n",
+       "      <td>Ferroheme b</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36144</th>\n",
+       "      <td>Ferriheme b</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>456216</th>\n",
+       "      <td>Adp(3-)</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30616</th>\n",
+       "      <td>Atp(4-)</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36274</th>\n",
+       "      <td>Glycochenodeoxycholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16755</th>\n",
+       "      <td>Chenodeoxycholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16359</th>\n",
+       "      <td>Cholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28865</th>\n",
+       "      <td>Taurocholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17687</th>\n",
+       "      <td>Glycocholic acid</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9407</th>\n",
+       "      <td>Taurochenodeoxycholate</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>True</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17504</th>\n",
+       "      <td>1-o-acyl-sn-glycero-3-phosphocholine(1+)</td>\n",
+       "      <td>compounds</td>\n",
+       "      <td>False</td>\n",
+       "      <td>P02768</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                       display_name  data_type  observed  \\\n",
+       "entity_id                                                                  \n",
+       "18421                                    Superoxide  compounds     False   \n",
+       "17033                                    Biliverdin  compounds      True   \n",
+       "16990                             Bilirubin ixalpha  compounds      True   \n",
+       "17627                                   Ferroheme b  compounds     False   \n",
+       "36144                                   Ferriheme b  compounds     False   \n",
+       "456216                                      Adp(3-)  compounds     False   \n",
+       "30616                                       Atp(4-)  compounds     False   \n",
+       "36274                    Glycochenodeoxycholic acid  compounds      True   \n",
+       "16755                         Chenodeoxycholic acid  compounds      True   \n",
+       "16359                                   Cholic acid  compounds      True   \n",
+       "28865                              Taurocholic acid  compounds      True   \n",
+       "17687                              Glycocholic acid  compounds     False   \n",
+       "9407                         Taurochenodeoxycholate  compounds      True   \n",
+       "17504      1-o-acyl-sn-glycero-3-phosphocholine(1+)  compounds     False   \n",
+       "\n",
+       "          source_id  \n",
+       "entity_id            \n",
+       "18421        P02768  \n",
+       "17033        P02768  \n",
+       "16990        P02768  \n",
+       "17627        P02768  \n",
+       "36144        P02768  \n",
+       "456216       P02768  \n",
+       "30616        P02768  \n",
+       "36274        P02768  \n",
+       "16755        P02768  \n",
+       "16359        P02768  \n",
+       "28865        P02768  \n",
+       "17687        P02768  \n",
+       "9407         P02768  \n",
+       "17504        P02768  "
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_id = 'P02768'\n",
+    "m.get_connected(query_id, dest_type=COMPOUNDS)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>display_name</th>\n",
+       "      <th>data_type</th>\n",
+       "      <th>observed</th>\n",
+       "      <th>source_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>entity_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Q9NPD5</th>\n",
+       "      <td>Q9NPD5</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P33527</th>\n",
+       "      <td>P33527</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Q9BYK8</th>\n",
+       "      <td>Q9BYK8</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Q96RS0</th>\n",
+       "      <td>Q96RS0</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Q92793</th>\n",
+       "      <td>Q92793</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P08047</th>\n",
+       "      <td>P08047</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P30043</th>\n",
+       "      <td>P30043</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>True</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>P53004</th>\n",
+       "      <td>P53004</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>O75182</th>\n",
+       "      <td>O75182</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Q96ST3</th>\n",
+       "      <td>Q96ST3</td>\n",
+       "      <td>proteins</td>\n",
+       "      <td>False</td>\n",
+       "      <td>16990</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>130 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          display_name data_type  observed source_id\n",
+       "entity_id                                           \n",
+       "Q9NPD5          Q9NPD5  proteins     False     16990\n",
+       "P33527          P33527  proteins     False     16990\n",
+       "Q9BYK8          Q9BYK8  proteins     False     16990\n",
+       "Q96RS0          Q96RS0  proteins     False     16990\n",
+       "Q92793          Q92793  proteins     False     16990\n",
+       "...                ...       ...       ...       ...\n",
+       "P08047          P08047  proteins     False     16990\n",
+       "P30043          P30043  proteins      True     16990\n",
+       "P53004          P53004  proteins     False     16990\n",
+       "O75182          O75182  proteins     False     16990\n",
+       "Q96ST3          Q96ST3  proteins     False     16990\n",
+       "\n",
+       "[130 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_id = '16990'\n",
+    "m.get_connected(query_id, dest_type=PROTEINS)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}