pyMultiOmics / Git / [7d5693] /notebooks/mapping

Models:
AlyssaS/
pyMultiOmics
Downloads: 1
[7d5693]: / notebooks / mapping_covid.ipynb
History
Download this file
1796 lines (1795 with data), 59.6 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pylab as plt\n",
    "import matplotlib\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-03-24 14:40:01.676 | WARNING  | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n",
      "2022-03-24 14:40:01.676 | WARNING  | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n",
      "2022-03-24 14:40:01.677 | INFO     | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n"
     ]
    }
   ],
   "source": [
    "sys.path.append('..')\n",
    "\n",
    "from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n",
    "from pyMultiOmics.constants import HOMO_SAPIENS, PROTEINS, COMPOUNDS, REACTIONS\n",
    "from pyMultiOmics.mapping import Mapper\n",
    "from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstration of pyMultiOmics mapping"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the processed Covid data from [1]\n",
    "\n",
    "[1] [Shen, Bo, et al. \"Proteomic and metabolomic characterization of COVID-19 patient sera.\" Cell 182.1 (2020): 59-72.](https://www.sciencedirect.com/science/article/pii/S0092867420306279)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-03-24 14:40:02.949 | INFO     | pyMultiOmics.common:download_file:59 - Downloading covid19_dualomics_data.zip\n",
      "551KB [00:00, 8.89kKB/s]                                                                                                \n",
      "2022-03-24 14:40:03.061 | INFO     | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_dualomics_data.zip\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 1191.03it/s]\n",
      "2022-03-24 14:40:03.075 | INFO     | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_dualomics_data.zip\n"
     ]
    }
   ],
   "source": [
    "# Download the zipped COVID-19 dual-omics dataset and unpack it locally.\n",
    "# Named `data_url` so it is not confused with the publication `url` assigned further down.\n",
    "data_url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_dualomics_data.zip'\n",
    "out_file = download_file(data_url)\n",
    "extract_zip_file(out_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_dualomics_data'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Absolute path of the extracted dataset folder, resolved against the current working directory.\n",
    "DATA_FOLDER = os.path.abspath('covid19_dualomics_data')\n",
    "DATA_FOLDER"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read proteomics data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Identifier')\n",
    "protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>h_F1_131N</th>\n",
       "      <th>h_F1_131C</th>\n",
       "      <th>h_F1_132C</th>\n",
       "      <th>h_F2_131N</th>\n",
       "      <th>h_F2_131C</th>\n",
       "      <th>h_F2_132C</th>\n",
       "      <th>h_F3_131N</th>\n",
       "      <th>h_F3_131C</th>\n",
       "      <th>h_F3_132C</th>\n",
       "      <th>h_F4_131N</th>\n",
       "      <th>...</th>\n",
       "      <th>s_F3_128N</th>\n",
       "      <th>s_F3_128C</th>\n",
       "      <th>s_F3_129C</th>\n",
       "      <th>s_F4_128N</th>\n",
       "      <th>s_F4_128C</th>\n",
       "      <th>s_F5_128N</th>\n",
       "      <th>s_F5_128C</th>\n",
       "      <th>s_F6_128N</th>\n",
       "      <th>s_F6_128C</th>\n",
       "      <th>s_F6_133N</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Identifier</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>P04114</th>\n",
       "      <td>0.750</td>\n",
       "      <td>0.853</td>\n",
       "      <td>0.822</td>\n",
       "      <td>1.191</td>\n",
       "      <td>1.175</td>\n",
       "      <td>1.078</td>\n",
       "      <td>0.693</td>\n",
       "      <td>0.947</td>\n",
       "      <td>0.931</td>\n",
       "      <td>1.057</td>\n",
       "      <td>...</td>\n",
       "      <td>1.044</td>\n",
       "      <td>1.305</td>\n",
       "      <td>1.657</td>\n",
       "      <td>1.323</td>\n",
       "      <td>1.624</td>\n",
       "      <td>1.170</td>\n",
       "      <td>0.981</td>\n",
       "      <td>0.791</td>\n",
       "      <td>1.029</td>\n",
       "      <td>1.195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P01024</th>\n",
       "      <td>0.782</td>\n",
       "      <td>1.057</td>\n",
       "      <td>0.994</td>\n",
       "      <td>0.864</td>\n",
       "      <td>0.917</td>\n",
       "      <td>0.790</td>\n",
       "      <td>0.823</td>\n",
       "      <td>1.152</td>\n",
       "      <td>0.816</td>\n",
       "      <td>0.920</td>\n",
       "      <td>...</td>\n",
       "      <td>1.100</td>\n",
       "      <td>0.986</td>\n",
       "      <td>1.114</td>\n",
       "      <td>1.210</td>\n",
       "      <td>1.289</td>\n",
       "      <td>1.104</td>\n",
       "      <td>1.111</td>\n",
       "      <td>1.007</td>\n",
       "      <td>1.159</td>\n",
       "      <td>0.979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P02768</th>\n",
       "      <td>1.183</td>\n",
       "      <td>1.101</td>\n",
       "      <td>1.045</td>\n",
       "      <td>1.086</td>\n",
       "      <td>1.041</td>\n",
       "      <td>1.187</td>\n",
       "      <td>1.234</td>\n",
       "      <td>1.079</td>\n",
       "      <td>1.011</td>\n",
       "      <td>1.099</td>\n",
       "      <td>...</td>\n",
       "      <td>0.786</td>\n",
       "      <td>0.706</td>\n",
       "      <td>0.947</td>\n",
       "      <td>0.831</td>\n",
       "      <td>0.717</td>\n",
       "      <td>0.795</td>\n",
       "      <td>0.776</td>\n",
       "      <td>0.938</td>\n",
       "      <td>0.903</td>\n",
       "      <td>0.743</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P01023</th>\n",
       "      <td>1.066</td>\n",
       "      <td>1.278</td>\n",
       "      <td>0.959</td>\n",
       "      <td>0.811</td>\n",
       "      <td>0.789</td>\n",
       "      <td>0.931</td>\n",
       "      <td>0.971</td>\n",
       "      <td>0.769</td>\n",
       "      <td>1.011</td>\n",
       "      <td>0.866</td>\n",
       "      <td>...</td>\n",
       "      <td>0.817</td>\n",
       "      <td>0.728</td>\n",
       "      <td>0.861</td>\n",
       "      <td>0.798</td>\n",
       "      <td>0.751</td>\n",
       "      <td>0.917</td>\n",
       "      <td>0.809</td>\n",
       "      <td>0.780</td>\n",
       "      <td>1.195</td>\n",
       "      <td>0.706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P02751</th>\n",
       "      <td>1.085</td>\n",
       "      <td>0.947</td>\n",
       "      <td>0.993</td>\n",
       "      <td>1.343</td>\n",
       "      <td>1.130</td>\n",
       "      <td>0.778</td>\n",
       "      <td>0.731</td>\n",
       "      <td>1.084</td>\n",
       "      <td>1.107</td>\n",
       "      <td>0.909</td>\n",
       "      <td>...</td>\n",
       "      <td>0.566</td>\n",
       "      <td>0.854</td>\n",
       "      <td>1.109</td>\n",
       "      <td>0.630</td>\n",
       "      <td>0.850</td>\n",
       "      <td>0.661</td>\n",
       "      <td>0.848</td>\n",
       "      <td>0.829</td>\n",
       "      <td>0.760</td>\n",
       "      <td>0.811</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 70 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            h_F1_131N  h_F1_131C  h_F1_132C  h_F2_131N  h_F2_131C  h_F2_132C  \\\n",
       "Identifier                                                                     \n",
       "P04114          0.750      0.853      0.822      1.191      1.175      1.078   \n",
       "P01024          0.782      1.057      0.994      0.864      0.917      0.790   \n",
       "P02768          1.183      1.101      1.045      1.086      1.041      1.187   \n",
       "P01023          1.066      1.278      0.959      0.811      0.789      0.931   \n",
       "P02751          1.085      0.947      0.993      1.343      1.130      0.778   \n",
       "\n",
       "            h_F3_131N  h_F3_131C  h_F3_132C  h_F4_131N  ...  s_F3_128N  \\\n",
       "Identifier                                              ...              \n",
       "P04114          0.693      0.947      0.931      1.057  ...      1.044   \n",
       "P01024          0.823      1.152      0.816      0.920  ...      1.100   \n",
       "P02768          1.234      1.079      1.011      1.099  ...      0.786   \n",
       "P01023          0.971      0.769      1.011      0.866  ...      0.817   \n",
       "P02751          0.731      1.084      1.107      0.909  ...      0.566   \n",
       "\n",
       "            s_F3_128C  s_F3_129C  s_F4_128N  s_F4_128C  s_F5_128N  s_F5_128C  \\\n",
       "Identifier                                                                     \n",
       "P04114          1.305      1.657      1.323      1.624      1.170      0.981   \n",
       "P01024          0.986      1.114      1.210      1.289      1.104      1.111   \n",
       "P02768          0.706      0.947      0.831      0.717      0.795      0.776   \n",
       "P01023          0.728      0.861      0.798      0.751      0.917      0.809   \n",
       "P02751          0.854      1.109      0.630      0.850      0.661      0.848   \n",
       "\n",
       "            s_F6_128N  s_F6_128C  s_F6_133N  \n",
       "Identifier                                   \n",
       "P04114          0.791      1.029      1.195  \n",
       "P01024          1.007      1.159      0.979  \n",
       "P02768          0.938      0.903      0.743  \n",
       "P01023          0.780      1.195      0.706  \n",
       "P02751          0.829      0.760      0.811  \n",
       "\n",
       "[5 rows x 70 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "protein_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>group</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>h_F1_131N</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_F1_131C</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_F1_132C</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_F2_131N</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_F2_131C</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_F5_128N</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_F5_128C</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_F6_128N</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_F6_128C</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_F6_133N</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>70 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             group\n",
       "sample            \n",
       "h_F1_131N  healthy\n",
       "h_F1_131C  healthy\n",
       "h_F1_132C  healthy\n",
       "h_F2_131N  healthy\n",
       "h_F2_131C  healthy\n",
       "...            ...\n",
       "s_F5_128N   severe\n",
       "s_F5_128C   severe\n",
       "s_F6_128N   severe\n",
       "s_F6_128C   severe\n",
       "s_F6_133N   severe\n",
       "\n",
       "[70 rows x 1 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "protein_design"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read metabolomics data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data.csv'), index_col='Identifier')\n",
    "compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>h_jkdz1</th>\n",
       "      <th>h_jkdz2</th>\n",
       "      <th>h_jkdz3</th>\n",
       "      <th>h_jkdz4</th>\n",
       "      <th>h_jkdz5</th>\n",
       "      <th>h_jkdz6</th>\n",
       "      <th>h_jkdz7</th>\n",
       "      <th>h_jkdz8</th>\n",
       "      <th>h_jkdz9</th>\n",
       "      <th>h_jkdz10</th>\n",
       "      <th>...</th>\n",
       "      <th>s_ZX12</th>\n",
       "      <th>s_ZX13</th>\n",
       "      <th>s_ZX14</th>\n",
       "      <th>s_ZX15</th>\n",
       "      <th>s_ZX16</th>\n",
       "      <th>s_ZX17</th>\n",
       "      <th>s_ZX18</th>\n",
       "      <th>s_ZX19</th>\n",
       "      <th>s_ZX20</th>\n",
       "      <th>s_ZX21</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Identifier</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C21482</th>\n",
       "      <td>19413052.00</td>\n",
       "      <td>6.381812e+06</td>\n",
       "      <td>9.748316e+06</td>\n",
       "      <td>5.326872e+06</td>\n",
       "      <td>1.998072e+07</td>\n",
       "      <td>3.580375e+06</td>\n",
       "      <td>8.256121e+06</td>\n",
       "      <td>8.079382e+06</td>\n",
       "      <td>1.559659e+07</td>\n",
       "      <td>1.520363e+07</td>\n",
       "      <td>...</td>\n",
       "      <td>1.904349e+06</td>\n",
       "      <td>3.226016e+06</td>\n",
       "      <td>7.378147e+05</td>\n",
       "      <td>2.817698e+06</td>\n",
       "      <td>3.329101e+06</td>\n",
       "      <td>3206752.75</td>\n",
       "      <td>1.466174e+06</td>\n",
       "      <td>2.779301e+06</td>\n",
       "      <td>2.117668e+06</td>\n",
       "      <td>2.184310e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C18218</th>\n",
       "      <td>2711915.25</td>\n",
       "      <td>2.056393e+06</td>\n",
       "      <td>1.445594e+06</td>\n",
       "      <td>2.038765e+06</td>\n",
       "      <td>2.536996e+06</td>\n",
       "      <td>2.638198e+06</td>\n",
       "      <td>2.285757e+06</td>\n",
       "      <td>1.973140e+06</td>\n",
       "      <td>2.015425e+06</td>\n",
       "      <td>2.290842e+06</td>\n",
       "      <td>...</td>\n",
       "      <td>1.409720e+06</td>\n",
       "      <td>1.413307e+06</td>\n",
       "      <td>3.218834e+06</td>\n",
       "      <td>1.602131e+06</td>\n",
       "      <td>1.317878e+06</td>\n",
       "      <td>2930312.75</td>\n",
       "      <td>1.168094e+06</td>\n",
       "      <td>2.946776e+06</td>\n",
       "      <td>1.417311e+06</td>\n",
       "      <td>1.474166e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C05127</th>\n",
       "      <td>87727.25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.238706e+04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.597879e+05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.055130e+04</td>\n",
       "      <td>1.214114e+05</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.382788e+05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C01152</th>\n",
       "      <td>58832828.00</td>\n",
       "      <td>5.843934e+07</td>\n",
       "      <td>5.552133e+07</td>\n",
       "      <td>4.516214e+07</td>\n",
       "      <td>5.478952e+07</td>\n",
       "      <td>3.941259e+07</td>\n",
       "      <td>2.987876e+07</td>\n",
       "      <td>6.751726e+07</td>\n",
       "      <td>4.666031e+07</td>\n",
       "      <td>9.118524e+07</td>\n",
       "      <td>...</td>\n",
       "      <td>2.881314e+07</td>\n",
       "      <td>3.164358e+07</td>\n",
       "      <td>2.538767e+07</td>\n",
       "      <td>3.307604e+07</td>\n",
       "      <td>3.915698e+07</td>\n",
       "      <td>24400592.00</td>\n",
       "      <td>2.593375e+07</td>\n",
       "      <td>6.413868e+07</td>\n",
       "      <td>4.020588e+07</td>\n",
       "      <td>4.904488e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C02918</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.815549e+05</td>\n",
       "      <td>2.240392e+05</td>\n",
       "      <td>1.609397e+05</td>\n",
       "      <td>3.206194e+05</td>\n",
       "      <td>7.176557e+05</td>\n",
       "      <td>3.268182e+05</td>\n",
       "      <td>5.135810e+05</td>\n",
       "      <td>2.734582e+05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>3.337245e+05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.347152e+05</td>\n",
       "      <td>3.532118e+04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>655827.25</td>\n",
       "      <td>8.359706e+05</td>\n",
       "      <td>4.034381e+06</td>\n",
       "      <td>2.839358e+05</td>\n",
       "      <td>8.062160e+04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 96 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                h_jkdz1       h_jkdz2       h_jkdz3       h_jkdz4  \\\n",
       "Identifier                                                          \n",
       "C21482      19413052.00  6.381812e+06  9.748316e+06  5.326872e+06   \n",
       "C18218       2711915.25  2.056393e+06  1.445594e+06  2.038765e+06   \n",
       "C05127         87727.25           NaN  9.238706e+04           NaN   \n",
       "C01152      58832828.00  5.843934e+07  5.552133e+07  4.516214e+07   \n",
       "C02918              NaN  1.815549e+05  2.240392e+05  1.609397e+05   \n",
       "\n",
       "                 h_jkdz5       h_jkdz6       h_jkdz7       h_jkdz8  \\\n",
       "Identifier                                                           \n",
       "C21482      1.998072e+07  3.580375e+06  8.256121e+06  8.079382e+06   \n",
       "C18218      2.536996e+06  2.638198e+06  2.285757e+06  1.973140e+06   \n",
       "C05127      1.597879e+05           NaN           NaN           NaN   \n",
       "C01152      5.478952e+07  3.941259e+07  2.987876e+07  6.751726e+07   \n",
       "C02918      3.206194e+05  7.176557e+05  3.268182e+05  5.135810e+05   \n",
       "\n",
       "                 h_jkdz9      h_jkdz10  ...        s_ZX12        s_ZX13  \\\n",
       "Identifier                              ...                               \n",
       "C21482      1.559659e+07  1.520363e+07  ...  1.904349e+06  3.226016e+06   \n",
       "C18218      2.015425e+06  2.290842e+06  ...  1.409720e+06  1.413307e+06   \n",
       "C05127      9.055130e+04  1.214114e+05  ...           NaN           NaN   \n",
       "C01152      4.666031e+07  9.118524e+07  ...  2.881314e+07  3.164358e+07   \n",
       "C02918      2.734582e+05           NaN  ...  3.337245e+05           NaN   \n",
       "\n",
       "                  s_ZX14        s_ZX15        s_ZX16       s_ZX17  \\\n",
       "Identifier                                                          \n",
       "C21482      7.378147e+05  2.817698e+06  3.329101e+06   3206752.75   \n",
       "C18218      3.218834e+06  1.602131e+06  1.317878e+06   2930312.75   \n",
       "C05127               NaN           NaN           NaN          NaN   \n",
       "C01152      2.538767e+07  3.307604e+07  3.915698e+07  24400592.00   \n",
       "C02918      4.347152e+05  3.532118e+04           NaN    655827.25   \n",
       "\n",
       "                  s_ZX18        s_ZX19        s_ZX20        s_ZX21  \n",
       "Identifier                                                          \n",
       "C21482      1.466174e+06  2.779301e+06  2.117668e+06  2.184310e+06  \n",
       "C18218      1.168094e+06  2.946776e+06  1.417311e+06  1.474166e+06  \n",
       "C05127               NaN  1.382788e+05           NaN           NaN  \n",
       "C01152      2.593375e+07  6.413868e+07  4.020588e+07  4.904488e+07  \n",
       "C02918      8.359706e+05  4.034381e+06  2.839358e+05  8.062160e+04  \n",
       "\n",
       "[5 rows x 96 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compound_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>group</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>h_jkdz1</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_jkdz2</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_jkdz3</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_jkdz4</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>h_jkdz5</th>\n",
       "      <td>healthy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_ZX17</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_ZX18</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_ZX19</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_ZX20</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s_ZX21</th>\n",
       "      <td>severe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>96 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           group\n",
       "sample          \n",
       "h_jkdz1  healthy\n",
       "h_jkdz2  healthy\n",
       "h_jkdz3  healthy\n",
       "h_jkdz4  healthy\n",
       "h_jkdz5  healthy\n",
       "...          ...\n",
       "s_ZX17    severe\n",
       "s_ZX18    severe\n",
       "s_ZX19    severe\n",
       "s_ZX20    severe\n",
       "s_ZX21    severe\n",
       "\n",
       "[96 rows x 1 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compound_design"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The trailing ';' keeps the call's return value (previously shown as `1`) out of the cell output.\n",
    "set_log_level_info();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create multi-omics container object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "proteins data with (791, 70) measurements"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n",
    "protein_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "compounds data with (220, 96) measurements"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n",
    "compound_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "publication = 'Proteomic and Metabolomic Characterization of COVID-19 Patient Sera'\n",
    "url = 'https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Multi-omics data container\n",
       "- publication: Proteomic and Metabolomic Characterization of COVID-19 Patient Sera\n",
       "- URL: https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9\n",
       "- Views: 2 modalities\n",
       "\t - proteins data with (791, 70) measurements\n",
       "\t - compounds data with (220, 96) measurements"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mo = MultiOmicsData(publication=publication, url=url)\n",
    "mo.add_data([protein_data, compound_data])\n",
    "mo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a mapping object"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The mapping object uses Reactome to map the different biological entities in the data:\n",
    "- Transcripts (or genes) are connected to the proteins they encode\n",
    "- Proteins and compounds are connected to reactions they're involved in\n",
    "- Reactions are connected to pathways"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-03-24 14:40:03.534 | INFO     | pyMultiOmics.functions:remove_dupes:385 - Removing 9 rows with duplicate identifiers\n",
      "2022-03-24 14:40:03.535 | INFO     | pyMultiOmics.functions:reactome_mapping:78 - There are 211 observed compound ids\n",
      "2022-03-24 14:40:03.535 | INFO     | pyMultiOmics.functions:reactome_mapping:81 - Mapping genes -> proteins\n",
      "2022-03-24 14:40:06.215 | INFO     | pyMultiOmics.functions:reactome_mapping:86 - Mapping proteins -> reactions\n",
      "2022-03-24 14:40:07.489 | INFO     | pyMultiOmics.functions:reactome_mapping:94 - Mapping compounds -> reactions\n",
      "2022-03-24 14:40:11.539 | INFO     | pyMultiOmics.functions:reactome_mapping:100 - Mapping reactions -> pathways\n",
      "2022-03-24 14:40:12.580 | INFO     | pyMultiOmics.functions:reactome_mapping:111 - Mapping reactions -> proteins\n",
      "2022-03-24 14:40:18.631 | INFO     | pyMultiOmics.functions:reactome_mapping:118 - Mapping reactions -> compounds\n",
      "2022-03-24 14:40:21.469 | INFO     | pyMultiOmics.functions:reactome_mapping:130 - Mapping proteins -> genes\n",
      "2022-03-24 14:40:36.997 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: genes\n",
      "2022-03-24 14:40:37.211 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: proteins\n",
      "2022-03-24 14:40:37.901 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: compounds\n",
      "2022-03-24 14:40:37.981 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: reactions\n",
      "2022-03-24 14:40:38.092 | INFO     | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: pathways\n",
      "2022-03-24 14:40:38.120 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: gene_proteins\n",
      "2022-03-24 14:40:38.287 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: protein_reactions\n",
      "2022-03-24 14:40:39.634 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: compound_reactions\n",
      "2022-03-24 14:40:39.880 | INFO     | pyMultiOmics.mapping:_add_edges:201 - Processing edges: reaction_pathways\n",
      "2022-03-24 14:40:40.005 | INFO     | pyMultiOmics.mapping:build:51 - Created a multi-omics network with 19645 nodes and 80442 edges\n",
      "2022-03-24 14:40:40.499 | INFO     | pyMultiOmics.mapping:build:53 - node_counts = {'genes': 7054, 'proteins': 6590, 'compounds': 1078, 'reactions': 3926, 'pathways': 997}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<pyMultiOmics.mapping.Mapper at 0x1762619f0>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = Mapper(mo, HOMO_SAPIENS, metabolic_pathway_only=False)\n",
    "m.build()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pyMultiOmics.mapping.Mapper at 0x1762619f0>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query mapping object"
   ]
  },
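  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Below are some example queries we can perform with the mapping object. The first query lists every reaction that is connected to at least one observed protein and at least one observed compound."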
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>reaction_id</th>\n",
       "      <th>reaction_name</th>\n",
       "      <th>num_proteins</th>\n",
       "      <th>num_compounds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R-HSA-114552</td>\n",
       "      <td>Thrombin-activated pars activate g12/13</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R-HSA-114558</td>\n",
       "      <td>Thrombin-activated pars activate gq</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>R-HSA-1214188</td>\n",
       "      <td>Prdm9 trimethylates histone h3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>R-HSA-1605591</td>\n",
       "      <td>Glucosylceramidase cleaves the glucosidic bond...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>R-HSA-163432</td>\n",
       "      <td>Cholesterol ester + h2o -&gt; cholesterol + fatty...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223</th>\n",
       "      <td>R-HSA-9710490</td>\n",
       "      <td>The gsdme gene promoter is hypermethylated</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>R-HSA-9733545</td>\n",
       "      <td>Bile salts and acids bind alb</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225</th>\n",
       "      <td>R-HSA-9733960</td>\n",
       "      <td>Bile salts and acids dissociate from alb</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226</th>\n",
       "      <td>R-HSA-977071</td>\n",
       "      <td>Sialyltransferase i can add sialic acid to the...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>R-HSA-977228</td>\n",
       "      <td>Sialyltransferase i can add sialic acid to the...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>228 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       reaction_id                                      reaction_name  \\\n",
       "0     R-HSA-114552            Thrombin-activated pars activate g12/13   \n",
       "1     R-HSA-114558                Thrombin-activated pars activate gq   \n",
       "2    R-HSA-1214188                     Prdm9 trimethylates histone h3   \n",
       "3    R-HSA-1605591  Glucosylceramidase cleaves the glucosidic bond...   \n",
       "4     R-HSA-163432  Cholesterol ester + h2o -> cholesterol + fatty...   \n",
       "..             ...                                                ...   \n",
       "223  R-HSA-9710490         The gsdme gene promoter is hypermethylated   \n",
       "224  R-HSA-9733545                      Bile salts and acids bind alb   \n",
       "225  R-HSA-9733960           Bile salts and acids dissociate from alb   \n",
       "226   R-HSA-977071  Sialyltransferase i can add sialic acid to the...   \n",
       "227   R-HSA-977228  Sialyltransferase i can add sialic acid to the...   \n",
       "\n",
       "     num_proteins  num_compounds  \n",
       "0               1              1  \n",
       "1               1              1  \n",
       "2               3              1  \n",
       "3               1              1  \n",
       "4               1              1  \n",
       "..            ...            ...  \n",
       "223             3              1  \n",
       "224             1              5  \n",
       "225             1              5  \n",
       "226             1              1  \n",
       "227             1              1  \n",
       "\n",
       "[228 rows x 4 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reactions = m.get_nodes(types=REACTIONS)\n",
    "\n",
    "data = []\n",
    "for reaction_id, reaction_data in reactions:\n",
    "    reaction_name = reaction_data['display_name']\n",
    "    proteins = m.get_connected(reaction_id, dest_type=PROTEINS, observed=True)\n",
    "    compounds = m.get_connected(reaction_id, dest_type=COMPOUNDS, observed=True)\n",
    "    \n",
    "    if len(proteins) > 0 and len(compounds) > 0:\n",
    "        row = [reaction_id, reaction_name, len(proteins), len(compounds)]\n",
    "        data.append(row)\n",
    "\n",
    "df = pd.DataFrame(data, columns=['reaction_id', 'reaction_name', 'num_proteins', 'num_compounds'])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### List all entities connected to reaction R-HSA-194153"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>display_name</th>\n",
       "      <th>data_type</th>\n",
       "      <th>observed</th>\n",
       "      <th>source_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>entity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ENSG00000108846</th>\n",
       "      <td>Abcc3</td>\n",
       "      <td>genes</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>O15438</th>\n",
       "      <td>O15438</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28865</th>\n",
       "      <td>Taurocholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30616</th>\n",
       "      <td>Atp(4-)</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17687</th>\n",
       "      <td>Glycocholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36274</th>\n",
       "      <td>Glycochenodeoxycholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16525</th>\n",
       "      <td>Taurochenodeoxycholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43474</th>\n",
       "      <td>Hydrogenphosphate</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>456216</th>\n",
       "      <td>Adp(3-)</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16359</th>\n",
       "      <td>Cholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9407</th>\n",
       "      <td>Taurochenodeoxycholate</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16755</th>\n",
       "      <td>Chenodeoxycholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>R-HSA-159418</th>\n",
       "      <td>Recycling of bile acids and salts</td>\n",
       "      <td>pathways</td>\n",
       "      <td>None</td>\n",
       "      <td>R-HSA-194153</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                      display_name  data_type observed  \\\n",
       "entity_id                                                                \n",
       "ENSG00000108846                              Abcc3      genes    False   \n",
       "O15438                                      O15438   proteins    False   \n",
       "28865                             Taurocholic acid  compounds     True   \n",
       "30616                                      Atp(4-)  compounds    False   \n",
       "17687                             Glycocholic acid  compounds    False   \n",
       "36274                   Glycochenodeoxycholic acid  compounds     True   \n",
       "16525                   Taurochenodeoxycholic acid  compounds    False   \n",
       "43474                            Hydrogenphosphate  compounds    False   \n",
       "456216                                     Adp(3-)  compounds    False   \n",
       "16359                                  Cholic acid  compounds     True   \n",
       "9407                        Taurochenodeoxycholate  compounds     True   \n",
       "16755                        Chenodeoxycholic acid  compounds     True   \n",
       "R-HSA-159418     Recycling of bile acids and salts   pathways     None   \n",
       "\n",
       "                    source_id  \n",
       "entity_id                      \n",
       "ENSG00000108846  R-HSA-194153  \n",
       "O15438           R-HSA-194153  \n",
       "28865            R-HSA-194153  \n",
       "30616            R-HSA-194153  \n",
       "17687            R-HSA-194153  \n",
       "36274            R-HSA-194153  \n",
       "16525            R-HSA-194153  \n",
       "43474            R-HSA-194153  \n",
       "456216           R-HSA-194153  \n",
       "16359            R-HSA-194153  \n",
       "9407             R-HSA-194153  \n",
       "16755            R-HSA-194153  \n",
       "R-HSA-159418     R-HSA-194153  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_id = 'R-HSA-194153'\n",
    "m.get_connected(query_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Query the connections between proteins and compounds (through their shared reactions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>display_name</th>\n",
       "      <th>data_type</th>\n",
       "      <th>observed</th>\n",
       "      <th>source_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>entity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>18421</th>\n",
       "      <td>Superoxide</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17033</th>\n",
       "      <td>Biliverdin</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16990</th>\n",
       "      <td>Bilirubin ixalpha</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17627</th>\n",
       "      <td>Ferroheme b</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36144</th>\n",
       "      <td>Ferriheme b</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>456216</th>\n",
       "      <td>Adp(3-)</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30616</th>\n",
       "      <td>Atp(4-)</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36274</th>\n",
       "      <td>Glycochenodeoxycholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16755</th>\n",
       "      <td>Chenodeoxycholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16359</th>\n",
       "      <td>Cholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28865</th>\n",
       "      <td>Taurocholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17687</th>\n",
       "      <td>Glycocholic acid</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9407</th>\n",
       "      <td>Taurochenodeoxycholate</td>\n",
       "      <td>compounds</td>\n",
       "      <td>True</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17504</th>\n",
       "      <td>1-o-acyl-sn-glycero-3-phosphocholine(1+)</td>\n",
       "      <td>compounds</td>\n",
       "      <td>False</td>\n",
       "      <td>P02768</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       display_name  data_type  observed  \\\n",
       "entity_id                                                                  \n",
       "18421                                    Superoxide  compounds     False   \n",
       "17033                                    Biliverdin  compounds      True   \n",
       "16990                             Bilirubin ixalpha  compounds      True   \n",
       "17627                                   Ferroheme b  compounds     False   \n",
       "36144                                   Ferriheme b  compounds     False   \n",
       "456216                                      Adp(3-)  compounds     False   \n",
       "30616                                       Atp(4-)  compounds     False   \n",
       "36274                    Glycochenodeoxycholic acid  compounds      True   \n",
       "16755                         Chenodeoxycholic acid  compounds      True   \n",
       "16359                                   Cholic acid  compounds      True   \n",
       "28865                              Taurocholic acid  compounds      True   \n",
       "17687                              Glycocholic acid  compounds     False   \n",
       "9407                         Taurochenodeoxycholate  compounds      True   \n",
       "17504      1-o-acyl-sn-glycero-3-phosphocholine(1+)  compounds     False   \n",
       "\n",
       "          source_id  \n",
       "entity_id            \n",
       "18421        P02768  \n",
       "17033        P02768  \n",
       "16990        P02768  \n",
       "17627        P02768  \n",
       "36144        P02768  \n",
       "456216       P02768  \n",
       "30616        P02768  \n",
       "36274        P02768  \n",
       "16755        P02768  \n",
       "16359        P02768  \n",
       "28865        P02768  \n",
       "17687        P02768  \n",
       "9407         P02768  \n",
       "17504        P02768  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_id = 'P02768'\n",
    "m.get_connected(query_id, dest_type=COMPOUNDS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>display_name</th>\n",
       "      <th>data_type</th>\n",
       "      <th>observed</th>\n",
       "      <th>source_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>entity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Q9NPD5</th>\n",
       "      <td>Q9NPD5</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P33527</th>\n",
       "      <td>P33527</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Q9BYK8</th>\n",
       "      <td>Q9BYK8</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Q96RS0</th>\n",
       "      <td>Q96RS0</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Q92793</th>\n",
       "      <td>Q92793</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P08047</th>\n",
       "      <td>P08047</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P30043</th>\n",
       "      <td>P30043</td>\n",
       "      <td>proteins</td>\n",
       "      <td>True</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P53004</th>\n",
       "      <td>P53004</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>O75182</th>\n",
       "      <td>O75182</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Q96ST3</th>\n",
       "      <td>Q96ST3</td>\n",
       "      <td>proteins</td>\n",
       "      <td>False</td>\n",
       "      <td>16990</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>130 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          display_name data_type  observed source_id\n",
       "entity_id                                           \n",
       "Q9NPD5          Q9NPD5  proteins     False     16990\n",
       "P33527          P33527  proteins     False     16990\n",
       "Q9BYK8          Q9BYK8  proteins     False     16990\n",
       "Q96RS0          Q96RS0  proteins     False     16990\n",
       "Q92793          Q92793  proteins     False     16990\n",
       "...                ...       ...       ...       ...\n",
       "P08047          P08047  proteins     False     16990\n",
       "P30043          P30043  proteins      True     16990\n",
       "P53004          P53004  proteins     False     16990\n",
       "O75182          O75182  proteins     False     16990\n",
       "Q96ST3          Q96ST3  proteins     False     16990\n",
       "\n",
       "[130 rows x 4 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_id = '16990'\n",
    "m.get_connected(query_id, dest_type=PROTEINS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}