1796 lines (1795 with data), 59.6 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os, sys"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pylab as plt\n",
"import matplotlib\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:40:01.676 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n",
"2022-03-24 14:40:01.676 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n",
"2022-03-24 14:40:01.677 | INFO | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n"
]
}
],
"source": [
"sys.path.append('..')\n",
"\n",
"from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n",
"from pyMultiOmics.constants import HOMO_SAPIENS, PROTEINS, COMPOUNDS, REACTIONS\n",
"from pyMultiOmics.mapping import Mapper\n",
"from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demonstration of pyMultiOmics mapping"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the processed Covid data from [1]\n",
"\n",
"[1] [Shen, Bo, et al. \"Proteomic and metabolomic characterization of COVID-19 patient sera.\" Cell 182.1 (2020): 59-72.](https://www.sciencedirect.com/science/article/pii/S0092867420306279?casa_token=wKwWn9P4MK0AAAAA:v8z5MVnQ1ONrcatncCsigSDoxeOq2ZOcN4da9SofGDcpeDqrC76QK8yNKrKtVUrMWBBAntI8)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:40:02.949 | INFO | pyMultiOmics.common:download_file:59 - Downloading covid19_dualomics_data.zip\n",
"551KB [00:00, 8.89kKB/s] \n",
"2022-03-24 14:40:03.061 | INFO | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_dualomics_data.zip\n",
"100%|█████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 1191.03it/s]\n",
"2022-03-24 14:40:03.075 | INFO | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_dualomics_data.zip\n"
]
}
],
"source": [
"url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_dualomics_data.zip'\n",
"out_file = download_file(url)\n",
"extract_zip_file(out_file)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_dualomics_data'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DATA_FOLDER = os.path.abspath(os.path.join('covid19_dualomics_data'))\n",
"DATA_FOLDER"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read proteomics data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Identifier')\n",
"protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>h_F1_131N</th>\n",
" <th>h_F1_131C</th>\n",
" <th>h_F1_132C</th>\n",
" <th>h_F2_131N</th>\n",
" <th>h_F2_131C</th>\n",
" <th>h_F2_132C</th>\n",
" <th>h_F3_131N</th>\n",
" <th>h_F3_131C</th>\n",
" <th>h_F3_132C</th>\n",
" <th>h_F4_131N</th>\n",
" <th>...</th>\n",
" <th>s_F3_128N</th>\n",
" <th>s_F3_128C</th>\n",
" <th>s_F3_129C</th>\n",
" <th>s_F4_128N</th>\n",
" <th>s_F4_128C</th>\n",
" <th>s_F5_128N</th>\n",
" <th>s_F5_128C</th>\n",
" <th>s_F6_128N</th>\n",
" <th>s_F6_128C</th>\n",
" <th>s_F6_133N</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Identifier</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>P04114</th>\n",
" <td>0.750</td>\n",
" <td>0.853</td>\n",
" <td>0.822</td>\n",
" <td>1.191</td>\n",
" <td>1.175</td>\n",
" <td>1.078</td>\n",
" <td>0.693</td>\n",
" <td>0.947</td>\n",
" <td>0.931</td>\n",
" <td>1.057</td>\n",
" <td>...</td>\n",
" <td>1.044</td>\n",
" <td>1.305</td>\n",
" <td>1.657</td>\n",
" <td>1.323</td>\n",
" <td>1.624</td>\n",
" <td>1.170</td>\n",
" <td>0.981</td>\n",
" <td>0.791</td>\n",
" <td>1.029</td>\n",
" <td>1.195</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P01024</th>\n",
" <td>0.782</td>\n",
" <td>1.057</td>\n",
" <td>0.994</td>\n",
" <td>0.864</td>\n",
" <td>0.917</td>\n",
" <td>0.790</td>\n",
" <td>0.823</td>\n",
" <td>1.152</td>\n",
" <td>0.816</td>\n",
" <td>0.920</td>\n",
" <td>...</td>\n",
" <td>1.100</td>\n",
" <td>0.986</td>\n",
" <td>1.114</td>\n",
" <td>1.210</td>\n",
" <td>1.289</td>\n",
" <td>1.104</td>\n",
" <td>1.111</td>\n",
" <td>1.007</td>\n",
" <td>1.159</td>\n",
" <td>0.979</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P02768</th>\n",
" <td>1.183</td>\n",
" <td>1.101</td>\n",
" <td>1.045</td>\n",
" <td>1.086</td>\n",
" <td>1.041</td>\n",
" <td>1.187</td>\n",
" <td>1.234</td>\n",
" <td>1.079</td>\n",
" <td>1.011</td>\n",
" <td>1.099</td>\n",
" <td>...</td>\n",
" <td>0.786</td>\n",
" <td>0.706</td>\n",
" <td>0.947</td>\n",
" <td>0.831</td>\n",
" <td>0.717</td>\n",
" <td>0.795</td>\n",
" <td>0.776</td>\n",
" <td>0.938</td>\n",
" <td>0.903</td>\n",
" <td>0.743</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P01023</th>\n",
" <td>1.066</td>\n",
" <td>1.278</td>\n",
" <td>0.959</td>\n",
" <td>0.811</td>\n",
" <td>0.789</td>\n",
" <td>0.931</td>\n",
" <td>0.971</td>\n",
" <td>0.769</td>\n",
" <td>1.011</td>\n",
" <td>0.866</td>\n",
" <td>...</td>\n",
" <td>0.817</td>\n",
" <td>0.728</td>\n",
" <td>0.861</td>\n",
" <td>0.798</td>\n",
" <td>0.751</td>\n",
" <td>0.917</td>\n",
" <td>0.809</td>\n",
" <td>0.780</td>\n",
" <td>1.195</td>\n",
" <td>0.706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P02751</th>\n",
" <td>1.085</td>\n",
" <td>0.947</td>\n",
" <td>0.993</td>\n",
" <td>1.343</td>\n",
" <td>1.130</td>\n",
" <td>0.778</td>\n",
" <td>0.731</td>\n",
" <td>1.084</td>\n",
" <td>1.107</td>\n",
" <td>0.909</td>\n",
" <td>...</td>\n",
" <td>0.566</td>\n",
" <td>0.854</td>\n",
" <td>1.109</td>\n",
" <td>0.630</td>\n",
" <td>0.850</td>\n",
" <td>0.661</td>\n",
" <td>0.848</td>\n",
" <td>0.829</td>\n",
" <td>0.760</td>\n",
" <td>0.811</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 70 columns</p>\n",
"</div>"
],
"text/plain": [
" h_F1_131N h_F1_131C h_F1_132C h_F2_131N h_F2_131C h_F2_132C \\\n",
"Identifier \n",
"P04114 0.750 0.853 0.822 1.191 1.175 1.078 \n",
"P01024 0.782 1.057 0.994 0.864 0.917 0.790 \n",
"P02768 1.183 1.101 1.045 1.086 1.041 1.187 \n",
"P01023 1.066 1.278 0.959 0.811 0.789 0.931 \n",
"P02751 1.085 0.947 0.993 1.343 1.130 0.778 \n",
"\n",
" h_F3_131N h_F3_131C h_F3_132C h_F4_131N ... s_F3_128N \\\n",
"Identifier ... \n",
"P04114 0.693 0.947 0.931 1.057 ... 1.044 \n",
"P01024 0.823 1.152 0.816 0.920 ... 1.100 \n",
"P02768 1.234 1.079 1.011 1.099 ... 0.786 \n",
"P01023 0.971 0.769 1.011 0.866 ... 0.817 \n",
"P02751 0.731 1.084 1.107 0.909 ... 0.566 \n",
"\n",
" s_F3_128C s_F3_129C s_F4_128N s_F4_128C s_F5_128N s_F5_128C \\\n",
"Identifier \n",
"P04114 1.305 1.657 1.323 1.624 1.170 0.981 \n",
"P01024 0.986 1.114 1.210 1.289 1.104 1.111 \n",
"P02768 0.706 0.947 0.831 0.717 0.795 0.776 \n",
"P01023 0.728 0.861 0.798 0.751 0.917 0.809 \n",
"P02751 0.854 1.109 0.630 0.850 0.661 0.848 \n",
"\n",
" s_F6_128N s_F6_128C s_F6_133N \n",
"Identifier \n",
"P04114 0.791 1.029 1.195 \n",
"P01024 1.007 1.159 0.979 \n",
"P02768 0.938 0.903 0.743 \n",
"P01023 0.780 1.195 0.706 \n",
"P02751 0.829 0.760 0.811 \n",
"\n",
"[5 rows x 70 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>group</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>h_F1_131N</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_F1_131C</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_F1_132C</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_F2_131N</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_F2_131C</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_F5_128N</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_F5_128C</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_F6_128N</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_F6_128C</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_F6_133N</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>70 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" group\n",
"sample \n",
"h_F1_131N healthy\n",
"h_F1_131C healthy\n",
"h_F1_132C healthy\n",
"h_F2_131N healthy\n",
"h_F2_131C healthy\n",
"... ...\n",
"s_F5_128N severe\n",
"s_F5_128C severe\n",
"s_F6_128N severe\n",
"s_F6_128C severe\n",
"s_F6_133N severe\n",
"\n",
"[70 rows x 1 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_design"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read metabolomics data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data.csv'), index_col='Identifier')\n",
"compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>h_jkdz1</th>\n",
" <th>h_jkdz2</th>\n",
" <th>h_jkdz3</th>\n",
" <th>h_jkdz4</th>\n",
" <th>h_jkdz5</th>\n",
" <th>h_jkdz6</th>\n",
" <th>h_jkdz7</th>\n",
" <th>h_jkdz8</th>\n",
" <th>h_jkdz9</th>\n",
" <th>h_jkdz10</th>\n",
" <th>...</th>\n",
" <th>s_ZX12</th>\n",
" <th>s_ZX13</th>\n",
" <th>s_ZX14</th>\n",
" <th>s_ZX15</th>\n",
" <th>s_ZX16</th>\n",
" <th>s_ZX17</th>\n",
" <th>s_ZX18</th>\n",
" <th>s_ZX19</th>\n",
" <th>s_ZX20</th>\n",
" <th>s_ZX21</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Identifier</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>C21482</th>\n",
" <td>19413052.00</td>\n",
" <td>6.381812e+06</td>\n",
" <td>9.748316e+06</td>\n",
" <td>5.326872e+06</td>\n",
" <td>1.998072e+07</td>\n",
" <td>3.580375e+06</td>\n",
" <td>8.256121e+06</td>\n",
" <td>8.079382e+06</td>\n",
" <td>1.559659e+07</td>\n",
" <td>1.520363e+07</td>\n",
" <td>...</td>\n",
" <td>1.904349e+06</td>\n",
" <td>3.226016e+06</td>\n",
" <td>7.378147e+05</td>\n",
" <td>2.817698e+06</td>\n",
" <td>3.329101e+06</td>\n",
" <td>3206752.75</td>\n",
" <td>1.466174e+06</td>\n",
" <td>2.779301e+06</td>\n",
" <td>2.117668e+06</td>\n",
" <td>2.184310e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C18218</th>\n",
" <td>2711915.25</td>\n",
" <td>2.056393e+06</td>\n",
" <td>1.445594e+06</td>\n",
" <td>2.038765e+06</td>\n",
" <td>2.536996e+06</td>\n",
" <td>2.638198e+06</td>\n",
" <td>2.285757e+06</td>\n",
" <td>1.973140e+06</td>\n",
" <td>2.015425e+06</td>\n",
" <td>2.290842e+06</td>\n",
" <td>...</td>\n",
" <td>1.409720e+06</td>\n",
" <td>1.413307e+06</td>\n",
" <td>3.218834e+06</td>\n",
" <td>1.602131e+06</td>\n",
" <td>1.317878e+06</td>\n",
" <td>2930312.75</td>\n",
" <td>1.168094e+06</td>\n",
" <td>2.946776e+06</td>\n",
" <td>1.417311e+06</td>\n",
" <td>1.474166e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C05127</th>\n",
" <td>87727.25</td>\n",
" <td>NaN</td>\n",
" <td>9.238706e+04</td>\n",
" <td>NaN</td>\n",
" <td>1.597879e+05</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.055130e+04</td>\n",
" <td>1.214114e+05</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.382788e+05</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C01152</th>\n",
" <td>58832828.00</td>\n",
" <td>5.843934e+07</td>\n",
" <td>5.552133e+07</td>\n",
" <td>4.516214e+07</td>\n",
" <td>5.478952e+07</td>\n",
" <td>3.941259e+07</td>\n",
" <td>2.987876e+07</td>\n",
" <td>6.751726e+07</td>\n",
" <td>4.666031e+07</td>\n",
" <td>9.118524e+07</td>\n",
" <td>...</td>\n",
" <td>2.881314e+07</td>\n",
" <td>3.164358e+07</td>\n",
" <td>2.538767e+07</td>\n",
" <td>3.307604e+07</td>\n",
" <td>3.915698e+07</td>\n",
" <td>24400592.00</td>\n",
" <td>2.593375e+07</td>\n",
" <td>6.413868e+07</td>\n",
" <td>4.020588e+07</td>\n",
" <td>4.904488e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C02918</th>\n",
" <td>NaN</td>\n",
" <td>1.815549e+05</td>\n",
" <td>2.240392e+05</td>\n",
" <td>1.609397e+05</td>\n",
" <td>3.206194e+05</td>\n",
" <td>7.176557e+05</td>\n",
" <td>3.268182e+05</td>\n",
" <td>5.135810e+05</td>\n",
" <td>2.734582e+05</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>3.337245e+05</td>\n",
" <td>NaN</td>\n",
" <td>4.347152e+05</td>\n",
" <td>3.532118e+04</td>\n",
" <td>NaN</td>\n",
" <td>655827.25</td>\n",
" <td>8.359706e+05</td>\n",
" <td>4.034381e+06</td>\n",
" <td>2.839358e+05</td>\n",
" <td>8.062160e+04</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 96 columns</p>\n",
"</div>"
],
"text/plain": [
" h_jkdz1 h_jkdz2 h_jkdz3 h_jkdz4 \\\n",
"Identifier \n",
"C21482 19413052.00 6.381812e+06 9.748316e+06 5.326872e+06 \n",
"C18218 2711915.25 2.056393e+06 1.445594e+06 2.038765e+06 \n",
"C05127 87727.25 NaN 9.238706e+04 NaN \n",
"C01152 58832828.00 5.843934e+07 5.552133e+07 4.516214e+07 \n",
"C02918 NaN 1.815549e+05 2.240392e+05 1.609397e+05 \n",
"\n",
" h_jkdz5 h_jkdz6 h_jkdz7 h_jkdz8 \\\n",
"Identifier \n",
"C21482 1.998072e+07 3.580375e+06 8.256121e+06 8.079382e+06 \n",
"C18218 2.536996e+06 2.638198e+06 2.285757e+06 1.973140e+06 \n",
"C05127 1.597879e+05 NaN NaN NaN \n",
"C01152 5.478952e+07 3.941259e+07 2.987876e+07 6.751726e+07 \n",
"C02918 3.206194e+05 7.176557e+05 3.268182e+05 5.135810e+05 \n",
"\n",
" h_jkdz9 h_jkdz10 ... s_ZX12 s_ZX13 \\\n",
"Identifier ... \n",
"C21482 1.559659e+07 1.520363e+07 ... 1.904349e+06 3.226016e+06 \n",
"C18218 2.015425e+06 2.290842e+06 ... 1.409720e+06 1.413307e+06 \n",
"C05127 9.055130e+04 1.214114e+05 ... NaN NaN \n",
"C01152 4.666031e+07 9.118524e+07 ... 2.881314e+07 3.164358e+07 \n",
"C02918 2.734582e+05 NaN ... 3.337245e+05 NaN \n",
"\n",
" s_ZX14 s_ZX15 s_ZX16 s_ZX17 \\\n",
"Identifier \n",
"C21482 7.378147e+05 2.817698e+06 3.329101e+06 3206752.75 \n",
"C18218 3.218834e+06 1.602131e+06 1.317878e+06 2930312.75 \n",
"C05127 NaN NaN NaN NaN \n",
"C01152 2.538767e+07 3.307604e+07 3.915698e+07 24400592.00 \n",
"C02918 4.347152e+05 3.532118e+04 NaN 655827.25 \n",
"\n",
" s_ZX18 s_ZX19 s_ZX20 s_ZX21 \n",
"Identifier \n",
"C21482 1.466174e+06 2.779301e+06 2.117668e+06 2.184310e+06 \n",
"C18218 1.168094e+06 2.946776e+06 1.417311e+06 1.474166e+06 \n",
"C05127 NaN 1.382788e+05 NaN NaN \n",
"C01152 2.593375e+07 6.413868e+07 4.020588e+07 4.904488e+07 \n",
"C02918 8.359706e+05 4.034381e+06 2.839358e+05 8.062160e+04 \n",
"\n",
"[5 rows x 96 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compound_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>group</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>h_jkdz1</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_jkdz2</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_jkdz3</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_jkdz4</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h_jkdz5</th>\n",
" <td>healthy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_ZX17</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_ZX18</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_ZX19</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_ZX20</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>s_ZX21</th>\n",
" <td>severe</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" group\n",
"sample \n",
"h_jkdz1 healthy\n",
"h_jkdz2 healthy\n",
"h_jkdz3 healthy\n",
"h_jkdz4 healthy\n",
"h_jkdz5 healthy\n",
"... ...\n",
"s_ZX17 severe\n",
"s_ZX18 severe\n",
"s_ZX19 severe\n",
"s_ZX20 severe\n",
"s_ZX21 severe\n",
"\n",
"[96 rows x 1 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compound_design"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set_log_level_info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create multi-omics container object"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"proteins data with (791, 70) measurements"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n",
"protein_data"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"compounds data with (220, 96) measurements"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n",
"compound_data"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"publication = 'Proteomic and Metabolomic Characterization of COVID-19 Patient Sera'\n",
"url = 'https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9'"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Multi-omics data container\n",
"- publication: Proteomic and Metabolomic Characterization of COVID-19 Patient Sera\n",
"- URL: https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9\n",
"- Views: 2 modalities\n",
"\t - proteins data with (791, 70) measurements\n",
"\t - compounds data with (220, 96) measurements"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mo = MultiOmicsData(publication=publication, url=url)\n",
"mo.add_data([protein_data, compound_data])\n",
"mo"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a mapping object"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The mapping object uses Reactome to map the different biological entities in the data:\n",
"- Transcripts (or genes) are connected to the proteins they encode\n",
"- Proteins and compounds are connected to reactions they're involved in\n",
"- Reactions are connected to pathways"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:40:03.534 | INFO | pyMultiOmics.functions:remove_dupes:385 - Removing 9 rows with duplicate identifiers\n",
"2022-03-24 14:40:03.535 | INFO | pyMultiOmics.functions:reactome_mapping:78 - There are 211 observed compound ids\n",
"2022-03-24 14:40:03.535 | INFO | pyMultiOmics.functions:reactome_mapping:81 - Mapping genes -> proteins\n",
"2022-03-24 14:40:06.215 | INFO | pyMultiOmics.functions:reactome_mapping:86 - Mapping proteins -> reactions\n",
"2022-03-24 14:40:07.489 | INFO | pyMultiOmics.functions:reactome_mapping:94 - Mapping compounds -> reactions\n",
"2022-03-24 14:40:11.539 | INFO | pyMultiOmics.functions:reactome_mapping:100 - Mapping reactions -> pathways\n",
"2022-03-24 14:40:12.580 | INFO | pyMultiOmics.functions:reactome_mapping:111 - Mapping reactions -> proteins\n",
"2022-03-24 14:40:18.631 | INFO | pyMultiOmics.functions:reactome_mapping:118 - Mapping reactions -> compounds\n",
"2022-03-24 14:40:21.469 | INFO | pyMultiOmics.functions:reactome_mapping:130 - Mapping proteins -> genes\n",
"2022-03-24 14:40:36.997 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: genes\n",
"2022-03-24 14:40:37.211 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: proteins\n",
"2022-03-24 14:40:37.901 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: compounds\n",
"2022-03-24 14:40:37.981 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: reactions\n",
"2022-03-24 14:40:38.092 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: pathways\n",
"2022-03-24 14:40:38.120 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: gene_proteins\n",
"2022-03-24 14:40:38.287 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: protein_reactions\n",
"2022-03-24 14:40:39.634 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: compound_reactions\n",
"2022-03-24 14:40:39.880 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: reaction_pathways\n",
"2022-03-24 14:40:40.005 | INFO | pyMultiOmics.mapping:build:51 - Created a multi-omics network with 19645 nodes and 80442 edges\n",
"2022-03-24 14:40:40.499 | INFO | pyMultiOmics.mapping:build:53 - node_counts = {'genes': 7054, 'proteins': 6590, 'compounds': 1078, 'reactions': 3926, 'pathways': 997}\n"
]
},
{
"data": {
"text/plain": [
"<pyMultiOmics.mapping.Mapper at 0x1762619f0>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m = Mapper(mo, HOMO_SAPIENS, metabolic_pathway_only=False)\n",
"m.build()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pyMultiOmics.mapping.Mapper at 0x1762619f0>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query mapping object"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below shows some example queries we can perform with the mapping object"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>reaction_id</th>\n",
" <th>reaction_name</th>\n",
" <th>num_proteins</th>\n",
" <th>num_compounds</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>R-HSA-114552</td>\n",
" <td>Thrombin-activated pars activate g12/13</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>R-HSA-114558</td>\n",
" <td>Thrombin-activated pars activate gq</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>R-HSA-1214188</td>\n",
" <td>Prdm9 trimethylates histone h3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>R-HSA-1605591</td>\n",
" <td>Glucosylceramidase cleaves the glucosidic bond...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>R-HSA-163432</td>\n",
" <td>Cholesterol ester + h2o -> cholesterol + fatty...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>R-HSA-9710490</td>\n",
" <td>The gsdme gene promoter is hypermethylated</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224</th>\n",
" <td>R-HSA-9733545</td>\n",
" <td>Bile salts and acids bind alb</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>225</th>\n",
" <td>R-HSA-9733960</td>\n",
" <td>Bile salts and acids dissociate from alb</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>226</th>\n",
" <td>R-HSA-977071</td>\n",
" <td>Sialyltransferase i can add sialic acid to the...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>227</th>\n",
" <td>R-HSA-977228</td>\n",
" <td>Sialyltransferase i can add sialic acid to the...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>228 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" reaction_id reaction_name \\\n",
"0 R-HSA-114552 Thrombin-activated pars activate g12/13 \n",
"1 R-HSA-114558 Thrombin-activated pars activate gq \n",
"2 R-HSA-1214188 Prdm9 trimethylates histone h3 \n",
"3 R-HSA-1605591 Glucosylceramidase cleaves the glucosidic bond... \n",
"4 R-HSA-163432 Cholesterol ester + h2o -> cholesterol + fatty... \n",
".. ... ... \n",
"223 R-HSA-9710490 The gsdme gene promoter is hypermethylated \n",
"224 R-HSA-9733545 Bile salts and acids bind alb \n",
"225 R-HSA-9733960 Bile salts and acids dissociate from alb \n",
"226 R-HSA-977071 Sialyltransferase i can add sialic acid to the... \n",
"227 R-HSA-977228 Sialyltransferase i can add sialic acid to the... \n",
"\n",
" num_proteins num_compounds \n",
"0 1 1 \n",
"1 1 1 \n",
"2 3 1 \n",
"3 1 1 \n",
"4 1 1 \n",
".. ... ... \n",
"223 3 1 \n",
"224 1 5 \n",
"225 1 5 \n",
"226 1 1 \n",
"227 1 1 \n",
"\n",
"[228 rows x 4 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reactions = m.get_nodes(types=REACTIONS)\n",
"\n",
"data = []\n",
"for reaction_id, reaction_data in reactions:\n",
" reaction_name = reaction_data['display_name']\n",
" proteins = m.get_connected(reaction_id, dest_type=PROTEINS, observed=True)\n",
" compounds = m.get_connected(reaction_id, dest_type=COMPOUNDS, observed=True)\n",
" \n",
" if len(proteins) > 0 and len(compounds) > 0:\n",
" row = [reaction_id, reaction_name, len(proteins), len(compounds)]\n",
" data.append(row)\n",
"\n",
"df = pd.DataFrame(data, columns=['reaction_id', 'reaction_name', 'num_proteins', 'num_compounds'])\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### List all entities connected to reaction R-HSA-194153"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ENSG00000108846</th>\n",
" <td>Abcc3</td>\n",
" <td>genes</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>O15438</th>\n",
" <td>O15438</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28865</th>\n",
" <td>Taurocholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30616</th>\n",
" <td>Atp(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17687</th>\n",
" <td>Glycocholic acid</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36274</th>\n",
" <td>Glycochenodeoxycholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16525</th>\n",
" <td>Taurochenodeoxycholic acid</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43474</th>\n",
" <td>Hydrogenphosphate</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>456216</th>\n",
" <td>Adp(3-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16359</th>\n",
" <td>Cholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9407</th>\n",
" <td>Taurochenodeoxycholate</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16755</th>\n",
" <td>Chenodeoxycholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>R-HSA-159418</th>\n",
" <td>Recycling of bile acids and salts</td>\n",
" <td>pathways</td>\n",
" <td>None</td>\n",
" <td>R-HSA-194153</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed \\\n",
"entity_id \n",
"ENSG00000108846 Abcc3 genes False \n",
"O15438 O15438 proteins False \n",
"28865 Taurocholic acid compounds True \n",
"30616 Atp(4-) compounds False \n",
"17687 Glycocholic acid compounds False \n",
"36274 Glycochenodeoxycholic acid compounds True \n",
"16525 Taurochenodeoxycholic acid compounds False \n",
"43474 Hydrogenphosphate compounds False \n",
"456216 Adp(3-) compounds False \n",
"16359 Cholic acid compounds True \n",
"9407 Taurochenodeoxycholate compounds True \n",
"16755 Chenodeoxycholic acid compounds True \n",
"R-HSA-159418 Recycling of bile acids and salts pathways None \n",
"\n",
" source_id \n",
"entity_id \n",
"ENSG00000108846 R-HSA-194153 \n",
"O15438 R-HSA-194153 \n",
"28865 R-HSA-194153 \n",
"30616 R-HSA-194153 \n",
"17687 R-HSA-194153 \n",
"36274 R-HSA-194153 \n",
"16525 R-HSA-194153 \n",
"43474 R-HSA-194153 \n",
"456216 R-HSA-194153 \n",
"16359 R-HSA-194153 \n",
"9407 R-HSA-194153 \n",
"16755 R-HSA-194153 \n",
"R-HSA-159418 R-HSA-194153 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = 'R-HSA-194153'\n",
"m.get_connected(query_id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Query the connections between proteins and compounds, in both directions (through their shared reactions)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>18421</th>\n",
" <td>Superoxide</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17033</th>\n",
" <td>Biliverdin</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16990</th>\n",
" <td>Bilirubin ixalpha</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17627</th>\n",
" <td>Ferroheme b</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36144</th>\n",
" <td>Ferriheme b</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>456216</th>\n",
" <td>Adp(3-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30616</th>\n",
" <td>Atp(4-)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36274</th>\n",
" <td>Glycochenodeoxycholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16755</th>\n",
" <td>Chenodeoxycholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16359</th>\n",
" <td>Cholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28865</th>\n",
" <td>Taurocholic acid</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17687</th>\n",
" <td>Glycocholic acid</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9407</th>\n",
" <td>Taurochenodeoxycholate</td>\n",
" <td>compounds</td>\n",
" <td>True</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17504</th>\n",
" <td>1-o-acyl-sn-glycero-3-phosphocholine(1+)</td>\n",
" <td>compounds</td>\n",
" <td>False</td>\n",
" <td>P02768</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed \\\n",
"entity_id \n",
"18421 Superoxide compounds False \n",
"17033 Biliverdin compounds True \n",
"16990 Bilirubin ixalpha compounds True \n",
"17627 Ferroheme b compounds False \n",
"36144 Ferriheme b compounds False \n",
"456216 Adp(3-) compounds False \n",
"30616 Atp(4-) compounds False \n",
"36274 Glycochenodeoxycholic acid compounds True \n",
"16755 Chenodeoxycholic acid compounds True \n",
"16359 Cholic acid compounds True \n",
"28865 Taurocholic acid compounds True \n",
"17687 Glycocholic acid compounds False \n",
"9407 Taurochenodeoxycholate compounds True \n",
"17504 1-o-acyl-sn-glycero-3-phosphocholine(1+) compounds False \n",
"\n",
" source_id \n",
"entity_id \n",
"18421 P02768 \n",
"17033 P02768 \n",
"16990 P02768 \n",
"17627 P02768 \n",
"36144 P02768 \n",
"456216 P02768 \n",
"30616 P02768 \n",
"36274 P02768 \n",
"16755 P02768 \n",
"16359 P02768 \n",
"28865 P02768 \n",
"17687 P02768 \n",
"9407 P02768 \n",
"17504 P02768 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = 'P02768'\n",
"m.get_connected(query_id, dest_type=COMPOUNDS)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>display_name</th>\n",
" <th>data_type</th>\n",
" <th>observed</th>\n",
" <th>source_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>entity_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Q9NPD5</th>\n",
" <td>Q9NPD5</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P33527</th>\n",
" <td>P33527</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q9BYK8</th>\n",
" <td>Q9BYK8</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q96RS0</th>\n",
" <td>Q96RS0</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q92793</th>\n",
" <td>Q92793</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P08047</th>\n",
" <td>P08047</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P30043</th>\n",
" <td>P30043</td>\n",
" <td>proteins</td>\n",
" <td>True</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P53004</th>\n",
" <td>P53004</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>O75182</th>\n",
" <td>O75182</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Q96ST3</th>\n",
" <td>Q96ST3</td>\n",
" <td>proteins</td>\n",
" <td>False</td>\n",
" <td>16990</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>130 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" display_name data_type observed source_id\n",
"entity_id \n",
"Q9NPD5 Q9NPD5 proteins False 16990\n",
"P33527 P33527 proteins False 16990\n",
"Q9BYK8 Q9BYK8 proteins False 16990\n",
"Q96RS0 Q96RS0 proteins False 16990\n",
"Q92793 Q92793 proteins False 16990\n",
"... ... ... ... ...\n",
"P08047 P08047 proteins False 16990\n",
"P30043 P30043 proteins True 16990\n",
"P53004 P53004 proteins False 16990\n",
"O75182 O75182 proteins False 16990\n",
"Q96ST3 Q96ST3 proteins False 16990\n",
"\n",
"[130 rows x 4 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_id = '16990'\n",
"m.get_connected(query_id, dest_type=PROTEINS)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}