--- a +++ b/notebooks/mapping_covid.ipynb @@ -0,0 +1,1795 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pylab as plt\n", + "import matplotlib\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:40:01.676 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n", + "2022-03-24 14:40:01.676 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n", + "2022-03-24 14:40:01.677 | INFO | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n" + ] + } + ], + "source": [ + "sys.path.append('..')\n", + "\n", + "from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n", + "from pyMultiOmics.constants import HOMO_SAPIENS, PROTEINS, COMPOUNDS, REACTIONS\n", + "from pyMultiOmics.mapping import Mapper\n", + "from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Demonstration of pyMultiOmics mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the processed Covid data from [1]\n", + "\n", + "[1] [Shen, Bo, et al. 
\"Proteomic and metabolomic characterization of COVID-19 patient sera.\" Cell 182.1 (2020): 59-72.](https://www.sciencedirect.com/science/article/pii/S0092867420306279?casa_token=wKwWn9P4MK0AAAAA:v8z5MVnQ1ONrcatncCsigSDoxeOq2ZOcN4da9SofGDcpeDqrC76QK8yNKrKtVUrMWBBAntI8)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:40:02.949 | INFO | pyMultiOmics.common:download_file:59 - Downloading covid19_dualomics_data.zip\n", + "551KB [00:00, 8.89kKB/s] \n", + "2022-03-24 14:40:03.061 | INFO | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_dualomics_data.zip\n", + "100%|█████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 1191.03it/s]\n", + "2022-03-24 14:40:03.075 | INFO | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_dualomics_data.zip\n" + ] + } + ], + "source": [ + "url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_dualomics_data.zip'\n", + "out_file = download_file(url)\n", + "extract_zip_file(out_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_dualomics_data'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DATA_FOLDER = os.path.abspath(os.path.join('covid19_dualomics_data'))\n", + "DATA_FOLDER" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read proteomics data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Identifier')\n", + "protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + 
"execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>h_F1_131N</th>\n", + " <th>h_F1_131C</th>\n", + " <th>h_F1_132C</th>\n", + " <th>h_F2_131N</th>\n", + " <th>h_F2_131C</th>\n", + " <th>h_F2_132C</th>\n", + " <th>h_F3_131N</th>\n", + " <th>h_F3_131C</th>\n", + " <th>h_F3_132C</th>\n", + " <th>h_F4_131N</th>\n", + " <th>...</th>\n", + " <th>s_F3_128N</th>\n", + " <th>s_F3_128C</th>\n", + " <th>s_F3_129C</th>\n", + " <th>s_F4_128N</th>\n", + " <th>s_F4_128C</th>\n", + " <th>s_F5_128N</th>\n", + " <th>s_F5_128C</th>\n", + " <th>s_F6_128N</th>\n", + " <th>s_F6_128C</th>\n", + " <th>s_F6_133N</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Identifier</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>P04114</th>\n", + " <td>0.750</td>\n", + " <td>0.853</td>\n", + " <td>0.822</td>\n", + " <td>1.191</td>\n", + " <td>1.175</td>\n", + " <td>1.078</td>\n", + " <td>0.693</td>\n", + " <td>0.947</td>\n", + " <td>0.931</td>\n", + " <td>1.057</td>\n", + " <td>...</td>\n", + " <td>1.044</td>\n", + " <td>1.305</td>\n", + " <td>1.657</td>\n", + " <td>1.323</td>\n", + " <td>1.624</td>\n", + " <td>1.170</td>\n", + " <td>0.981</td>\n", + " 
<td>0.791</td>\n", + " <td>1.029</td>\n", + " <td>1.195</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P01024</th>\n", + " <td>0.782</td>\n", + " <td>1.057</td>\n", + " <td>0.994</td>\n", + " <td>0.864</td>\n", + " <td>0.917</td>\n", + " <td>0.790</td>\n", + " <td>0.823</td>\n", + " <td>1.152</td>\n", + " <td>0.816</td>\n", + " <td>0.920</td>\n", + " <td>...</td>\n", + " <td>1.100</td>\n", + " <td>0.986</td>\n", + " <td>1.114</td>\n", + " <td>1.210</td>\n", + " <td>1.289</td>\n", + " <td>1.104</td>\n", + " <td>1.111</td>\n", + " <td>1.007</td>\n", + " <td>1.159</td>\n", + " <td>0.979</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P02768</th>\n", + " <td>1.183</td>\n", + " <td>1.101</td>\n", + " <td>1.045</td>\n", + " <td>1.086</td>\n", + " <td>1.041</td>\n", + " <td>1.187</td>\n", + " <td>1.234</td>\n", + " <td>1.079</td>\n", + " <td>1.011</td>\n", + " <td>1.099</td>\n", + " <td>...</td>\n", + " <td>0.786</td>\n", + " <td>0.706</td>\n", + " <td>0.947</td>\n", + " <td>0.831</td>\n", + " <td>0.717</td>\n", + " <td>0.795</td>\n", + " <td>0.776</td>\n", + " <td>0.938</td>\n", + " <td>0.903</td>\n", + " <td>0.743</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P01023</th>\n", + " <td>1.066</td>\n", + " <td>1.278</td>\n", + " <td>0.959</td>\n", + " <td>0.811</td>\n", + " <td>0.789</td>\n", + " <td>0.931</td>\n", + " <td>0.971</td>\n", + " <td>0.769</td>\n", + " <td>1.011</td>\n", + " <td>0.866</td>\n", + " <td>...</td>\n", + " <td>0.817</td>\n", + " <td>0.728</td>\n", + " <td>0.861</td>\n", + " <td>0.798</td>\n", + " <td>0.751</td>\n", + " <td>0.917</td>\n", + " <td>0.809</td>\n", + " <td>0.780</td>\n", + " <td>1.195</td>\n", + " <td>0.706</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P02751</th>\n", + " <td>1.085</td>\n", + " <td>0.947</td>\n", + " <td>0.993</td>\n", + " <td>1.343</td>\n", + " <td>1.130</td>\n", + " <td>0.778</td>\n", + " <td>0.731</td>\n", + " <td>1.084</td>\n", + " <td>1.107</td>\n", + " <td>0.909</td>\n", + " <td>...</td>\n", + " <td>0.566</td>\n", + " 
<td>0.854</td>\n", + " <td>1.109</td>\n", + " <td>0.630</td>\n", + " <td>0.850</td>\n", + " <td>0.661</td>\n", + " <td>0.848</td>\n", + " <td>0.829</td>\n", + " <td>0.760</td>\n", + " <td>0.811</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 70 columns</p>\n", + "</div>" + ], + "text/plain": [ + " h_F1_131N h_F1_131C h_F1_132C h_F2_131N h_F2_131C h_F2_132C \\\n", + "Identifier \n", + "P04114 0.750 0.853 0.822 1.191 1.175 1.078 \n", + "P01024 0.782 1.057 0.994 0.864 0.917 0.790 \n", + "P02768 1.183 1.101 1.045 1.086 1.041 1.187 \n", + "P01023 1.066 1.278 0.959 0.811 0.789 0.931 \n", + "P02751 1.085 0.947 0.993 1.343 1.130 0.778 \n", + "\n", + " h_F3_131N h_F3_131C h_F3_132C h_F4_131N ... s_F3_128N \\\n", + "Identifier ... \n", + "P04114 0.693 0.947 0.931 1.057 ... 1.044 \n", + "P01024 0.823 1.152 0.816 0.920 ... 1.100 \n", + "P02768 1.234 1.079 1.011 1.099 ... 0.786 \n", + "P01023 0.971 0.769 1.011 0.866 ... 0.817 \n", + "P02751 0.731 1.084 1.107 0.909 ... 0.566 \n", + "\n", + " s_F3_128C s_F3_129C s_F4_128N s_F4_128C s_F5_128N s_F5_128C \\\n", + "Identifier \n", + "P04114 1.305 1.657 1.323 1.624 1.170 0.981 \n", + "P01024 0.986 1.114 1.210 1.289 1.104 1.111 \n", + "P02768 0.706 0.947 0.831 0.717 0.795 0.776 \n", + "P01023 0.728 0.861 0.798 0.751 0.917 0.809 \n", + "P02751 0.854 1.109 0.630 0.850 0.661 0.848 \n", + "\n", + " s_F6_128N s_F6_128C s_F6_133N \n", + "Identifier \n", + "P04114 0.791 1.029 1.195 \n", + "P01024 1.007 1.159 0.979 \n", + "P02768 0.938 0.903 0.743 \n", + "P01023 0.780 1.195 0.706 \n", + "P02751 0.829 0.760 0.811 \n", + "\n", + "[5 rows x 70 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " 
}\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>group</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sample</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>h_F1_131N</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_F1_131C</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_F1_132C</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_F2_131N</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_F2_131C</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_F5_128N</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_F5_128C</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_F6_128N</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_F6_128C</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_F6_133N</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>70 rows × 1 columns</p>\n", + "</div>" + ], + "text/plain": [ + " group\n", + "sample \n", + "h_F1_131N healthy\n", + "h_F1_131C healthy\n", + "h_F1_132C healthy\n", + "h_F2_131N healthy\n", + "h_F2_131C healthy\n", + "... 
...\n", + "s_F5_128N severe\n", + "s_F5_128C severe\n", + "s_F6_128N severe\n", + "s_F6_128C severe\n", + "s_F6_133N severe\n", + "\n", + "[70 rows x 1 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_design" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read metabolomics data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data.csv'), index_col='Identifier')\n", + "compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>h_jkdz1</th>\n", + " <th>h_jkdz2</th>\n", + " <th>h_jkdz3</th>\n", + " <th>h_jkdz4</th>\n", + " <th>h_jkdz5</th>\n", + " <th>h_jkdz6</th>\n", + " <th>h_jkdz7</th>\n", + " <th>h_jkdz8</th>\n", + " <th>h_jkdz9</th>\n", + " <th>h_jkdz10</th>\n", + " <th>...</th>\n", + " <th>s_ZX12</th>\n", + " <th>s_ZX13</th>\n", + " <th>s_ZX14</th>\n", + " <th>s_ZX15</th>\n", + " <th>s_ZX16</th>\n", + " <th>s_ZX17</th>\n", + " <th>s_ZX18</th>\n", + " <th>s_ZX19</th>\n", + " <th>s_ZX20</th>\n", + " <th>s_ZX21</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Identifier</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", 
+ " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>C21482</th>\n", + " <td>19413052.00</td>\n", + " <td>6.381812e+06</td>\n", + " <td>9.748316e+06</td>\n", + " <td>5.326872e+06</td>\n", + " <td>1.998072e+07</td>\n", + " <td>3.580375e+06</td>\n", + " <td>8.256121e+06</td>\n", + " <td>8.079382e+06</td>\n", + " <td>1.559659e+07</td>\n", + " <td>1.520363e+07</td>\n", + " <td>...</td>\n", + " <td>1.904349e+06</td>\n", + " <td>3.226016e+06</td>\n", + " <td>7.378147e+05</td>\n", + " <td>2.817698e+06</td>\n", + " <td>3.329101e+06</td>\n", + " <td>3206752.75</td>\n", + " <td>1.466174e+06</td>\n", + " <td>2.779301e+06</td>\n", + " <td>2.117668e+06</td>\n", + " <td>2.184310e+06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C18218</th>\n", + " <td>2711915.25</td>\n", + " <td>2.056393e+06</td>\n", + " <td>1.445594e+06</td>\n", + " <td>2.038765e+06</td>\n", + " <td>2.536996e+06</td>\n", + " <td>2.638198e+06</td>\n", + " <td>2.285757e+06</td>\n", + " <td>1.973140e+06</td>\n", + " <td>2.015425e+06</td>\n", + " <td>2.290842e+06</td>\n", + " <td>...</td>\n", + " <td>1.409720e+06</td>\n", + " <td>1.413307e+06</td>\n", + " <td>3.218834e+06</td>\n", + " <td>1.602131e+06</td>\n", + " <td>1.317878e+06</td>\n", + " <td>2930312.75</td>\n", + " <td>1.168094e+06</td>\n", + " <td>2.946776e+06</td>\n", + " <td>1.417311e+06</td>\n", + " <td>1.474166e+06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C05127</th>\n", + " <td>87727.25</td>\n", + " <td>NaN</td>\n", + " <td>9.238706e+04</td>\n", + " <td>NaN</td>\n", + " <td>1.597879e+05</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>9.055130e+04</td>\n", + " <td>1.214114e+05</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1.382788e+05</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C01152</th>\n", + " <td>58832828.00</td>\n", + " <td>5.843934e+07</td>\n", + " <td>5.552133e+07</td>\n", + " <td>4.516214e+07</td>\n", + " <td>5.478952e+07</td>\n", + " <td>3.941259e+07</td>\n", + " <td>2.987876e+07</td>\n", + " <td>6.751726e+07</td>\n", + " <td>4.666031e+07</td>\n", + " <td>9.118524e+07</td>\n", + " <td>...</td>\n", + " <td>2.881314e+07</td>\n", + " <td>3.164358e+07</td>\n", + " <td>2.538767e+07</td>\n", + " <td>3.307604e+07</td>\n", + " <td>3.915698e+07</td>\n", + " <td>24400592.00</td>\n", + " <td>2.593375e+07</td>\n", + " <td>6.413868e+07</td>\n", + " <td>4.020588e+07</td>\n", + " <td>4.904488e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C02918</th>\n", + " <td>NaN</td>\n", + " <td>1.815549e+05</td>\n", + " <td>2.240392e+05</td>\n", + " <td>1.609397e+05</td>\n", + " <td>3.206194e+05</td>\n", + " <td>7.176557e+05</td>\n", + " <td>3.268182e+05</td>\n", + " <td>5.135810e+05</td>\n", + " <td>2.734582e+05</td>\n", + " <td>NaN</td>\n", + " <td>...</td>\n", + " <td>3.337245e+05</td>\n", + " <td>NaN</td>\n", + " <td>4.347152e+05</td>\n", + " <td>3.532118e+04</td>\n", + " <td>NaN</td>\n", + " <td>655827.25</td>\n", + " <td>8.359706e+05</td>\n", + " <td>4.034381e+06</td>\n", + " <td>2.839358e+05</td>\n", + " <td>8.062160e+04</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 96 columns</p>\n", + "</div>" + ], + "text/plain": [ + " h_jkdz1 h_jkdz2 h_jkdz3 h_jkdz4 \\\n", + "Identifier \n", + "C21482 19413052.00 6.381812e+06 9.748316e+06 5.326872e+06 \n", + "C18218 2711915.25 2.056393e+06 1.445594e+06 2.038765e+06 \n", + "C05127 87727.25 NaN 9.238706e+04 NaN \n", + "C01152 58832828.00 5.843934e+07 5.552133e+07 4.516214e+07 \n", + "C02918 NaN 1.815549e+05 2.240392e+05 1.609397e+05 \n", + "\n", + " h_jkdz5 h_jkdz6 h_jkdz7 h_jkdz8 \\\n", + "Identifier \n", + "C21482 1.998072e+07 3.580375e+06 
8.256121e+06 8.079382e+06 \n", + "C18218 2.536996e+06 2.638198e+06 2.285757e+06 1.973140e+06 \n", + "C05127 1.597879e+05 NaN NaN NaN \n", + "C01152 5.478952e+07 3.941259e+07 2.987876e+07 6.751726e+07 \n", + "C02918 3.206194e+05 7.176557e+05 3.268182e+05 5.135810e+05 \n", + "\n", + " h_jkdz9 h_jkdz10 ... s_ZX12 s_ZX13 \\\n", + "Identifier ... \n", + "C21482 1.559659e+07 1.520363e+07 ... 1.904349e+06 3.226016e+06 \n", + "C18218 2.015425e+06 2.290842e+06 ... 1.409720e+06 1.413307e+06 \n", + "C05127 9.055130e+04 1.214114e+05 ... NaN NaN \n", + "C01152 4.666031e+07 9.118524e+07 ... 2.881314e+07 3.164358e+07 \n", + "C02918 2.734582e+05 NaN ... 3.337245e+05 NaN \n", + "\n", + " s_ZX14 s_ZX15 s_ZX16 s_ZX17 \\\n", + "Identifier \n", + "C21482 7.378147e+05 2.817698e+06 3.329101e+06 3206752.75 \n", + "C18218 3.218834e+06 1.602131e+06 1.317878e+06 2930312.75 \n", + "C05127 NaN NaN NaN NaN \n", + "C01152 2.538767e+07 3.307604e+07 3.915698e+07 24400592.00 \n", + "C02918 4.347152e+05 3.532118e+04 NaN 655827.25 \n", + "\n", + " s_ZX18 s_ZX19 s_ZX20 s_ZX21 \n", + "Identifier \n", + "C21482 1.466174e+06 2.779301e+06 2.117668e+06 2.184310e+06 \n", + "C18218 1.168094e+06 2.946776e+06 1.417311e+06 1.474166e+06 \n", + "C05127 NaN 1.382788e+05 NaN NaN \n", + "C01152 2.593375e+07 6.413868e+07 4.020588e+07 4.904488e+07 \n", + "C02918 8.359706e+05 4.034381e+06 2.839358e+05 8.062160e+04 \n", + "\n", + "[5 rows x 96 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>group</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sample</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>h_jkdz1</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_jkdz2</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_jkdz3</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_jkdz4</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>h_jkdz5</th>\n", + " <td>healthy</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_ZX17</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_ZX18</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_ZX19</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_ZX20</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " <tr>\n", + " <th>s_ZX21</th>\n", + " <td>severe</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>96 rows × 1 columns</p>\n", + "</div>" + ], + "text/plain": [ + " group\n", + "sample \n", + "h_jkdz1 healthy\n", + "h_jkdz2 healthy\n", + "h_jkdz3 healthy\n", + "h_jkdz4 healthy\n", + "h_jkdz5 healthy\n", + "... 
...\n", + "s_ZX17 severe\n", + "s_ZX18 severe\n", + "s_ZX19 severe\n", + "s_ZX20 severe\n", + "s_ZX21 severe\n", + "\n", + "[96 rows x 1 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_design" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set_log_level_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create multi-omics container object" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "proteins data with (791, 70) measurements" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n", + "protein_data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "compounds data with (220, 96) measurements" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n", + "compound_data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "publication = 'Proteomic and Metabolomic Characterization of COVID-19 Patient Sera'\n", + "url = 'https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9'" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Multi-omics data container\n", + "- publication: Proteomic and Metabolomic Characterization of COVID-19 Patient Sera\n", + "- URL: 
https://www.cell.com/cell/fulltext/S0092-8674(20)30627-9\n", + "- Views: 2 modalities\n", + "\t - proteins data with (791, 70) measurements\n", + "\t - compounds data with (220, 96) measurements" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mo = MultiOmicsData(publication=publication, url=url)\n", + "mo.add_data([protein_data, compound_data])\n", + "mo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a mapping object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The mapping object uses Reactome to map the different biological entities in the data:\n", + "- Transcripts (or genes) are connected to the proteins they encode\n", + "- Proteins and compounds are connected to reactions they're involved in\n", + "- Reactions are connected to pathways" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:40:03.534 | INFO | pyMultiOmics.functions:remove_dupes:385 - Removing 9 rows with duplicate identifiers\n", + "2022-03-24 14:40:03.535 | INFO | pyMultiOmics.functions:reactome_mapping:78 - There are 211 observed compound ids\n", + "2022-03-24 14:40:03.535 | INFO | pyMultiOmics.functions:reactome_mapping:81 - Mapping genes -> proteins\n", + "2022-03-24 14:40:06.215 | INFO | pyMultiOmics.functions:reactome_mapping:86 - Mapping proteins -> reactions\n", + "2022-03-24 14:40:07.489 | INFO | pyMultiOmics.functions:reactome_mapping:94 - Mapping compounds -> reactions\n", + "2022-03-24 14:40:11.539 | INFO | pyMultiOmics.functions:reactome_mapping:100 - Mapping reactions -> pathways\n", + "2022-03-24 14:40:12.580 | INFO | pyMultiOmics.functions:reactome_mapping:111 - Mapping reactions -> proteins\n", + "2022-03-24 14:40:18.631 | INFO | pyMultiOmics.functions:reactome_mapping:118 - Mapping reactions -> compounds\n", + "2022-03-24 
14:40:21.469 | INFO | pyMultiOmics.functions:reactome_mapping:130 - Mapping proteins -> genes\n", + "2022-03-24 14:40:36.997 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: genes\n", + "2022-03-24 14:40:37.211 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: proteins\n", + "2022-03-24 14:40:37.901 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: compounds\n", + "2022-03-24 14:40:37.981 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: reactions\n", + "2022-03-24 14:40:38.092 | INFO | pyMultiOmics.mapping:_add_nodes:153 - Processing nodes: pathways\n", + "2022-03-24 14:40:38.120 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: gene_proteins\n", + "2022-03-24 14:40:38.287 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: protein_reactions\n", + "2022-03-24 14:40:39.634 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: compound_reactions\n", + "2022-03-24 14:40:39.880 | INFO | pyMultiOmics.mapping:_add_edges:201 - Processing edges: reaction_pathways\n", + "2022-03-24 14:40:40.005 | INFO | pyMultiOmics.mapping:build:51 - Created a multi-omics network with 19645 nodes and 80442 edges\n", + "2022-03-24 14:40:40.499 | INFO | pyMultiOmics.mapping:build:53 - node_counts = {'genes': 7054, 'proteins': 6590, 'compounds': 1078, 'reactions': 3926, 'pathways': 997}\n" + ] + }, + { + "data": { + "text/plain": [ + "<pyMultiOmics.mapping.Mapper at 0x1762619f0>" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m = Mapper(mo, HOMO_SAPIENS, metabolic_pathway_only=False)\n", + "m.build()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<pyMultiOmics.mapping.Mapper at 0x1762619f0>" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## Query mapping object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below are some example queries we can perform with the mapping object." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>reaction_id</th>\n", + " <th>reaction_name</th>\n", + " <th>num_proteins</th>\n", + " <th>num_compounds</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>R-HSA-114552</td>\n", + " <td>Thrombin-activated pars activate g12/13</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>R-HSA-114558</td>\n", + " <td>Thrombin-activated pars activate gq</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>R-HSA-1214188</td>\n", + " <td>Prdm9 trimethylates histone h3</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>R-HSA-1605591</td>\n", + " <td>Glucosylceramidase cleaves the glucosidic bond...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>R-HSA-163432</td>\n", + " <td>Cholesterol ester + h2o -> cholesterol + fatty...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>223</th>\n", + " <td>R-HSA-9710490</td>\n", + " <td>The gsdme 
gene promoter is hypermethylated</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>224</th>\n", + " <td>R-HSA-9733545</td>\n", + " <td>Bile salts and acids bind alb</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>225</th>\n", + " <td>R-HSA-9733960</td>\n", + " <td>Bile salts and acids dissociate from alb</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>226</th>\n", + " <td>R-HSA-977071</td>\n", + " <td>Sialyltransferase i can add sialic acid to the...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>227</th>\n", + " <td>R-HSA-977228</td>\n", + " <td>Sialyltransferase i can add sialic acid to the...</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>228 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " reaction_id reaction_name \\\n", + "0 R-HSA-114552 Thrombin-activated pars activate g12/13 \n", + "1 R-HSA-114558 Thrombin-activated pars activate gq \n", + "2 R-HSA-1214188 Prdm9 trimethylates histone h3 \n", + "3 R-HSA-1605591 Glucosylceramidase cleaves the glucosidic bond... \n", + "4 R-HSA-163432 Cholesterol ester + h2o -> cholesterol + fatty... \n", + ".. ... ... \n", + "223 R-HSA-9710490 The gsdme gene promoter is hypermethylated \n", + "224 R-HSA-9733545 Bile salts and acids bind alb \n", + "225 R-HSA-9733960 Bile salts and acids dissociate from alb \n", + "226 R-HSA-977071 Sialyltransferase i can add sialic acid to the... \n", + "227 R-HSA-977228 Sialyltransferase i can add sialic acid to the... \n", + "\n", + " num_proteins num_compounds \n", + "0 1 1 \n", + "1 1 1 \n", + "2 3 1 \n", + "3 1 1 \n", + "4 1 1 \n", + ".. ... ... 
\n", + "223 3 1 \n", + "224 1 5 \n", + "225 1 5 \n", + "226 1 1 \n", + "227 1 1 \n", + "\n", + "[228 rows x 4 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reactions = m.get_nodes(types=REACTIONS)\n", + "\n", + "data = []\n", + "for reaction_id, reaction_data in reactions:\n", + " reaction_name = reaction_data['display_name']\n", + " proteins = m.get_connected(reaction_id, dest_type=PROTEINS, observed=True)\n", + " compounds = m.get_connected(reaction_id, dest_type=COMPOUNDS, observed=True)\n", + " \n", + " if len(proteins) > 0 and len(compounds) > 0:\n", + " row = [reaction_id, reaction_name, len(proteins), len(compounds)]\n", + " data.append(row)\n", + "\n", + "df = pd.DataFrame(data, columns=['reaction_id', 'reaction_name', 'num_proteins', 'num_compounds'])\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### List all entities connected to reaction R-HSA-194153" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>ENSG00000108846</th>\n", + " <td>Abcc3</td>\n", + " <td>genes</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>O15438</th>\n", + " <td>O15438</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28865</th>\n", + " <td>Taurocholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30616</th>\n", + " <td>Atp(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17687</th>\n", + " <td>Glycocholic acid</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36274</th>\n", + " <td>Glycochenodeoxycholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16525</th>\n", + " <td>Taurochenodeoxycholic acid</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43474</th>\n", + " <td>Hydrogenphosphate</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>456216</th>\n", + " <td>Adp(3-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16359</th>\n", + " <td>Cholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9407</th>\n", + " <td>Taurochenodeoxycholate</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16755</th>\n", + " <td>Chenodeoxycholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>R-HSA-159418</th>\n", + " <td>Recycling of bile acids and salts</td>\n", + " <td>pathways</td>\n", + " <td>None</td>\n", + " <td>R-HSA-194153</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", 
+ "</div>" + ], + "text/plain": [ + " display_name data_type observed \\\n", + "entity_id \n", + "ENSG00000108846 Abcc3 genes False \n", + "O15438 O15438 proteins False \n", + "28865 Taurocholic acid compounds True \n", + "30616 Atp(4-) compounds False \n", + "17687 Glycocholic acid compounds False \n", + "36274 Glycochenodeoxycholic acid compounds True \n", + "16525 Taurochenodeoxycholic acid compounds False \n", + "43474 Hydrogenphosphate compounds False \n", + "456216 Adp(3-) compounds False \n", + "16359 Cholic acid compounds True \n", + "9407 Taurochenodeoxycholate compounds True \n", + "16755 Chenodeoxycholic acid compounds True \n", + "R-HSA-159418 Recycling of bile acids and salts pathways None \n", + "\n", + " source_id \n", + "entity_id \n", + "ENSG00000108846 R-HSA-194153 \n", + "O15438 R-HSA-194153 \n", + "28865 R-HSA-194153 \n", + "30616 R-HSA-194153 \n", + "17687 R-HSA-194153 \n", + "36274 R-HSA-194153 \n", + "16525 R-HSA-194153 \n", + "43474 R-HSA-194153 \n", + "456216 R-HSA-194153 \n", + "16359 R-HSA-194153 \n", + "9407 R-HSA-194153 \n", + "16755 R-HSA-194153 \n", + "R-HSA-159418 R-HSA-194153 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = 'R-HSA-194153'\n", + "m.get_connected(query_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Query the connections between proteins and compounds (through their shared reactions)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " 
<th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>18421</th>\n", + " <td>Superoxide</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17033</th>\n", + " <td>Biliverdin</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16990</th>\n", + " <td>Bilirubin ixalpha</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17627</th>\n", + " <td>Ferroheme b</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36144</th>\n", + " <td>Ferriheme b</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>456216</th>\n", + " <td>Adp(3-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30616</th>\n", + " <td>Atp(4-)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36274</th>\n", + " <td>Glycochenodeoxycholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16755</th>\n", + " <td>Chenodeoxycholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16359</th>\n", + " <td>Cholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28865</th>\n", + " <td>Taurocholic acid</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>17687</th>\n", + " <td>Glycocholic acid</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9407</th>\n", + " <td>Taurochenodeoxycholate</td>\n", + " <td>compounds</td>\n", + " <td>True</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17504</th>\n", + " <td>1-o-acyl-sn-glycero-3-phosphocholine(1+)</td>\n", + " <td>compounds</td>\n", + " <td>False</td>\n", + " <td>P02768</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type observed \\\n", + "entity_id \n", + "18421 Superoxide compounds False \n", + "17033 Biliverdin compounds True \n", + "16990 Bilirubin ixalpha compounds True \n", + "17627 Ferroheme b compounds False \n", + "36144 Ferriheme b compounds False \n", + "456216 Adp(3-) compounds False \n", + "30616 Atp(4-) compounds False \n", + "36274 Glycochenodeoxycholic acid compounds True \n", + "16755 Chenodeoxycholic acid compounds True \n", + "16359 Cholic acid compounds True \n", + "28865 Taurocholic acid compounds True \n", + "17687 Glycocholic acid compounds False \n", + "9407 Taurochenodeoxycholate compounds True \n", + "17504 1-o-acyl-sn-glycero-3-phosphocholine(1+) compounds False \n", + "\n", + " source_id \n", + "entity_id \n", + "18421 P02768 \n", + "17033 P02768 \n", + "16990 P02768 \n", + "17627 P02768 \n", + "36144 P02768 \n", + "456216 P02768 \n", + "30616 P02768 \n", + "36274 P02768 \n", + "16755 P02768 \n", + "16359 P02768 \n", + "28865 P02768 \n", + "17687 P02768 \n", + "9407 P02768 \n", + "17504 P02768 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = 'P02768'\n", + "m.get_connected(query_id, dest_type=COMPOUNDS)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody 
tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>display_name</th>\n", + " <th>data_type</th>\n", + " <th>observed</th>\n", + " <th>source_id</th>\n", + " </tr>\n", + " <tr>\n", + " <th>entity_id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Q9NPD5</th>\n", + " <td>Q9NPD5</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P33527</th>\n", + " <td>P33527</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q9BYK8</th>\n", + " <td>Q9BYK8</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q96RS0</th>\n", + " <td>Q96RS0</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q92793</th>\n", + " <td>Q92793</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P08047</th>\n", + " <td>P08047</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P30043</th>\n", + " <td>P30043</td>\n", + " <td>proteins</td>\n", + " <td>True</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P53004</th>\n", + " <td>P53004</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>O75182</th>\n", + " 
<td>O75182</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Q96ST3</th>\n", + " <td>Q96ST3</td>\n", + " <td>proteins</td>\n", + " <td>False</td>\n", + " <td>16990</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>130 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " display_name data_type observed source_id\n", + "entity_id \n", + "Q9NPD5 Q9NPD5 proteins False 16990\n", + "P33527 P33527 proteins False 16990\n", + "Q9BYK8 Q9BYK8 proteins False 16990\n", + "Q96RS0 Q96RS0 proteins False 16990\n", + "Q92793 Q92793 proteins False 16990\n", + "... ... ... ... ...\n", + "P08047 P08047 proteins False 16990\n", + "P30043 P30043 proteins True 16990\n", + "P53004 P53004 proteins False 16990\n", + "O75182 O75182 proteins False 16990\n", + "Q96ST3 Q96ST3 proteins False 16990\n", + "\n", + "[130 rows x 4 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_id = '16990'\n", + "m.get_connected(query_id, dest_type=PROTEINS)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}