pyMultiOmics / Git / [7d5693] /notebooks/base

Models:
AlyssaS/
pyMultiOmics
Downloads: 1
[7d5693]: / notebooks / base_test.ipynb
History
Download this file
875 lines (874 with data), 29.0 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pylab as plt\n",
    "import matplotlib\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['/Users/joewandy/Work/git/pyMultiOmics/notebooks', '/opt/anaconda3/envs/pyMultiOmics/lib/python310.zip', '/opt/anaconda3/envs/pyMultiOmics/lib/python3.10', '/opt/anaconda3/envs/pyMultiOmics/lib/python3.10/lib-dynload', '', '/opt/anaconda3/envs/pyMultiOmics/lib/python3.10/site-packages']\n"
     ]
    }
   ],
   "source": [
    "os.getcwd()\n",
    "print(sys.path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-03-24 14:39:13.236 | WARNING  | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n",
      "2022-03-24 14:39:13.236 | WARNING  | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n",
      "2022-03-24 14:39:13.236 | INFO     | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n"
     ]
    }
   ],
   "source": [
    "sys.path.append('..')\n",
    "\n",
    "from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n",
    "from pyMultiOmics.mapping import Mapper\n",
    "from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file\n",
    "from pyMultiOmics.constants import IDENTIFIER_COL, SAMPLE_COL\n",
    "from pyMultiOmics.constants import GENES, PROTEINS, COMPOUNDS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demonstration of pyMultiOmics base classes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "### Load the multi-omics COVID-19 data from [1]\n",
    "\n",
    "[1] [Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.](https://www.sciencedirect.com/science/article/pii/S2405471220303719)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-03-24 14:39:14.454 | INFO     | pyMultiOmics.common:download_file:59 - Downloading covid19_multiomics_data.zip\n",
      "2.36kKB [00:00, 15.6kKB/s]                                                                                              \n",
      "2022-03-24 14:39:14.634 | INFO     | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_multiomics_data.zip\n",
      "100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 256.77it/s]\n",
      "2022-03-24 14:39:14.692 | INFO     | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_multiomics_data.zip\n"
     ]
    }
   ],
   "source": [
    "url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_multiomics_data.zip'\n",
    "out_file = download_file(url)\n",
    "extract_zip_file(out_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_multiomics_data'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DATA_FOLDER = os.path.abspath(os.path.join('covid19_multiomics_data'))\n",
    "DATA_FOLDER"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read the individual dataframes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "transcript_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_genes_include_p.csv'), index_col='Identifier')\n",
    "transcript_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_genes_design.csv'), index_col='sample')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_protein_include_p.csv'), index_col='Identifier')\n",
    "protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_protein_design.csv'), index_col='sample')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_compounds_include_p.csv'), index_col='Identifier')\n",
    "compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_compounds_design.csv'), index_col='sample')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create single omics data container objects"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some data cleaning is done upon loading in `SingleOmicsData`:\n",
    "- Duplicate values are removed from the rows and columns\n",
    "- Duplicate sample names are removed\n",
    "- Measurements with missing metadata are removed\n",
    "- Metadata with missing measurements are removed too"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-03-24 14:39:14.981 | WARNING  | pyMultiOmics.base:_keep_common_samples:126 - Dropped 4 columns from sample metadata due to missing measurements\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "genes data with (13028, 125) measurements"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "transcript_data = SingleOmicsData(GENES, transcript_df, transcript_design)\n",
    "transcript_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "proteins data with (1499, 129) measurements"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n",
    "protein_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "compounds data with (46, 129) measurements"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n",
    "compound_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Getting values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can get data out of the container by using the `data_df` and `design_df` attributes.\n",
    "\n",
    "Notice that after the data is loaded and cleaned, the number of samples in the measurement dataframe (`data_df`) is the same as the number of rows in the sample metadata dataframe (`design_df`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sample_1</th>\n",
       "      <th>sample_2</th>\n",
       "      <th>sample_3</th>\n",
       "      <th>sample_4</th>\n",
       "      <th>sample_5</th>\n",
       "      <th>sample_6</th>\n",
       "      <th>sample_7</th>\n",
       "      <th>sample_8</th>\n",
       "      <th>sample_9</th>\n",
       "      <th>sample_10</th>\n",
       "      <th>...</th>\n",
       "      <th>sample_120</th>\n",
       "      <th>sample_121</th>\n",
       "      <th>sample_122</th>\n",
       "      <th>sample_123</th>\n",
       "      <th>sample_124</th>\n",
       "      <th>sample_125</th>\n",
       "      <th>sample_126</th>\n",
       "      <th>sample_127</th>\n",
       "      <th>sample_128</th>\n",
       "      <th>sample_129</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Identifier</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A0A024R6I7</th>\n",
       "      <td>37.995543</td>\n",
       "      <td>37.353091</td>\n",
       "      <td>37.527875</td>\n",
       "      <td>37.673128</td>\n",
       "      <td>37.983542</td>\n",
       "      <td>37.489959</td>\n",
       "      <td>37.615303</td>\n",
       "      <td>37.534702</td>\n",
       "      <td>37.525762</td>\n",
       "      <td>37.844902</td>\n",
       "      <td>...</td>\n",
       "      <td>37.730180</td>\n",
       "      <td>38.082377</td>\n",
       "      <td>37.661959</td>\n",
       "      <td>37.195828</td>\n",
       "      <td>37.365447</td>\n",
       "      <td>37.564109</td>\n",
       "      <td>37.683884</td>\n",
       "      <td>37.282698</td>\n",
       "      <td>37.655909</td>\n",
       "      <td>37.754401</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A0A0G2JRN3</th>\n",
       "      <td>37.995543</td>\n",
       "      <td>37.353091</td>\n",
       "      <td>37.527875</td>\n",
       "      <td>37.673128</td>\n",
       "      <td>37.983542</td>\n",
       "      <td>37.489959</td>\n",
       "      <td>37.615303</td>\n",
       "      <td>37.534702</td>\n",
       "      <td>37.525762</td>\n",
       "      <td>37.844902</td>\n",
       "      <td>...</td>\n",
       "      <td>37.730180</td>\n",
       "      <td>38.082377</td>\n",
       "      <td>37.661959</td>\n",
       "      <td>37.195828</td>\n",
       "      <td>37.365447</td>\n",
       "      <td>37.564109</td>\n",
       "      <td>37.683884</td>\n",
       "      <td>37.282698</td>\n",
       "      <td>37.655909</td>\n",
       "      <td>37.754401</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A0A075B6H9</th>\n",
       "      <td>27.176361</td>\n",
       "      <td>29.419228</td>\n",
       "      <td>27.174171</td>\n",
       "      <td>28.879702</td>\n",
       "      <td>27.262485</td>\n",
       "      <td>29.535232</td>\n",
       "      <td>27.657446</td>\n",
       "      <td>30.323779</td>\n",
       "      <td>28.194885</td>\n",
       "      <td>29.253091</td>\n",
       "      <td>...</td>\n",
       "      <td>27.731627</td>\n",
       "      <td>27.621776</td>\n",
       "      <td>27.249976</td>\n",
       "      <td>27.403813</td>\n",
       "      <td>27.133006</td>\n",
       "      <td>27.793462</td>\n",
       "      <td>26.876193</td>\n",
       "      <td>27.715091</td>\n",
       "      <td>26.965212</td>\n",
       "      <td>28.376997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A0A075B6I0</th>\n",
       "      <td>28.294477</td>\n",
       "      <td>29.048510</td>\n",
       "      <td>28.953215</td>\n",
       "      <td>29.527460</td>\n",
       "      <td>28.605867</td>\n",
       "      <td>29.251895</td>\n",
       "      <td>26.943355</td>\n",
       "      <td>29.914134</td>\n",
       "      <td>28.947290</td>\n",
       "      <td>29.108465</td>\n",
       "      <td>...</td>\n",
       "      <td>28.977874</td>\n",
       "      <td>27.961132</td>\n",
       "      <td>29.085045</td>\n",
       "      <td>28.319069</td>\n",
       "      <td>29.418182</td>\n",
       "      <td>29.003245</td>\n",
       "      <td>28.002763</td>\n",
       "      <td>28.660543</td>\n",
       "      <td>28.665092</td>\n",
       "      <td>29.839541</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A0A075B6I4</th>\n",
       "      <td>24.783368</td>\n",
       "      <td>27.573331</td>\n",
       "      <td>25.619161</td>\n",
       "      <td>23.893562</td>\n",
       "      <td>18.440251</td>\n",
       "      <td>25.033513</td>\n",
       "      <td>19.384221</td>\n",
       "      <td>22.054705</td>\n",
       "      <td>27.642444</td>\n",
       "      <td>27.163654</td>\n",
       "      <td>...</td>\n",
       "      <td>27.365780</td>\n",
       "      <td>26.182814</td>\n",
       "      <td>27.649039</td>\n",
       "      <td>25.888229</td>\n",
       "      <td>26.724164</td>\n",
       "      <td>26.577443</td>\n",
       "      <td>27.996742</td>\n",
       "      <td>21.713502</td>\n",
       "      <td>19.659693</td>\n",
       "      <td>20.145198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>V9GYE3</th>\n",
       "      <td>30.157740</td>\n",
       "      <td>29.011826</td>\n",
       "      <td>30.755732</td>\n",
       "      <td>31.037739</td>\n",
       "      <td>30.690791</td>\n",
       "      <td>29.544621</td>\n",
       "      <td>29.991859</td>\n",
       "      <td>29.005067</td>\n",
       "      <td>28.611893</td>\n",
       "      <td>29.039570</td>\n",
       "      <td>...</td>\n",
       "      <td>30.307260</td>\n",
       "      <td>30.378135</td>\n",
       "      <td>29.925498</td>\n",
       "      <td>30.471018</td>\n",
       "      <td>31.777999</td>\n",
       "      <td>31.486205</td>\n",
       "      <td>31.822441</td>\n",
       "      <td>31.144855</td>\n",
       "      <td>32.149526</td>\n",
       "      <td>28.001153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>V9GYG9</th>\n",
       "      <td>30.157740</td>\n",
       "      <td>29.011826</td>\n",
       "      <td>30.755732</td>\n",
       "      <td>31.037739</td>\n",
       "      <td>30.690791</td>\n",
       "      <td>29.544621</td>\n",
       "      <td>29.991859</td>\n",
       "      <td>29.005067</td>\n",
       "      <td>28.611893</td>\n",
       "      <td>29.039570</td>\n",
       "      <td>...</td>\n",
       "      <td>30.307260</td>\n",
       "      <td>30.378135</td>\n",
       "      <td>29.925498</td>\n",
       "      <td>30.471018</td>\n",
       "      <td>31.777999</td>\n",
       "      <td>31.486205</td>\n",
       "      <td>31.822441</td>\n",
       "      <td>31.144855</td>\n",
       "      <td>32.149526</td>\n",
       "      <td>28.001153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>X6R8F3</th>\n",
       "      <td>22.821315</td>\n",
       "      <td>20.147061</td>\n",
       "      <td>21.002496</td>\n",
       "      <td>19.830364</td>\n",
       "      <td>18.213300</td>\n",
       "      <td>22.831885</td>\n",
       "      <td>21.686202</td>\n",
       "      <td>23.468367</td>\n",
       "      <td>20.292526</td>\n",
       "      <td>24.971321</td>\n",
       "      <td>...</td>\n",
       "      <td>20.178151</td>\n",
       "      <td>21.893118</td>\n",
       "      <td>24.072429</td>\n",
       "      <td>18.115715</td>\n",
       "      <td>20.282632</td>\n",
       "      <td>21.229028</td>\n",
       "      <td>23.293898</td>\n",
       "      <td>20.400931</td>\n",
       "      <td>18.845678</td>\n",
       "      <td>26.863133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P80188</th>\n",
       "      <td>22.821315</td>\n",
       "      <td>20.147061</td>\n",
       "      <td>21.002496</td>\n",
       "      <td>19.830364</td>\n",
       "      <td>18.213300</td>\n",
       "      <td>22.831885</td>\n",
       "      <td>21.686202</td>\n",
       "      <td>23.468367</td>\n",
       "      <td>20.292526</td>\n",
       "      <td>24.971321</td>\n",
       "      <td>...</td>\n",
       "      <td>20.178151</td>\n",
       "      <td>21.893118</td>\n",
       "      <td>24.072429</td>\n",
       "      <td>18.115715</td>\n",
       "      <td>20.282632</td>\n",
       "      <td>21.229028</td>\n",
       "      <td>23.293898</td>\n",
       "      <td>20.400931</td>\n",
       "      <td>18.845678</td>\n",
       "      <td>26.863133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P80188-2</th>\n",
       "      <td>22.821315</td>\n",
       "      <td>20.147061</td>\n",
       "      <td>21.002496</td>\n",
       "      <td>19.830364</td>\n",
       "      <td>18.213300</td>\n",
       "      <td>22.831885</td>\n",
       "      <td>21.686202</td>\n",
       "      <td>23.468367</td>\n",
       "      <td>20.292526</td>\n",
       "      <td>24.971321</td>\n",
       "      <td>...</td>\n",
       "      <td>20.178151</td>\n",
       "      <td>21.893118</td>\n",
       "      <td>24.072429</td>\n",
       "      <td>18.115715</td>\n",
       "      <td>20.282632</td>\n",
       "      <td>21.229028</td>\n",
       "      <td>23.293898</td>\n",
       "      <td>20.400931</td>\n",
       "      <td>18.845678</td>\n",
       "      <td>26.863133</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1499 rows × 129 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             sample_1   sample_2   sample_3   sample_4   sample_5   sample_6  \\\n",
       "Identifier                                                                     \n",
       "A0A024R6I7  37.995543  37.353091  37.527875  37.673128  37.983542  37.489959   \n",
       "A0A0G2JRN3  37.995543  37.353091  37.527875  37.673128  37.983542  37.489959   \n",
       "A0A075B6H9  27.176361  29.419228  27.174171  28.879702  27.262485  29.535232   \n",
       "A0A075B6I0  28.294477  29.048510  28.953215  29.527460  28.605867  29.251895   \n",
       "A0A075B6I4  24.783368  27.573331  25.619161  23.893562  18.440251  25.033513   \n",
       "...               ...        ...        ...        ...        ...        ...   \n",
       "V9GYE3      30.157740  29.011826  30.755732  31.037739  30.690791  29.544621   \n",
       "V9GYG9      30.157740  29.011826  30.755732  31.037739  30.690791  29.544621   \n",
       "X6R8F3      22.821315  20.147061  21.002496  19.830364  18.213300  22.831885   \n",
       "P80188      22.821315  20.147061  21.002496  19.830364  18.213300  22.831885   \n",
       "P80188-2    22.821315  20.147061  21.002496  19.830364  18.213300  22.831885   \n",
       "\n",
       "             sample_7   sample_8   sample_9  sample_10  ...  sample_120  \\\n",
       "Identifier                                              ...               \n",
       "A0A024R6I7  37.615303  37.534702  37.525762  37.844902  ...   37.730180   \n",
       "A0A0G2JRN3  37.615303  37.534702  37.525762  37.844902  ...   37.730180   \n",
       "A0A075B6H9  27.657446  30.323779  28.194885  29.253091  ...   27.731627   \n",
       "A0A075B6I0  26.943355  29.914134  28.947290  29.108465  ...   28.977874   \n",
       "A0A075B6I4  19.384221  22.054705  27.642444  27.163654  ...   27.365780   \n",
       "...               ...        ...        ...        ...  ...         ...   \n",
       "V9GYE3      29.991859  29.005067  28.611893  29.039570  ...   30.307260   \n",
       "V9GYG9      29.991859  29.005067  28.611893  29.039570  ...   30.307260   \n",
       "X6R8F3      21.686202  23.468367  20.292526  24.971321  ...   20.178151   \n",
       "P80188      21.686202  23.468367  20.292526  24.971321  ...   20.178151   \n",
       "P80188-2    21.686202  23.468367  20.292526  24.971321  ...   20.178151   \n",
       "\n",
       "            sample_121  sample_122  sample_123  sample_124  sample_125  \\\n",
       "Identifier                                                               \n",
       "A0A024R6I7   38.082377   37.661959   37.195828   37.365447   37.564109   \n",
       "A0A0G2JRN3   38.082377   37.661959   37.195828   37.365447   37.564109   \n",
       "A0A075B6H9   27.621776   27.249976   27.403813   27.133006   27.793462   \n",
       "A0A075B6I0   27.961132   29.085045   28.319069   29.418182   29.003245   \n",
       "A0A075B6I4   26.182814   27.649039   25.888229   26.724164   26.577443   \n",
       "...                ...         ...         ...         ...         ...   \n",
       "V9GYE3       30.378135   29.925498   30.471018   31.777999   31.486205   \n",
       "V9GYG9       30.378135   29.925498   30.471018   31.777999   31.486205   \n",
       "X6R8F3       21.893118   24.072429   18.115715   20.282632   21.229028   \n",
       "P80188       21.893118   24.072429   18.115715   20.282632   21.229028   \n",
       "P80188-2     21.893118   24.072429   18.115715   20.282632   21.229028   \n",
       "\n",
       "            sample_126  sample_127  sample_128  sample_129  \n",
       "Identifier                                                  \n",
       "A0A024R6I7   37.683884   37.282698   37.655909   37.754401  \n",
       "A0A0G2JRN3   37.683884   37.282698   37.655909   37.754401  \n",
       "A0A075B6H9   26.876193   27.715091   26.965212   28.376997  \n",
       "A0A075B6I0   28.002763   28.660543   28.665092   29.839541  \n",
       "A0A075B6I4   27.996742   21.713502   19.659693   20.145198  \n",
       "...                ...         ...         ...         ...  \n",
       "V9GYE3       31.822441   31.144855   32.149526   28.001153  \n",
       "V9GYG9       31.822441   31.144855   32.149526   28.001153  \n",
       "X6R8F3       23.293898   20.400931   18.845678   26.863133  \n",
       "P80188       23.293898   20.400931   18.845678   26.863133  \n",
       "P80188-2     23.293898   20.400931   18.845678   26.863133  \n",
       "\n",
       "[1499 rows x 129 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "protein_data.data_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>group</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>sample_1</th>\n",
       "      <td>covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_2</th>\n",
       "      <td>covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_3</th>\n",
       "      <td>covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_4</th>\n",
       "      <td>covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_5</th>\n",
       "      <td>covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_125</th>\n",
       "      <td>non_covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_126</th>\n",
       "      <td>non_covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_127</th>\n",
       "      <td>non_covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_128</th>\n",
       "      <td>non_covid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample_129</th>\n",
       "      <td>non_covid</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>129 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                group\n",
       "sample               \n",
       "sample_1        covid\n",
       "sample_2        covid\n",
       "sample_3        covid\n",
       "sample_4        covid\n",
       "sample_5        covid\n",
       "...               ...\n",
       "sample_125  non_covid\n",
       "sample_126  non_covid\n",
       "sample_127  non_covid\n",
       "sample_128  non_covid\n",
       "sample_129  non_covid\n",
       "\n",
       "[129 rows x 1 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "protein_data.design_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create a multi-omics data container objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "publication = 'Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.'\n",
    "url = 'https://www.sciencedirect.com/science/article/pii/S2405471220303719'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Multi-omics data container\n",
       "- publication: Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.\n",
       "- URL: https://www.sciencedirect.com/science/article/pii/S2405471220303719\n",
       "- Views: 3 modalities\n",
       "\t - genes data with (13028, 125) measurements\n",
       "\t - proteins data with (1499, 129) measurements\n",
       "\t - compounds data with (46, 129) measurements"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mo = MultiOmicsData(publication=publication, url=url)\n",
    "mo.add_data([transcript_data, protein_data, compound_data])\n",
    "mo"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}