--- a +++ b/notebooks/base_test.ipynb @@ -0,0 +1,874 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pylab as plt\n", + "import matplotlib\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/Users/joewandy/Work/git/pyMultiOmics/notebooks', '/opt/anaconda3/envs/pyMultiOmics/lib/python310.zip', '/opt/anaconda3/envs/pyMultiOmics/lib/python3.10', '/opt/anaconda3/envs/pyMultiOmics/lib/python3.10/lib-dynload', '', '/opt/anaconda3/envs/pyMultiOmics/lib/python3.10/site-packages']\n" + ] + } + ], + "source": [ + "os.getcwd()\n", + "print(sys.path)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:39:13.236 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n", + "2022-03-24 14:39:13.236 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n", + "2022-03-24 14:39:13.236 | INFO | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n" + ] + } + ], + "source": [ + "sys.path.append('..')\n", + "\n", + "from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n", + "from pyMultiOmics.mapping import Mapper\n", + "from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file\n", + "from pyMultiOmics.constants import IDENTIFIER_COL, SAMPLE_COL\n", + "from pyMultiOmics.constants import GENES, PROTEINS, COMPOUNDS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Demonstration of pyMultiOmics base classes" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Load the multi-omics COVID-19 data from [1]\n", + "\n", + "[1] [Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.](https://www.sciencedirect.com/science/article/pii/S2405471220303719)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:39:14.454 | INFO | pyMultiOmics.common:download_file:59 - Downloading covid19_multiomics_data.zip\n", + "2.36kKB [00:00, 15.6kKB/s] \n", + "2022-03-24 14:39:14.634 | INFO | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_multiomics_data.zip\n", + "100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 256.77it/s]\n", + "2022-03-24 14:39:14.692 | INFO | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_multiomics_data.zip\n" + ] + } + ], + "source": [ + "url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_multiomics_data.zip'\n", + "out_file = download_file(url)\n", + "extract_zip_file(out_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_multiomics_data'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DATA_FOLDER = os.path.abspath(os.path.join('covid19_multiomics_data'))\n", + "DATA_FOLDER" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read the individual dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "transcript_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_genes_include_p.csv'), index_col='Identifier')\n", + "transcript_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_genes_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_protein_include_p.csv'), index_col='Identifier')\n", + "protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_protein_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_compounds_include_p.csv'), index_col='Identifier')\n", + "compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_compounds_design.csv'), index_col='sample')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create single omics data container objects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some data cleaning is done upon loading in `SingleOmicsData`:\n", + "- Duplicate values are removed from the rows and columns\n", + "- Duplicate sample names are removed\n", + "- Measurements with missing metadata are removed\n", + "- Metadata with missing measurements are removed too" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-03-24 14:39:14.981 | WARNING | pyMultiOmics.base:_keep_common_samples:126 - Dropped 4 columns from sample metadata due to missing measurements\n" + ] + }, + { + "data": { + "text/plain": [ + "genes data with (13028, 125) measurements" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transcript_data = SingleOmicsData(GENES, transcript_df, transcript_design)\n", + "transcript_data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "proteins data with (1499, 129) measurements" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n", + "protein_data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "compounds data with (46, 129) measurements" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n", + "compound_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Getting values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can get data out of the container by using the `data_df` and `design_df` attributes.\n", + "\n", + "Notice that after the data is loaded and cleaned, the number of samples in the measurement dataframe (`data_df`) is the same as the number of rows in the sample metadata dataframe (`design_df`)." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>sample_1</th>\n", + " <th>sample_2</th>\n", + " <th>sample_3</th>\n", + " <th>sample_4</th>\n", + " <th>sample_5</th>\n", + " <th>sample_6</th>\n", + " <th>sample_7</th>\n", + " <th>sample_8</th>\n", + " <th>sample_9</th>\n", + " <th>sample_10</th>\n", + " <th>...</th>\n", + " <th>sample_120</th>\n", + " <th>sample_121</th>\n", + " <th>sample_122</th>\n", + " <th>sample_123</th>\n", + " <th>sample_124</th>\n", + " <th>sample_125</th>\n", + " <th>sample_126</th>\n", + " <th>sample_127</th>\n", + " <th>sample_128</th>\n", + " <th>sample_129</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Identifier</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>A0A024R6I7</th>\n", + " <td>37.995543</td>\n", + " <td>37.353091</td>\n", + " <td>37.527875</td>\n", + " <td>37.673128</td>\n", + " <td>37.983542</td>\n", + " <td>37.489959</td>\n", + " <td>37.615303</td>\n", + " <td>37.534702</td>\n", + " <td>37.525762</td>\n", + " <td>37.844902</td>\n", + " <td>...</td>\n", + " <td>37.730180</td>\n", + " <td>38.082377</td>\n", + " <td>37.661959</td>\n", + " <td>37.195828</td>\n", + " <td>37.365447</td>\n", + " <td>37.564109</td>\n", + " <td>37.683884</td>\n", + " <td>37.282698</td>\n", + " <td>37.655909</td>\n", + " <td>37.754401</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0A0G2JRN3</th>\n", + " <td>37.995543</td>\n", + " <td>37.353091</td>\n", + " <td>37.527875</td>\n", + " <td>37.673128</td>\n", + " <td>37.983542</td>\n", + " <td>37.489959</td>\n", + " <td>37.615303</td>\n", + " <td>37.534702</td>\n", + " <td>37.525762</td>\n", + " <td>37.844902</td>\n", + " <td>...</td>\n", + " <td>37.730180</td>\n", + " <td>38.082377</td>\n", + " <td>37.661959</td>\n", + " <td>37.195828</td>\n", + " <td>37.365447</td>\n", + " <td>37.564109</td>\n", + " <td>37.683884</td>\n", + " <td>37.282698</td>\n", + " <td>37.655909</td>\n", + " <td>37.754401</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0A075B6H9</th>\n", + " <td>27.176361</td>\n", + " <td>29.419228</td>\n", + " <td>27.174171</td>\n", + " <td>28.879702</td>\n", + " <td>27.262485</td>\n", + " <td>29.535232</td>\n", + " <td>27.657446</td>\n", + " <td>30.323779</td>\n", + " <td>28.194885</td>\n", + " <td>29.253091</td>\n", + " <td>...</td>\n", + " <td>27.731627</td>\n", + " <td>27.621776</td>\n", + " <td>27.249976</td>\n", + " <td>27.403813</td>\n", + " <td>27.133006</td>\n", + " <td>27.793462</td>\n", + " <td>26.876193</td>\n", + " <td>27.715091</td>\n", + " <td>26.965212</td>\n", + " <td>28.376997</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0A075B6I0</th>\n", + " <td>28.294477</td>\n", + " <td>29.048510</td>\n", + " <td>28.953215</td>\n", + " <td>29.527460</td>\n", + " <td>28.605867</td>\n", + " <td>29.251895</td>\n", + " <td>26.943355</td>\n", + " <td>29.914134</td>\n", + " <td>28.947290</td>\n", + " <td>29.108465</td>\n", + " <td>...</td>\n", + " <td>28.977874</td>\n", + " <td>27.961132</td>\n", + " <td>29.085045</td>\n", + " <td>28.319069</td>\n", + " <td>29.418182</td>\n", + " <td>29.003245</td>\n", + " <td>28.002763</td>\n", + " <td>28.660543</td>\n", + " <td>28.665092</td>\n", + " <td>29.839541</td>\n", + " </tr>\n", + " <tr>\n", + " <th>A0A075B6I4</th>\n", + " <td>24.783368</td>\n", + " <td>27.573331</td>\n", + " <td>25.619161</td>\n", + " <td>23.893562</td>\n", + " <td>18.440251</td>\n", + " <td>25.033513</td>\n", + " <td>19.384221</td>\n", + " <td>22.054705</td>\n", + " <td>27.642444</td>\n", + " <td>27.163654</td>\n", + " <td>...</td>\n", + " <td>27.365780</td>\n", + " <td>26.182814</td>\n", + " <td>27.649039</td>\n", + " <td>25.888229</td>\n", + " <td>26.724164</td>\n", + " <td>26.577443</td>\n", + " <td>27.996742</td>\n", + " <td>21.713502</td>\n", + " <td>19.659693</td>\n", + " <td>20.145198</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>V9GYE3</th>\n", + " <td>30.157740</td>\n", + " <td>29.011826</td>\n", + " <td>30.755732</td>\n", + " <td>31.037739</td>\n", + " <td>30.690791</td>\n", + " <td>29.544621</td>\n", + " <td>29.991859</td>\n", + " <td>29.005067</td>\n", + " <td>28.611893</td>\n", + " <td>29.039570</td>\n", + " <td>...</td>\n", + " <td>30.307260</td>\n", + " <td>30.378135</td>\n", + " <td>29.925498</td>\n", + " <td>30.471018</td>\n", + " <td>31.777999</td>\n", + " <td>31.486205</td>\n", + " <td>31.822441</td>\n", + " <td>31.144855</td>\n", + " <td>32.149526</td>\n", + " <td>28.001153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>V9GYG9</th>\n", + " <td>30.157740</td>\n", + " <td>29.011826</td>\n", + " <td>30.755732</td>\n", + " <td>31.037739</td>\n", + " <td>30.690791</td>\n", + " <td>29.544621</td>\n", + " <td>29.991859</td>\n", + " <td>29.005067</td>\n", + " <td>28.611893</td>\n", + " <td>29.039570</td>\n", + " <td>...</td>\n", + " <td>30.307260</td>\n", + " <td>30.378135</td>\n", + " <td>29.925498</td>\n", + " <td>30.471018</td>\n", + " <td>31.777999</td>\n", + " <td>31.486205</td>\n", + " <td>31.822441</td>\n", + " <td>31.144855</td>\n", + " <td>32.149526</td>\n", + " <td>28.001153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>X6R8F3</th>\n", + " <td>22.821315</td>\n", + " <td>20.147061</td>\n", + " <td>21.002496</td>\n", + " <td>19.830364</td>\n", + " <td>18.213300</td>\n", + " <td>22.831885</td>\n", + " <td>21.686202</td>\n", + " <td>23.468367</td>\n", + " <td>20.292526</td>\n", + " <td>24.971321</td>\n", + " <td>...</td>\n", + " <td>20.178151</td>\n", + " <td>21.893118</td>\n", + " <td>24.072429</td>\n", + " <td>18.115715</td>\n", + " <td>20.282632</td>\n", + " <td>21.229028</td>\n", + " <td>23.293898</td>\n", + " <td>20.400931</td>\n", + " <td>18.845678</td>\n", + " <td>26.863133</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P80188</th>\n", + " <td>22.821315</td>\n", + " <td>20.147061</td>\n", + " <td>21.002496</td>\n", + " <td>19.830364</td>\n", + " <td>18.213300</td>\n", + " <td>22.831885</td>\n", + " <td>21.686202</td>\n", + " <td>23.468367</td>\n", + " <td>20.292526</td>\n", + " <td>24.971321</td>\n", + " <td>...</td>\n", + " <td>20.178151</td>\n", + " <td>21.893118</td>\n", + " <td>24.072429</td>\n", + " <td>18.115715</td>\n", + " <td>20.282632</td>\n", + " <td>21.229028</td>\n", + " <td>23.293898</td>\n", + " <td>20.400931</td>\n", + " <td>18.845678</td>\n", + " <td>26.863133</td>\n", + " </tr>\n", + " <tr>\n", + " <th>P80188-2</th>\n", + " <td>22.821315</td>\n", + " <td>20.147061</td>\n", + " <td>21.002496</td>\n", + " <td>19.830364</td>\n", + " <td>18.213300</td>\n", + " <td>22.831885</td>\n", + " <td>21.686202</td>\n", + " <td>23.468367</td>\n", + " <td>20.292526</td>\n", + " <td>24.971321</td>\n", + " <td>...</td>\n", + " <td>20.178151</td>\n", + " <td>21.893118</td>\n", + " <td>24.072429</td>\n", + " <td>18.115715</td>\n", + " <td>20.282632</td>\n", + " <td>21.229028</td>\n", + " <td>23.293898</td>\n", + " <td>20.400931</td>\n", + " <td>18.845678</td>\n", + " <td>26.863133</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>1499 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 \\\n", + "Identifier \n", + "A0A024R6I7 37.995543 37.353091 37.527875 37.673128 37.983542 37.489959 \n", + "A0A0G2JRN3 37.995543 37.353091 37.527875 37.673128 37.983542 37.489959 \n", + "A0A075B6H9 27.176361 29.419228 27.174171 28.879702 27.262485 29.535232 \n", + "A0A075B6I0 28.294477 29.048510 28.953215 29.527460 28.605867 29.251895 \n", + "A0A075B6I4 24.783368 27.573331 25.619161 23.893562 18.440251 25.033513 \n", + "... ... ... ... ... ... ... \n", + "V9GYE3 30.157740 29.011826 30.755732 31.037739 30.690791 29.544621 \n", + "V9GYG9 30.157740 29.011826 30.755732 31.037739 30.690791 29.544621 \n", + "X6R8F3 22.821315 20.147061 21.002496 19.830364 18.213300 22.831885 \n", + "P80188 22.821315 20.147061 21.002496 19.830364 18.213300 22.831885 \n", + "P80188-2 22.821315 20.147061 21.002496 19.830364 18.213300 22.831885 \n", + "\n", + " sample_7 sample_8 sample_9 sample_10 ... sample_120 \\\n", + "Identifier ... \n", + "A0A024R6I7 37.615303 37.534702 37.525762 37.844902 ... 37.730180 \n", + "A0A0G2JRN3 37.615303 37.534702 37.525762 37.844902 ... 37.730180 \n", + "A0A075B6H9 27.657446 30.323779 28.194885 29.253091 ... 27.731627 \n", + "A0A075B6I0 26.943355 29.914134 28.947290 29.108465 ... 28.977874 \n", + "A0A075B6I4 19.384221 22.054705 27.642444 27.163654 ... 27.365780 \n", + "... ... ... ... ... ... ... \n", + "V9GYE3 29.991859 29.005067 28.611893 29.039570 ... 30.307260 \n", + "V9GYG9 29.991859 29.005067 28.611893 29.039570 ... 30.307260 \n", + "X6R8F3 21.686202 23.468367 20.292526 24.971321 ... 20.178151 \n", + "P80188 21.686202 23.468367 20.292526 24.971321 ... 20.178151 \n", + "P80188-2 21.686202 23.468367 20.292526 24.971321 ... 20.178151 \n", + "\n", + " sample_121 sample_122 sample_123 sample_124 sample_125 \\\n", + "Identifier \n", + "A0A024R6I7 38.082377 37.661959 37.195828 37.365447 37.564109 \n", + "A0A0G2JRN3 38.082377 37.661959 37.195828 37.365447 37.564109 \n", + "A0A075B6H9 27.621776 27.249976 27.403813 27.133006 27.793462 \n", + "A0A075B6I0 27.961132 29.085045 28.319069 29.418182 29.003245 \n", + "A0A075B6I4 26.182814 27.649039 25.888229 26.724164 26.577443 \n", + "... ... ... ... ... ... \n", + "V9GYE3 30.378135 29.925498 30.471018 31.777999 31.486205 \n", + "V9GYG9 30.378135 29.925498 30.471018 31.777999 31.486205 \n", + "X6R8F3 21.893118 24.072429 18.115715 20.282632 21.229028 \n", + "P80188 21.893118 24.072429 18.115715 20.282632 21.229028 \n", + "P80188-2 21.893118 24.072429 18.115715 20.282632 21.229028 \n", + "\n", + " sample_126 sample_127 sample_128 sample_129 \n", + "Identifier \n", + "A0A024R6I7 37.683884 37.282698 37.655909 37.754401 \n", + "A0A0G2JRN3 37.683884 37.282698 37.655909 37.754401 \n", + "A0A075B6H9 26.876193 27.715091 26.965212 28.376997 \n", + "A0A075B6I0 28.002763 28.660543 28.665092 29.839541 \n", + "A0A075B6I4 27.996742 21.713502 19.659693 20.145198 \n", + "... ... ... ... ... \n", + "V9GYE3 31.822441 31.144855 32.149526 28.001153 \n", + "V9GYG9 31.822441 31.144855 32.149526 28.001153 \n", + "X6R8F3 23.293898 20.400931 18.845678 26.863133 \n", + "P80188 23.293898 20.400931 18.845678 26.863133 \n", + "P80188-2 23.293898 20.400931 18.845678 26.863133 \n", + "\n", + "[1499 rows x 129 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_data.data_df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>group</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sample</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>sample_1</th>\n", + " <td>covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_2</th>\n", + " <td>covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_3</th>\n", + " <td>covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_4</th>\n", + " <td>covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_5</th>\n", + " <td>covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_125</th>\n", + " <td>non_covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_126</th>\n", + " <td>non_covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_127</th>\n", + " <td>non_covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_128</th>\n", + " <td>non_covid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sample_129</th>\n", + " <td>non_covid</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>129 rows × 1 columns</p>\n", + "</div>" + ], + "text/plain": [ + " group\n", + "sample \n", + "sample_1 covid\n", + "sample_2 covid\n", + "sample_3 covid\n", + "sample_4 covid\n", + "sample_5 covid\n", + "... ...\n", + "sample_125 non_covid\n", + "sample_126 non_covid\n", + "sample_127 non_covid\n", + "sample_128 non_covid\n", + "sample_129 non_covid\n", + "\n", + "[129 rows x 1 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protein_data.design_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a multi-omics data container objects" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "publication = 'Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.'\n", + "url = 'https://www.sciencedirect.com/science/article/pii/S2405471220303719'" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Multi-omics data container\n", + "- publication: Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.\n", + "- URL: https://www.sciencedirect.com/science/article/pii/S2405471220303719\n", + "- Views: 3 modalities\n", + "\t - genes data with (13028, 125) measurements\n", + "\t - proteins data with (1499, 129) measurements\n", + "\t - compounds data with (46, 129) measurements" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mo = MultiOmicsData(publication=publication, url=url)\n", + "mo.add_data([transcript_data, protein_data, compound_data])\n", + "mo" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}