875 lines (874 with data), 29.0 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os, sys"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib\n",
"# Use the pyplot interface; the pylab namespace is discouraged by matplotlib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print both values: only the last expression of a cell is displayed, so a bare\n",
"# `os.getcwd()` followed by another statement would be silently discarded.\n",
"# The working directory matters because later cells use relative paths.\n",
"print(os.getcwd())\n",
"print(sys.path)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:39:13.236 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:14 - Using a default neo4j server: bolt://localhost:7687\n",
"2022-03-24 14:39:13.236 | WARNING | pyMultiOmics.reactome:get_neo4j_driver:19 - Using a default neo4j username or password: neo4j\n",
"2022-03-24 14:39:13.236 | INFO | pyMultiOmics.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)\n"
]
}
],
"source": [
"sys.path.append('..')\n",
"\n",
"from pyMultiOmics.base import SingleOmicsData, MultiOmicsData\n",
"from pyMultiOmics.mapping import Mapper\n",
"from pyMultiOmics.common import set_log_level_info, set_log_level_debug, download_file, extract_zip_file\n",
"from pyMultiOmics.constants import IDENTIFIER_COL, SAMPLE_COL\n",
"from pyMultiOmics.constants import GENES, PROTEINS, COMPOUNDS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demonstration of pyMultiOmics base classes"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"### Load the multi-omics COVID-19 data from [1]\n",
"\n",
"[1] [Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.](https://www.sciencedirect.com/science/article/pii/S2405471220303719)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:39:14.454 | INFO | pyMultiOmics.common:download_file:59 - Downloading covid19_multiomics_data.zip\n",
"2.36kKB [00:00, 15.6kKB/s] \n",
"2022-03-24 14:39:14.634 | INFO | pyMultiOmics.common:extract_zip_file:71 - Extracting covid19_multiomics_data.zip\n",
"100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 256.77it/s]\n",
"2022-03-24 14:39:14.692 | INFO | pyMultiOmics.common:extract_zip_file:77 - Deleting covid19_multiomics_data.zip\n"
]
}
],
"source": [
"url = 'https://github.com/glasgowcompbio/pyMultiOmics-data/raw/main/covid19_multiomics_data.zip'\n",
"out_file = download_file(url)\n",
"extract_zip_file(out_file)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/joewandy/Work/git/pyMultiOmics/notebooks/covid19_multiomics_data'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Absolute path of the dataset folder, resolved against the current working directory\n",
"DATA_FOLDER = os.path.abspath('covid19_multiomics_data')\n",
"DATA_FOLDER"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read the individual dataframes"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"transcript_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_genes_include_p.csv'), index_col='Identifier')\n",
"transcript_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_genes_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"protein_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_protein_include_p.csv'), index_col='Identifier')\n",
"protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_protein_design.csv'), index_col='sample')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"compound_df = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_compounds_include_p.csv'), index_col='Identifier')\n",
"compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'covid_compounds_design.csv'), index_col='sample')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create single omics data container objects"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Some data cleaning is done when the data is loaded into `SingleOmicsData`:\n",
"- Duplicate values are removed from the rows and columns\n",
"- Duplicate sample names are removed\n",
"- Measurements for samples that have no metadata are removed\n",
"- Sample metadata entries that have no measurements are removed too"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-03-24 14:39:14.981 | WARNING | pyMultiOmics.base:_keep_common_samples:126 - Dropped 4 columns from sample metadata due to missing measurements\n"
]
},
{
"data": {
"text/plain": [
"genes data with (13028, 125) measurements"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transcript_data = SingleOmicsData(GENES, transcript_df, transcript_design)\n",
"transcript_data"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"proteins data with (1499, 129) measurements"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_data = SingleOmicsData(PROTEINS, protein_df, protein_design)\n",
"protein_data"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"compounds data with (46, 129) measurements"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compound_data = SingleOmicsData(COMPOUNDS, compound_df, compound_design)\n",
"compound_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Getting values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can get data out of the container by using the `data_df` and `design_df` attributes.\n",
"\n",
"Notice that after the data is loaded and cleaned, the number of samples (columns) in the measurement dataframe (`data_df`) is the same as the number of rows in the sample metadata dataframe (`design_df`) — 129 for the protein data shown below."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sample_1</th>\n",
" <th>sample_2</th>\n",
" <th>sample_3</th>\n",
" <th>sample_4</th>\n",
" <th>sample_5</th>\n",
" <th>sample_6</th>\n",
" <th>sample_7</th>\n",
" <th>sample_8</th>\n",
" <th>sample_9</th>\n",
" <th>sample_10</th>\n",
" <th>...</th>\n",
" <th>sample_120</th>\n",
" <th>sample_121</th>\n",
" <th>sample_122</th>\n",
" <th>sample_123</th>\n",
" <th>sample_124</th>\n",
" <th>sample_125</th>\n",
" <th>sample_126</th>\n",
" <th>sample_127</th>\n",
" <th>sample_128</th>\n",
" <th>sample_129</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Identifier</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A0A024R6I7</th>\n",
" <td>37.995543</td>\n",
" <td>37.353091</td>\n",
" <td>37.527875</td>\n",
" <td>37.673128</td>\n",
" <td>37.983542</td>\n",
" <td>37.489959</td>\n",
" <td>37.615303</td>\n",
" <td>37.534702</td>\n",
" <td>37.525762</td>\n",
" <td>37.844902</td>\n",
" <td>...</td>\n",
" <td>37.730180</td>\n",
" <td>38.082377</td>\n",
" <td>37.661959</td>\n",
" <td>37.195828</td>\n",
" <td>37.365447</td>\n",
" <td>37.564109</td>\n",
" <td>37.683884</td>\n",
" <td>37.282698</td>\n",
" <td>37.655909</td>\n",
" <td>37.754401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0A0G2JRN3</th>\n",
" <td>37.995543</td>\n",
" <td>37.353091</td>\n",
" <td>37.527875</td>\n",
" <td>37.673128</td>\n",
" <td>37.983542</td>\n",
" <td>37.489959</td>\n",
" <td>37.615303</td>\n",
" <td>37.534702</td>\n",
" <td>37.525762</td>\n",
" <td>37.844902</td>\n",
" <td>...</td>\n",
" <td>37.730180</td>\n",
" <td>38.082377</td>\n",
" <td>37.661959</td>\n",
" <td>37.195828</td>\n",
" <td>37.365447</td>\n",
" <td>37.564109</td>\n",
" <td>37.683884</td>\n",
" <td>37.282698</td>\n",
" <td>37.655909</td>\n",
" <td>37.754401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0A075B6H9</th>\n",
" <td>27.176361</td>\n",
" <td>29.419228</td>\n",
" <td>27.174171</td>\n",
" <td>28.879702</td>\n",
" <td>27.262485</td>\n",
" <td>29.535232</td>\n",
" <td>27.657446</td>\n",
" <td>30.323779</td>\n",
" <td>28.194885</td>\n",
" <td>29.253091</td>\n",
" <td>...</td>\n",
" <td>27.731627</td>\n",
" <td>27.621776</td>\n",
" <td>27.249976</td>\n",
" <td>27.403813</td>\n",
" <td>27.133006</td>\n",
" <td>27.793462</td>\n",
" <td>26.876193</td>\n",
" <td>27.715091</td>\n",
" <td>26.965212</td>\n",
" <td>28.376997</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0A075B6I0</th>\n",
" <td>28.294477</td>\n",
" <td>29.048510</td>\n",
" <td>28.953215</td>\n",
" <td>29.527460</td>\n",
" <td>28.605867</td>\n",
" <td>29.251895</td>\n",
" <td>26.943355</td>\n",
" <td>29.914134</td>\n",
" <td>28.947290</td>\n",
" <td>29.108465</td>\n",
" <td>...</td>\n",
" <td>28.977874</td>\n",
" <td>27.961132</td>\n",
" <td>29.085045</td>\n",
" <td>28.319069</td>\n",
" <td>29.418182</td>\n",
" <td>29.003245</td>\n",
" <td>28.002763</td>\n",
" <td>28.660543</td>\n",
" <td>28.665092</td>\n",
" <td>29.839541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0A075B6I4</th>\n",
" <td>24.783368</td>\n",
" <td>27.573331</td>\n",
" <td>25.619161</td>\n",
" <td>23.893562</td>\n",
" <td>18.440251</td>\n",
" <td>25.033513</td>\n",
" <td>19.384221</td>\n",
" <td>22.054705</td>\n",
" <td>27.642444</td>\n",
" <td>27.163654</td>\n",
" <td>...</td>\n",
" <td>27.365780</td>\n",
" <td>26.182814</td>\n",
" <td>27.649039</td>\n",
" <td>25.888229</td>\n",
" <td>26.724164</td>\n",
" <td>26.577443</td>\n",
" <td>27.996742</td>\n",
" <td>21.713502</td>\n",
" <td>19.659693</td>\n",
" <td>20.145198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>V9GYE3</th>\n",
" <td>30.157740</td>\n",
" <td>29.011826</td>\n",
" <td>30.755732</td>\n",
" <td>31.037739</td>\n",
" <td>30.690791</td>\n",
" <td>29.544621</td>\n",
" <td>29.991859</td>\n",
" <td>29.005067</td>\n",
" <td>28.611893</td>\n",
" <td>29.039570</td>\n",
" <td>...</td>\n",
" <td>30.307260</td>\n",
" <td>30.378135</td>\n",
" <td>29.925498</td>\n",
" <td>30.471018</td>\n",
" <td>31.777999</td>\n",
" <td>31.486205</td>\n",
" <td>31.822441</td>\n",
" <td>31.144855</td>\n",
" <td>32.149526</td>\n",
" <td>28.001153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>V9GYG9</th>\n",
" <td>30.157740</td>\n",
" <td>29.011826</td>\n",
" <td>30.755732</td>\n",
" <td>31.037739</td>\n",
" <td>30.690791</td>\n",
" <td>29.544621</td>\n",
" <td>29.991859</td>\n",
" <td>29.005067</td>\n",
" <td>28.611893</td>\n",
" <td>29.039570</td>\n",
" <td>...</td>\n",
" <td>30.307260</td>\n",
" <td>30.378135</td>\n",
" <td>29.925498</td>\n",
" <td>30.471018</td>\n",
" <td>31.777999</td>\n",
" <td>31.486205</td>\n",
" <td>31.822441</td>\n",
" <td>31.144855</td>\n",
" <td>32.149526</td>\n",
" <td>28.001153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>X6R8F3</th>\n",
" <td>22.821315</td>\n",
" <td>20.147061</td>\n",
" <td>21.002496</td>\n",
" <td>19.830364</td>\n",
" <td>18.213300</td>\n",
" <td>22.831885</td>\n",
" <td>21.686202</td>\n",
" <td>23.468367</td>\n",
" <td>20.292526</td>\n",
" <td>24.971321</td>\n",
" <td>...</td>\n",
" <td>20.178151</td>\n",
" <td>21.893118</td>\n",
" <td>24.072429</td>\n",
" <td>18.115715</td>\n",
" <td>20.282632</td>\n",
" <td>21.229028</td>\n",
" <td>23.293898</td>\n",
" <td>20.400931</td>\n",
" <td>18.845678</td>\n",
" <td>26.863133</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P80188</th>\n",
" <td>22.821315</td>\n",
" <td>20.147061</td>\n",
" <td>21.002496</td>\n",
" <td>19.830364</td>\n",
" <td>18.213300</td>\n",
" <td>22.831885</td>\n",
" <td>21.686202</td>\n",
" <td>23.468367</td>\n",
" <td>20.292526</td>\n",
" <td>24.971321</td>\n",
" <td>...</td>\n",
" <td>20.178151</td>\n",
" <td>21.893118</td>\n",
" <td>24.072429</td>\n",
" <td>18.115715</td>\n",
" <td>20.282632</td>\n",
" <td>21.229028</td>\n",
" <td>23.293898</td>\n",
" <td>20.400931</td>\n",
" <td>18.845678</td>\n",
" <td>26.863133</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P80188-2</th>\n",
" <td>22.821315</td>\n",
" <td>20.147061</td>\n",
" <td>21.002496</td>\n",
" <td>19.830364</td>\n",
" <td>18.213300</td>\n",
" <td>22.831885</td>\n",
" <td>21.686202</td>\n",
" <td>23.468367</td>\n",
" <td>20.292526</td>\n",
" <td>24.971321</td>\n",
" <td>...</td>\n",
" <td>20.178151</td>\n",
" <td>21.893118</td>\n",
" <td>24.072429</td>\n",
" <td>18.115715</td>\n",
" <td>20.282632</td>\n",
" <td>21.229028</td>\n",
" <td>23.293898</td>\n",
" <td>20.400931</td>\n",
" <td>18.845678</td>\n",
" <td>26.863133</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1499 rows × 129 columns</p>\n",
"</div>"
],
"text/plain": [
" sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 \\\n",
"Identifier \n",
"A0A024R6I7 37.995543 37.353091 37.527875 37.673128 37.983542 37.489959 \n",
"A0A0G2JRN3 37.995543 37.353091 37.527875 37.673128 37.983542 37.489959 \n",
"A0A075B6H9 27.176361 29.419228 27.174171 28.879702 27.262485 29.535232 \n",
"A0A075B6I0 28.294477 29.048510 28.953215 29.527460 28.605867 29.251895 \n",
"A0A075B6I4 24.783368 27.573331 25.619161 23.893562 18.440251 25.033513 \n",
"... ... ... ... ... ... ... \n",
"V9GYE3 30.157740 29.011826 30.755732 31.037739 30.690791 29.544621 \n",
"V9GYG9 30.157740 29.011826 30.755732 31.037739 30.690791 29.544621 \n",
"X6R8F3 22.821315 20.147061 21.002496 19.830364 18.213300 22.831885 \n",
"P80188 22.821315 20.147061 21.002496 19.830364 18.213300 22.831885 \n",
"P80188-2 22.821315 20.147061 21.002496 19.830364 18.213300 22.831885 \n",
"\n",
" sample_7 sample_8 sample_9 sample_10 ... sample_120 \\\n",
"Identifier ... \n",
"A0A024R6I7 37.615303 37.534702 37.525762 37.844902 ... 37.730180 \n",
"A0A0G2JRN3 37.615303 37.534702 37.525762 37.844902 ... 37.730180 \n",
"A0A075B6H9 27.657446 30.323779 28.194885 29.253091 ... 27.731627 \n",
"A0A075B6I0 26.943355 29.914134 28.947290 29.108465 ... 28.977874 \n",
"A0A075B6I4 19.384221 22.054705 27.642444 27.163654 ... 27.365780 \n",
"... ... ... ... ... ... ... \n",
"V9GYE3 29.991859 29.005067 28.611893 29.039570 ... 30.307260 \n",
"V9GYG9 29.991859 29.005067 28.611893 29.039570 ... 30.307260 \n",
"X6R8F3 21.686202 23.468367 20.292526 24.971321 ... 20.178151 \n",
"P80188 21.686202 23.468367 20.292526 24.971321 ... 20.178151 \n",
"P80188-2 21.686202 23.468367 20.292526 24.971321 ... 20.178151 \n",
"\n",
" sample_121 sample_122 sample_123 sample_124 sample_125 \\\n",
"Identifier \n",
"A0A024R6I7 38.082377 37.661959 37.195828 37.365447 37.564109 \n",
"A0A0G2JRN3 38.082377 37.661959 37.195828 37.365447 37.564109 \n",
"A0A075B6H9 27.621776 27.249976 27.403813 27.133006 27.793462 \n",
"A0A075B6I0 27.961132 29.085045 28.319069 29.418182 29.003245 \n",
"A0A075B6I4 26.182814 27.649039 25.888229 26.724164 26.577443 \n",
"... ... ... ... ... ... \n",
"V9GYE3 30.378135 29.925498 30.471018 31.777999 31.486205 \n",
"V9GYG9 30.378135 29.925498 30.471018 31.777999 31.486205 \n",
"X6R8F3 21.893118 24.072429 18.115715 20.282632 21.229028 \n",
"P80188 21.893118 24.072429 18.115715 20.282632 21.229028 \n",
"P80188-2 21.893118 24.072429 18.115715 20.282632 21.229028 \n",
"\n",
" sample_126 sample_127 sample_128 sample_129 \n",
"Identifier \n",
"A0A024R6I7 37.683884 37.282698 37.655909 37.754401 \n",
"A0A0G2JRN3 37.683884 37.282698 37.655909 37.754401 \n",
"A0A075B6H9 26.876193 27.715091 26.965212 28.376997 \n",
"A0A075B6I0 28.002763 28.660543 28.665092 29.839541 \n",
"A0A075B6I4 27.996742 21.713502 19.659693 20.145198 \n",
"... ... ... ... ... \n",
"V9GYE3 31.822441 31.144855 32.149526 28.001153 \n",
"V9GYG9 31.822441 31.144855 32.149526 28.001153 \n",
"X6R8F3 23.293898 20.400931 18.845678 26.863133 \n",
"P80188 23.293898 20.400931 18.845678 26.863133 \n",
"P80188-2 23.293898 20.400931 18.845678 26.863133 \n",
"\n",
"[1499 rows x 129 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_data.data_df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>group</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>sample_1</th>\n",
" <td>covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_2</th>\n",
" <td>covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_3</th>\n",
" <td>covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_4</th>\n",
" <td>covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_5</th>\n",
" <td>covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_125</th>\n",
" <td>non_covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_126</th>\n",
" <td>non_covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_127</th>\n",
" <td>non_covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_128</th>\n",
" <td>non_covid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_129</th>\n",
" <td>non_covid</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>129 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" group\n",
"sample \n",
"sample_1 covid\n",
"sample_2 covid\n",
"sample_3 covid\n",
"sample_4 covid\n",
"sample_5 covid\n",
"... ...\n",
"sample_125 non_covid\n",
"sample_126 non_covid\n",
"sample_127 non_covid\n",
"sample_128 non_covid\n",
"sample_129 non_covid\n",
"\n",
"[129 rows x 1 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"protein_data.design_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a multi-omics data container object"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Metadata describing where the dataset comes from.\n",
"# The link is kept in its own name so it does not overwrite `url`, which holds\n",
"# the dataset download link used earlier in the notebook.\n",
"publication = 'Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.'\n",
"publication_url = 'https://www.sciencedirect.com/science/article/pii/S2405471220303719'"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Multi-omics data container\n",
"- publication: Overmyer, Katherine A., et al. \"Large-scale multi-omic analysis of COVID-19 severity.\" Cell systems 12.1 (2021): 23-40.\n",
"- URL: https://www.sciencedirect.com/science/article/pii/S2405471220303719\n",
"- Views: 3 modalities\n",
"\t - genes data with (13028, 125) measurements\n",
"\t - proteins data with (1499, 129) measurements\n",
"\t - compounds data with (46, 129) measurements"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bundle the three single-omics containers into one multi-omics container\n",
"mo = MultiOmicsData(publication=publication, url=publication_url)\n",
"mo.add_data([transcript_data, protein_data, compound_data])\n",
"mo"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}