2865 lines (2864 with data), 148.8 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# move to dash dir\n",
"os.chdir(\"../../../src/dash/\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"2020-07-08 13:15:06\n",
"Loading data for app...\n",
"\n",
"Loading metabolomics data...\n",
"Metabolomics data shape: (129, 174)\n",
"Loading lipidomics data...\n",
"Lipidomics data shape: (129, 3376)\n",
"Loading proteomics data...\n",
"Proteomics data shape: (129, 536)\n",
"Loading transcriptomics data...\n",
"Transcriptomics data shape: (125, 13282)\n",
"Getting biomolecule names for dataset: metabolomics\n",
"Getting biomolecule names for dataset: lipidomics\n",
"Getting biomolecule names for dataset: proteomics\n",
"Getting biomolecule names for dataset: transcriptomics\n"
]
}
],
"source": [
"### load shared data ###\n",
"from data import get_omics_data, get_biomolecule_names\n",
"import datetime\n",
"\n",
"print()\n",
"print(datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"))\n",
"print(\"Loading data for app...\")\n",
"print()\n",
"# load metabolomics data matrix\n",
"print(\"Loading metabolomics data...\")\n",
"metabolomics_df, metabolomics_quant_range = get_omics_data(dataset='metabolomics', with_metadata=True)\n",
"print(\"Metabolomics data shape: {}\".format(metabolomics_df.shape))\n",
"print(\"Loading lipidomics data...\")\n",
"lipidomics_df, lipidomics_quant_range = get_omics_data(dataset='lipidomics', with_metadata=True)\n",
"print(\"Lipidomics data shape: {}\".format(lipidomics_df.shape))\n",
"print(\"Loading proteomics data...\")\n",
"proteomics_df, proteomics_quant_range = get_omics_data(dataset='proteomics', with_metadata=True)\n",
"print(\"Proteomics data shape: {}\".format(proteomics_df.shape))\n",
"print(\"Loading transcriptomics data...\")\n",
"transcriptomics_df, transcriptomics_quant_range = get_omics_data(dataset='transcriptomics', with_metadata=True)\n",
"print(\"Transcriptomics data shape: {}\".format(transcriptomics_df.shape))\n",
"\n",
"# make biomolecule_name_dict\n",
"metabolomics_biomolecule_names_dict = get_biomolecule_names(dataset='metabolomics')\n",
"lipidomics_biomolecule_names_dict = get_biomolecule_names(dataset='lipidomics')\n",
"proteomics_biomolecule_names_dict = get_biomolecule_names(dataset='proteomics')\n",
"transcriptomics_biomolecule_names_dict = get_biomolecule_names(dataset='transcriptomics')\n",
"\n",
"# define dataset dictionaries\n",
"dataset_dict = {\n",
" \"Proteins\":\"proteomics\",\n",
" \"Lipids\":\"lipidomics\",\n",
" \"Metabolites\":\"metabolomics\",\n",
" \"Transcripts\":\"transcriptomics\",\n",
" \"Combined Biomolecules\":\"combined\"\n",
" }\n",
"\n",
"df_dict = {\n",
" \"proteomics\":proteomics_df,\n",
" \"lipidomics\":lipidomics_df,\n",
" \"metabolomics\":metabolomics_df,\n",
" \"transcriptomics\":transcriptomics_df\n",
"}\n",
"\n",
"quant_value_range_dict = {\n",
" \"proteomics\":proteomics_quant_range,\n",
" \"lipidomics\":lipidomics_quant_range,\n",
" \"metabolomics\":metabolomics_quant_range,\n",
" \"transcriptomics\":transcriptomics_quant_range\n",
"}\n",
"\n",
"global_names_dict = {\n",
" \"proteomics\":proteomics_biomolecule_names_dict,\n",
" \"lipidomics\":lipidomics_biomolecule_names_dict,\n",
" \"metabolomics\":metabolomics_biomolecule_names_dict,\n",
" \"transcriptomics\":transcriptomics_biomolecule_names_dict,\n",
" \"combined\":{**proteomics_biomolecule_names_dict,\n",
" **lipidomics_biomolecule_names_dict,\n",
" **metabolomics_biomolecule_names_dict}\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>7593</th>\n",
" <th>7596</th>\n",
" <th>7597</th>\n",
" <th>7599</th>\n",
" <th>7600</th>\n",
" <th>7601</th>\n",
" <th>7602</th>\n",
" <th>7605</th>\n",
" <th>7606</th>\n",
" <th>7607</th>\n",
" <th>...</th>\n",
" <th>Vent_free_days</th>\n",
" <th>DM</th>\n",
" <th>Hospital_free_days_45</th>\n",
" <th>Ferritin_NG/ML</th>\n",
" <th>CRP_MG/L</th>\n",
" <th>DDIMER_mg/L_FEU</th>\n",
" <th>Procalcitonin_NG/ML</th>\n",
" <th>Lactate_MMol/L</th>\n",
" <th>Fibrinogen</th>\n",
" <th>SOFA</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>37.995543</td>\n",
" <td>27.176361</td>\n",
" <td>28.294477</td>\n",
" <td>24.783368</td>\n",
" <td>23.926601</td>\n",
" <td>23.490571</td>\n",
" <td>25.490816</td>\n",
" <td>29.287382</td>\n",
" <td>29.505121</td>\n",
" <td>29.909421</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>946</td>\n",
" <td>73.1</td>\n",
" <td>1.3</td>\n",
" <td>36</td>\n",
" <td>0.9</td>\n",
" <td>513</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>37.353091</td>\n",
" <td>29.419228</td>\n",
" <td>29.048510</td>\n",
" <td>27.573331</td>\n",
" <td>24.671039</td>\n",
" <td>22.343591</td>\n",
" <td>27.166913</td>\n",
" <td>29.576452</td>\n",
" <td>30.081443</td>\n",
" <td>29.514012</td>\n",
" <td>...</td>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>39</td>\n",
" <td>1060</td>\n",
" <td></td>\n",
" <td>1.03</td>\n",
" <td>0.37</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>37.527875</td>\n",
" <td>27.174171</td>\n",
" <td>28.953215</td>\n",
" <td>25.619161</td>\n",
" <td>24.528623</td>\n",
" <td>20.714471</td>\n",
" <td>25.589237</td>\n",
" <td>29.621968</td>\n",
" <td>29.398338</td>\n",
" <td>29.436129</td>\n",
" <td>...</td>\n",
" <td>28</td>\n",
" <td>1</td>\n",
" <td>18</td>\n",
" <td>1335</td>\n",
" <td>53.2</td>\n",
" <td>1.48</td>\n",
" <td>0.07</td>\n",
" <td></td>\n",
" <td>513</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>37.673128</td>\n",
" <td>28.879702</td>\n",
" <td>29.527460</td>\n",
" <td>23.893562</td>\n",
" <td>25.875326</td>\n",
" <td>24.170967</td>\n",
" <td>25.280226</td>\n",
" <td>30.966780</td>\n",
" <td>30.360452</td>\n",
" <td>30.041791</td>\n",
" <td>...</td>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>39</td>\n",
" <td>583</td>\n",
" <td>251.1</td>\n",
" <td>1.32</td>\n",
" <td>0.98</td>\n",
" <td>0.87</td>\n",
" <td>949</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>5.0</th>\n",
" <td>37.983542</td>\n",
" <td>27.262485</td>\n",
" <td>28.605867</td>\n",
" <td>18.440251</td>\n",
" <td>20.888495</td>\n",
" <td>17.354782</td>\n",
" <td>25.654472</td>\n",
" <td>29.020542</td>\n",
" <td>29.175695</td>\n",
" <td>29.628988</td>\n",
" <td>...</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>27</td>\n",
" <td>800</td>\n",
" <td>355.8</td>\n",
" <td>0.69</td>\n",
" <td>4.92</td>\n",
" <td>1.48</td>\n",
" <td>929</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 536 columns</p>\n",
"</div>"
],
"text/plain": [
" 7593 7596 7597 7599 7600 7601 \\\n",
"sample_id \n",
"1.0 37.995543 27.176361 28.294477 24.783368 23.926601 23.490571 \n",
"2.0 37.353091 29.419228 29.048510 27.573331 24.671039 22.343591 \n",
"3.0 37.527875 27.174171 28.953215 25.619161 24.528623 20.714471 \n",
"4.0 37.673128 28.879702 29.527460 23.893562 25.875326 24.170967 \n",
"5.0 37.983542 27.262485 28.605867 18.440251 20.888495 17.354782 \n",
"\n",
" 7602 7605 7606 7607 ... Vent_free_days \\\n",
"sample_id ... \n",
"1.0 25.490816 29.287382 29.505121 29.909421 ... 0 \n",
"2.0 27.166913 29.576452 30.081443 29.514012 ... 28 \n",
"3.0 25.589237 29.621968 29.398338 29.436129 ... 28 \n",
"4.0 25.280226 30.966780 30.360452 30.041791 ... 28 \n",
"5.0 25.654472 29.020542 29.175695 29.628988 ... 23 \n",
"\n",
" DM Hospital_free_days_45 Ferritin_NG/ML CRP_MG/L \\\n",
"sample_id \n",
"1.0 0 0 946 73.1 \n",
"2.0 0 39 1060 \n",
"3.0 1 18 1335 53.2 \n",
"4.0 0 39 583 251.1 \n",
"5.0 0 27 800 355.8 \n",
"\n",
" DDIMER_mg/L_FEU Procalcitonin_NG/ML Lactate_MMol/L Fibrinogen \\\n",
"sample_id \n",
"1.0 1.3 36 0.9 513 \n",
"2.0 1.03 0.37 \n",
"3.0 1.48 0.07 513 \n",
"4.0 1.32 0.98 0.87 949 \n",
"5.0 0.69 4.92 1.48 929 \n",
"\n",
" SOFA \n",
"sample_id \n",
"1.0 8 \n",
"2.0 \n",
"3.0 \n",
"4.0 \n",
"5.0 7 \n",
"\n",
"[5 rows x 536 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"proteomics_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {},
"outputs": [],
"source": [
"col_names = [proteomics_biomolecule_names_dict[col] \\\n",
" if col in proteomics_biomolecule_names_dict else col \\\n",
" for col in proteomics_df.columns.values]"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"drop_cols = ['Sample_label', 'Albany_sampleID', 'DM']\n",
"keep_cols = ['COVID', 'ICU_1', 'Gender', 'Age_less_than_90', 'Hospital_free_days_45']\n",
"metadata_columns = proteomics_df.columns[proteomics_quant_range:]\n",
"#proteomics_df[metadata_columns.sort_values()].drop(drop_cols, axis=1).dropna(axis=1)\n",
"metadata_df = proteomics_df[keep_cols]"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"metadata_df = metadata_df.replace('', np.nan).dropna()"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [],
"source": [
"COVID_list = []\n",
"ICU_list = []\n",
"age_list = []\n",
"int_bool_dict = {\n",
" 0:\"False\",\n",
" 1:\"True\"\n",
"}\n",
"for index, row in metadata_df.iterrows():\n",
" COVID = int_bool_dict[row['COVID']]\n",
" COVID_list.append(COVID)\n",
" ICU = int_bool_dict[row['ICU_1']]\n",
" ICU_list.append(ICU)\n",
" \n",
" age_list.append(int(row['Age_less_than_90']))\n",
" \n",
"metadata_df['COVID'] = COVID_list\n",
"metadata_df['ICU_1'] = ICU_list\n",
"metadata_df['Age_less_than_90'] = age_list"
]
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>COVID</th>\n",
" <th>ICU_1</th>\n",
" <th>Gender</th>\n",
" <th>Age_less_than_90</th>\n",
" <th>Hospital_free_days_45</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>M</td>\n",
" <td>39</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>M</td>\n",
" <td>63</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>M</td>\n",
" <td>33</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>M</td>\n",
" <td>49</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5.0</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>M</td>\n",
" <td>49</td>\n",
" <td>27</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" COVID ICU_1 Gender Age_less_than_90 Hospital_free_days_45\n",
"sample_id \n",
"1.0 True False M 39 0\n",
"2.0 True False M 63 39\n",
"3.0 True False M 33 18\n",
"4.0 True False M 49 39\n",
"5.0 True False M 49 27"
]
},
"execution_count": 182,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metadata_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 183,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>7593</th>\n",
" <th>7596</th>\n",
" <th>7597</th>\n",
" <th>7599</th>\n",
" <th>7600</th>\n",
" <th>7601</th>\n",
" <th>7602</th>\n",
" <th>7605</th>\n",
" <th>7606</th>\n",
" <th>7607</th>\n",
" <th>...</th>\n",
" <th>8319</th>\n",
" <th>8321</th>\n",
" <th>8322</th>\n",
" <th>8323</th>\n",
" <th>8328</th>\n",
" <th>8330</th>\n",
" <th>8334</th>\n",
" <th>8335</th>\n",
" <th>8336</th>\n",
" <th>8337</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>37.995543</td>\n",
" <td>27.176361</td>\n",
" <td>28.294477</td>\n",
" <td>24.783368</td>\n",
" <td>23.926601</td>\n",
" <td>23.490571</td>\n",
" <td>25.490816</td>\n",
" <td>29.287382</td>\n",
" <td>29.505121</td>\n",
" <td>29.909421</td>\n",
" <td>...</td>\n",
" <td>25.746683</td>\n",
" <td>22.593546</td>\n",
" <td>22.024047</td>\n",
" <td>23.733969</td>\n",
" <td>19.755864</td>\n",
" <td>29.920821</td>\n",
" <td>25.980045</td>\n",
" <td>22.915739</td>\n",
" <td>30.157740</td>\n",
" <td>22.821315</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>37.353091</td>\n",
" <td>29.419228</td>\n",
" <td>29.048510</td>\n",
" <td>27.573331</td>\n",
" <td>24.671039</td>\n",
" <td>22.343591</td>\n",
" <td>27.166913</td>\n",
" <td>29.576452</td>\n",
" <td>30.081443</td>\n",
" <td>29.514012</td>\n",
" <td>...</td>\n",
" <td>25.580983</td>\n",
" <td>19.026956</td>\n",
" <td>24.628786</td>\n",
" <td>22.508744</td>\n",
" <td>19.399144</td>\n",
" <td>30.791802</td>\n",
" <td>26.601871</td>\n",
" <td>22.352374</td>\n",
" <td>29.011826</td>\n",
" <td>20.147061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>37.527875</td>\n",
" <td>27.174171</td>\n",
" <td>28.953215</td>\n",
" <td>25.619161</td>\n",
" <td>24.528623</td>\n",
" <td>20.714471</td>\n",
" <td>25.589237</td>\n",
" <td>29.621968</td>\n",
" <td>29.398338</td>\n",
" <td>29.436129</td>\n",
" <td>...</td>\n",
" <td>24.452494</td>\n",
" <td>17.439869</td>\n",
" <td>17.624139</td>\n",
" <td>20.428779</td>\n",
" <td>23.950737</td>\n",
" <td>30.332235</td>\n",
" <td>26.391377</td>\n",
" <td>21.239148</td>\n",
" <td>30.755732</td>\n",
" <td>21.002496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>37.673128</td>\n",
" <td>28.879702</td>\n",
" <td>29.527460</td>\n",
" <td>23.893562</td>\n",
" <td>25.875326</td>\n",
" <td>24.170967</td>\n",
" <td>25.280226</td>\n",
" <td>30.966780</td>\n",
" <td>30.360452</td>\n",
" <td>30.041791</td>\n",
" <td>...</td>\n",
" <td>25.934676</td>\n",
" <td>22.237947</td>\n",
" <td>22.410592</td>\n",
" <td>22.664706</td>\n",
" <td>22.214572</td>\n",
" <td>30.400396</td>\n",
" <td>26.618349</td>\n",
" <td>20.050655</td>\n",
" <td>31.037739</td>\n",
" <td>19.830364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5.0</th>\n",
" <td>37.983542</td>\n",
" <td>27.262485</td>\n",
" <td>28.605867</td>\n",
" <td>18.440251</td>\n",
" <td>20.888495</td>\n",
" <td>17.354782</td>\n",
" <td>25.654472</td>\n",
" <td>29.020542</td>\n",
" <td>29.175695</td>\n",
" <td>29.628988</td>\n",
" <td>...</td>\n",
" <td>25.115770</td>\n",
" <td>23.014475</td>\n",
" <td>20.869440</td>\n",
" <td>23.920980</td>\n",
" <td>23.752258</td>\n",
" <td>30.274865</td>\n",
" <td>26.143827</td>\n",
" <td>24.123605</td>\n",
" <td>30.690791</td>\n",
" <td>18.213300</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 517 columns</p>\n",
"</div>"
],
"text/plain": [
" 7593 7596 7597 7599 7600 7601 \\\n",
"sample_id \n",
"1.0 37.995543 27.176361 28.294477 24.783368 23.926601 23.490571 \n",
"2.0 37.353091 29.419228 29.048510 27.573331 24.671039 22.343591 \n",
"3.0 37.527875 27.174171 28.953215 25.619161 24.528623 20.714471 \n",
"4.0 37.673128 28.879702 29.527460 23.893562 25.875326 24.170967 \n",
"5.0 37.983542 27.262485 28.605867 18.440251 20.888495 17.354782 \n",
"\n",
" 7602 7605 7606 7607 ... 8319 \\\n",
"sample_id ... \n",
"1.0 25.490816 29.287382 29.505121 29.909421 ... 25.746683 \n",
"2.0 27.166913 29.576452 30.081443 29.514012 ... 25.580983 \n",
"3.0 25.589237 29.621968 29.398338 29.436129 ... 24.452494 \n",
"4.0 25.280226 30.966780 30.360452 30.041791 ... 25.934676 \n",
"5.0 25.654472 29.020542 29.175695 29.628988 ... 25.115770 \n",
"\n",
" 8321 8322 8323 8328 8330 8334 \\\n",
"sample_id \n",
"1.0 22.593546 22.024047 23.733969 19.755864 29.920821 25.980045 \n",
"2.0 19.026956 24.628786 22.508744 19.399144 30.791802 26.601871 \n",
"3.0 17.439869 17.624139 20.428779 23.950737 30.332235 26.391377 \n",
"4.0 22.237947 22.410592 22.664706 22.214572 30.400396 26.618349 \n",
"5.0 23.014475 20.869440 23.920980 23.752258 30.274865 26.143827 \n",
"\n",
" 8335 8336 8337 \n",
"sample_id \n",
"1.0 22.915739 30.157740 22.821315 \n",
"2.0 22.352374 29.011826 20.147061 \n",
"3.0 21.239148 30.755732 21.002496 \n",
"4.0 20.050655 31.037739 19.830364 \n",
"5.0 24.123605 30.690791 18.213300 \n",
"\n",
"[5 rows x 517 columns]"
]
},
"execution_count": 183,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"quant_df = proteomics_df.loc[metadata_df.index][proteomics_df.columns[:proteomics_quant_range]]\n",
"quant_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>('[P] Alpha-1-antitrypsin')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 4-69')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 8-61')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 10-54')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 7-46')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 5-37')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 2-18')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 3-10')</th>\n",
" <th>('[P] Immunoglobulin lambda variable 3-9')</th>\n",
" <th>('[P] Immunoglobulin kappa variable 2-28')</th>\n",
" <th>...</th>\n",
" <th>('[P] Prenylcysteine oxidase 1')</th>\n",
" <th>('[P] N-acetylglucosamine-1-phosphotransferase subunit gamma')</th>\n",
" <th>('[P] Coronin-1C')</th>\n",
" <th>('[P] Multiple inositol polyphosphate phosphatase 1')</th>\n",
" <th>('[P] Angiopoietin-related protein 3')</th>\n",
" <th>('[P] IgGFc-binding protein')</th>\n",
" <th>('[P] Histone H2B')</th>\n",
" <th>('[P] Low affinity immunoglobulin gamma Fc region receptor II-a (Fragment)')</th>\n",
" <th>('[P] Apolipoprotein A-II')</th>\n",
" <th>('[P] Neutrophil gelatinase-associated lipocalin')</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>37.995543</td>\n",
" <td>27.176361</td>\n",
" <td>28.294477</td>\n",
" <td>24.783368</td>\n",
" <td>23.926601</td>\n",
" <td>23.490571</td>\n",
" <td>25.490816</td>\n",
" <td>29.287382</td>\n",
" <td>29.505121</td>\n",
" <td>29.909421</td>\n",
" <td>...</td>\n",
" <td>25.746683</td>\n",
" <td>22.593546</td>\n",
" <td>22.024047</td>\n",
" <td>23.733969</td>\n",
" <td>19.755864</td>\n",
" <td>29.920821</td>\n",
" <td>25.980045</td>\n",
" <td>22.915739</td>\n",
" <td>30.157740</td>\n",
" <td>22.821315</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>37.353091</td>\n",
" <td>29.419228</td>\n",
" <td>29.048510</td>\n",
" <td>27.573331</td>\n",
" <td>24.671039</td>\n",
" <td>22.343591</td>\n",
" <td>27.166913</td>\n",
" <td>29.576452</td>\n",
" <td>30.081443</td>\n",
" <td>29.514012</td>\n",
" <td>...</td>\n",
" <td>25.580983</td>\n",
" <td>19.026956</td>\n",
" <td>24.628786</td>\n",
" <td>22.508744</td>\n",
" <td>19.399144</td>\n",
" <td>30.791802</td>\n",
" <td>26.601871</td>\n",
" <td>22.352374</td>\n",
" <td>29.011826</td>\n",
" <td>20.147061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>37.527875</td>\n",
" <td>27.174171</td>\n",
" <td>28.953215</td>\n",
" <td>25.619161</td>\n",
" <td>24.528623</td>\n",
" <td>20.714471</td>\n",
" <td>25.589237</td>\n",
" <td>29.621968</td>\n",
" <td>29.398338</td>\n",
" <td>29.436129</td>\n",
" <td>...</td>\n",
" <td>24.452494</td>\n",
" <td>17.439869</td>\n",
" <td>17.624139</td>\n",
" <td>20.428779</td>\n",
" <td>23.950737</td>\n",
" <td>30.332235</td>\n",
" <td>26.391377</td>\n",
" <td>21.239148</td>\n",
" <td>30.755732</td>\n",
" <td>21.002496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>37.673128</td>\n",
" <td>28.879702</td>\n",
" <td>29.527460</td>\n",
" <td>23.893562</td>\n",
" <td>25.875326</td>\n",
" <td>24.170967</td>\n",
" <td>25.280226</td>\n",
" <td>30.966780</td>\n",
" <td>30.360452</td>\n",
" <td>30.041791</td>\n",
" <td>...</td>\n",
" <td>25.934676</td>\n",
" <td>22.237947</td>\n",
" <td>22.410592</td>\n",
" <td>22.664706</td>\n",
" <td>22.214572</td>\n",
" <td>30.400396</td>\n",
" <td>26.618349</td>\n",
" <td>20.050655</td>\n",
" <td>31.037739</td>\n",
" <td>19.830364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5.0</th>\n",
" <td>37.983542</td>\n",
" <td>27.262485</td>\n",
" <td>28.605867</td>\n",
" <td>18.440251</td>\n",
" <td>20.888495</td>\n",
" <td>17.354782</td>\n",
" <td>25.654472</td>\n",
" <td>29.020542</td>\n",
" <td>29.175695</td>\n",
" <td>29.628988</td>\n",
" <td>...</td>\n",
" <td>25.115770</td>\n",
" <td>23.014475</td>\n",
" <td>20.869440</td>\n",
" <td>23.920980</td>\n",
" <td>23.752258</td>\n",
" <td>30.274865</td>\n",
" <td>26.143827</td>\n",
" <td>24.123605</td>\n",
" <td>30.690791</td>\n",
" <td>18.213300</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 517 columns</p>\n",
"</div>"
],
"text/plain": [
" ('[P] Alpha-1-antitrypsin') \\\n",
"sample_id \n",
"1.0 37.995543 \n",
"2.0 37.353091 \n",
"3.0 37.527875 \n",
"4.0 37.673128 \n",
"5.0 37.983542 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 4-69') \\\n",
"sample_id \n",
"1.0 27.176361 \n",
"2.0 29.419228 \n",
"3.0 27.174171 \n",
"4.0 28.879702 \n",
"5.0 27.262485 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 8-61') \\\n",
"sample_id \n",
"1.0 28.294477 \n",
"2.0 29.048510 \n",
"3.0 28.953215 \n",
"4.0 29.527460 \n",
"5.0 28.605867 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 10-54') \\\n",
"sample_id \n",
"1.0 24.783368 \n",
"2.0 27.573331 \n",
"3.0 25.619161 \n",
"4.0 23.893562 \n",
"5.0 18.440251 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 7-46') \\\n",
"sample_id \n",
"1.0 23.926601 \n",
"2.0 24.671039 \n",
"3.0 24.528623 \n",
"4.0 25.875326 \n",
"5.0 20.888495 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 5-37') \\\n",
"sample_id \n",
"1.0 23.490571 \n",
"2.0 22.343591 \n",
"3.0 20.714471 \n",
"4.0 24.170967 \n",
"5.0 17.354782 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 2-18') \\\n",
"sample_id \n",
"1.0 25.490816 \n",
"2.0 27.166913 \n",
"3.0 25.589237 \n",
"4.0 25.280226 \n",
"5.0 25.654472 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 3-10') \\\n",
"sample_id \n",
"1.0 29.287382 \n",
"2.0 29.576452 \n",
"3.0 29.621968 \n",
"4.0 30.966780 \n",
"5.0 29.020542 \n",
"\n",
" ('[P] Immunoglobulin lambda variable 3-9') \\\n",
"sample_id \n",
"1.0 29.505121 \n",
"2.0 30.081443 \n",
"3.0 29.398338 \n",
"4.0 30.360452 \n",
"5.0 29.175695 \n",
"\n",
" ('[P] Immunoglobulin kappa variable 2-28') ... \\\n",
"sample_id ... \n",
"1.0 29.909421 ... \n",
"2.0 29.514012 ... \n",
"3.0 29.436129 ... \n",
"4.0 30.041791 ... \n",
"5.0 29.628988 ... \n",
"\n",
" ('[P] Prenylcysteine oxidase 1') \\\n",
"sample_id \n",
"1.0 25.746683 \n",
"2.0 25.580983 \n",
"3.0 24.452494 \n",
"4.0 25.934676 \n",
"5.0 25.115770 \n",
"\n",
" ('[P] N-acetylglucosamine-1-phosphotransferase subunit gamma') \\\n",
"sample_id \n",
"1.0 22.593546 \n",
"2.0 19.026956 \n",
"3.0 17.439869 \n",
"4.0 22.237947 \n",
"5.0 23.014475 \n",
"\n",
" ('[P] Coronin-1C') \\\n",
"sample_id \n",
"1.0 22.024047 \n",
"2.0 24.628786 \n",
"3.0 17.624139 \n",
"4.0 22.410592 \n",
"5.0 20.869440 \n",
"\n",
" ('[P] Multiple inositol polyphosphate phosphatase 1') \\\n",
"sample_id \n",
"1.0 23.733969 \n",
"2.0 22.508744 \n",
"3.0 20.428779 \n",
"4.0 22.664706 \n",
"5.0 23.920980 \n",
"\n",
" ('[P] Angiopoietin-related protein 3') \\\n",
"sample_id \n",
"1.0 19.755864 \n",
"2.0 19.399144 \n",
"3.0 23.950737 \n",
"4.0 22.214572 \n",
"5.0 23.752258 \n",
"\n",
" ('[P] IgGFc-binding protein') ('[P] Histone H2B') \\\n",
"sample_id \n",
"1.0 29.920821 25.980045 \n",
"2.0 30.791802 26.601871 \n",
"3.0 30.332235 26.391377 \n",
"4.0 30.400396 26.618349 \n",
"5.0 30.274865 26.143827 \n",
"\n",
" ('[P] Low affinity immunoglobulin gamma Fc region receptor II-a (Fragment)') \\\n",
"sample_id \n",
"1.0 22.915739 \n",
"2.0 22.352374 \n",
"3.0 21.239148 \n",
"4.0 20.050655 \n",
"5.0 24.123605 \n",
"\n",
" ('[P] Apolipoprotein A-II') \\\n",
"sample_id \n",
"1.0 30.157740 \n",
"2.0 29.011826 \n",
"3.0 30.755732 \n",
"4.0 31.037739 \n",
"5.0 30.690791 \n",
"\n",
" ('[P] Neutrophil gelatinase-associated lipocalin') \n",
"sample_id \n",
"1.0 22.821315 \n",
"2.0 20.147061 \n",
"3.0 21.002496 \n",
"4.0 19.830364 \n",
"5.0 18.213300 \n",
"\n",
"[5 rows x 517 columns]"
]
},
"execution_count": 184,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# update col names\n",
"col_names = [\"('\" + proteomics_biomolecule_names_dict[col] + \"')\" for col in quant_df.columns.values]\n",
"quant_df.columns = col_names\n",
"quant_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [],
"source": [
"row_names = []\n",
"for index, row in metadata_df.iterrows():\n",
" #('ML1', 'tissue: thyroid', 'histology: carcinoma', 'sub-histology: follicular_carcinoma', 'gender: F')\n",
" # ('1', 'COVID: 1', 'ICU_1: 1'...)\n",
" out_list = []\n",
" for col in keep_cols:\n",
" value = row[col]\n",
" col_str = \"'{}: {}'\".format(col, value)\n",
" out_list.append(col_str)\n",
" out_str = \"('\" + str(int(index)) + \"', \" + \", \".join(out_list) + \")\"\n",
" #out_str = \"(\" + \", \".join(out_list) + \")\"\n",
" row_names.append(out_str)"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [],
"source": [
"quant_df.index = row_names"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>('1', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 39', 'Hospital_free_days_45: 0')</th>\n",
" <th>('2', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 39')</th>\n",
" <th>('3', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 33', 'Hospital_free_days_45: 18')</th>\n",
" <th>('4', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 39')</th>\n",
" <th>('5', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 27')</th>\n",
" <th>('6', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 45', 'Hospital_free_days_45: 36')</th>\n",
" <th>('7', 'COVID: True', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 38', 'Hospital_free_days_45: 42')</th>\n",
" <th>('8', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 78', 'Hospital_free_days_45: 0')</th>\n",
" <th>('9', 'COVID: True', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 64', 'Hospital_free_days_45: 0')</th>\n",
" <th>('10', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 0')</th>\n",
" <th>...</th>\n",
" <th>('120', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 84', 'Hospital_free_days_45: 41')</th>\n",
" <th>('121', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 88', 'Hospital_free_days_45: 0')</th>\n",
" <th>('122', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 66', 'Hospital_free_days_45: 29')</th>\n",
" <th>('123', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 62', 'Hospital_free_days_45: 40')</th>\n",
" <th>('124', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 71', 'Hospital_free_days_45: 36')</th>\n",
" <th>('125', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 43')</th>\n",
" <th>('126', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 42', 'Hospital_free_days_45: 40')</th>\n",
" <th>('127', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 32', 'Hospital_free_days_45: 43')</th>\n",
" <th>('128', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 44')</th>\n",
" <th>('129', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 36', 'Hospital_free_days_45: 0')</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>('[P] Alpha-1-antitrypsin')</th>\n",
" <td>37.995543</td>\n",
" <td>37.353091</td>\n",
" <td>37.527875</td>\n",
" <td>37.673128</td>\n",
" <td>37.983542</td>\n",
" <td>37.489959</td>\n",
" <td>37.615303</td>\n",
" <td>37.534702</td>\n",
" <td>37.525762</td>\n",
" <td>37.844902</td>\n",
" <td>...</td>\n",
" <td>37.730180</td>\n",
" <td>38.082377</td>\n",
" <td>37.661959</td>\n",
" <td>37.195828</td>\n",
" <td>37.365447</td>\n",
" <td>37.564109</td>\n",
" <td>37.683884</td>\n",
" <td>37.282698</td>\n",
" <td>37.655909</td>\n",
" <td>37.754401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 4-69')</th>\n",
" <td>27.176361</td>\n",
" <td>29.419228</td>\n",
" <td>27.174171</td>\n",
" <td>28.879702</td>\n",
" <td>27.262485</td>\n",
" <td>29.535232</td>\n",
" <td>27.657446</td>\n",
" <td>30.323779</td>\n",
" <td>28.194885</td>\n",
" <td>29.253091</td>\n",
" <td>...</td>\n",
" <td>27.731627</td>\n",
" <td>27.621776</td>\n",
" <td>27.249976</td>\n",
" <td>27.403813</td>\n",
" <td>27.133006</td>\n",
" <td>27.793462</td>\n",
" <td>26.876193</td>\n",
" <td>27.715091</td>\n",
" <td>26.965212</td>\n",
" <td>28.376997</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 8-61')</th>\n",
" <td>28.294477</td>\n",
" <td>29.048510</td>\n",
" <td>28.953215</td>\n",
" <td>29.527460</td>\n",
" <td>28.605867</td>\n",
" <td>29.251895</td>\n",
" <td>26.943355</td>\n",
" <td>29.914134</td>\n",
" <td>28.947290</td>\n",
" <td>29.108465</td>\n",
" <td>...</td>\n",
" <td>28.977874</td>\n",
" <td>27.961132</td>\n",
" <td>29.085045</td>\n",
" <td>28.319069</td>\n",
" <td>29.418182</td>\n",
" <td>29.003245</td>\n",
" <td>28.002763</td>\n",
" <td>28.660543</td>\n",
" <td>28.665092</td>\n",
" <td>29.839541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 10-54')</th>\n",
" <td>24.783368</td>\n",
" <td>27.573331</td>\n",
" <td>25.619161</td>\n",
" <td>23.893562</td>\n",
" <td>18.440251</td>\n",
" <td>25.033513</td>\n",
" <td>19.384221</td>\n",
" <td>22.054705</td>\n",
" <td>27.642444</td>\n",
" <td>27.163654</td>\n",
" <td>...</td>\n",
" <td>27.365780</td>\n",
" <td>26.182814</td>\n",
" <td>27.649039</td>\n",
" <td>25.888229</td>\n",
" <td>26.724164</td>\n",
" <td>26.577443</td>\n",
" <td>27.996742</td>\n",
" <td>21.713502</td>\n",
" <td>19.659693</td>\n",
" <td>20.145198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 7-46')</th>\n",
" <td>23.926601</td>\n",
" <td>24.671039</td>\n",
" <td>24.528623</td>\n",
" <td>25.875326</td>\n",
" <td>20.888495</td>\n",
" <td>26.576723</td>\n",
" <td>25.110084</td>\n",
" <td>25.695856</td>\n",
" <td>26.054428</td>\n",
" <td>26.153098</td>\n",
" <td>...</td>\n",
" <td>25.276113</td>\n",
" <td>24.855716</td>\n",
" <td>24.183591</td>\n",
" <td>25.328755</td>\n",
" <td>24.374313</td>\n",
" <td>25.152285</td>\n",
" <td>24.717171</td>\n",
" <td>25.369895</td>\n",
" <td>25.790296</td>\n",
" <td>26.346110</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 127 columns</p>\n",
"</div>"
],
"text/plain": [
" ('1', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 39', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 37.995543 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.176361 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.294477 \n",
"('[P] Immunoglobulin lambda variable 10-54') 24.783368 \n",
"('[P] Immunoglobulin lambda variable 7-46') 23.926601 \n",
"\n",
" ('2', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 39') \\\n",
"('[P] Alpha-1-antitrypsin') 37.353091 \n",
"('[P] Immunoglobulin lambda variable 4-69') 29.419228 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.048510 \n",
"('[P] Immunoglobulin lambda variable 10-54') 27.573331 \n",
"('[P] Immunoglobulin lambda variable 7-46') 24.671039 \n",
"\n",
" ('3', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 33', 'Hospital_free_days_45: 18') \\\n",
"('[P] Alpha-1-antitrypsin') 37.527875 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.174171 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.953215 \n",
"('[P] Immunoglobulin lambda variable 10-54') 25.619161 \n",
"('[P] Immunoglobulin lambda variable 7-46') 24.528623 \n",
"\n",
" ('4', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 39') \\\n",
"('[P] Alpha-1-antitrypsin') 37.673128 \n",
"('[P] Immunoglobulin lambda variable 4-69') 28.879702 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.527460 \n",
"('[P] Immunoglobulin lambda variable 10-54') 23.893562 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.875326 \n",
"\n",
" ('5', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 27') \\\n",
"('[P] Alpha-1-antitrypsin') 37.983542 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.262485 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.605867 \n",
"('[P] Immunoglobulin lambda variable 10-54') 18.440251 \n",
"('[P] Immunoglobulin lambda variable 7-46') 20.888495 \n",
"\n",
" ('6', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 45', 'Hospital_free_days_45: 36') \\\n",
"('[P] Alpha-1-antitrypsin') 37.489959 \n",
"('[P] Immunoglobulin lambda variable 4-69') 29.535232 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.251895 \n",
"('[P] Immunoglobulin lambda variable 10-54') 25.033513 \n",
"('[P] Immunoglobulin lambda variable 7-46') 26.576723 \n",
"\n",
" ('7', 'COVID: True', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 38', 'Hospital_free_days_45: 42') \\\n",
"('[P] Alpha-1-antitrypsin') 37.615303 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.657446 \n",
"('[P] Immunoglobulin lambda variable 8-61') 26.943355 \n",
"('[P] Immunoglobulin lambda variable 10-54') 19.384221 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.110084 \n",
"\n",
" ('8', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 78', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 37.534702 \n",
"('[P] Immunoglobulin lambda variable 4-69') 30.323779 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.914134 \n",
"('[P] Immunoglobulin lambda variable 10-54') 22.054705 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.695856 \n",
"\n",
" ('9', 'COVID: True', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 64', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 37.525762 \n",
"('[P] Immunoglobulin lambda variable 4-69') 28.194885 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.947290 \n",
"('[P] Immunoglobulin lambda variable 10-54') 27.642444 \n",
"('[P] Immunoglobulin lambda variable 7-46') 26.054428 \n",
"\n",
" ('10', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 37.844902 \n",
"('[P] Immunoglobulin lambda variable 4-69') 29.253091 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.108465 \n",
"('[P] Immunoglobulin lambda variable 10-54') 27.163654 \n",
"('[P] Immunoglobulin lambda variable 7-46') 26.153098 \n",
"\n",
" ... \\\n",
"('[P] Alpha-1-antitrypsin') ... \n",
"('[P] Immunoglobulin lambda variable 4-69') ... \n",
"('[P] Immunoglobulin lambda variable 8-61') ... \n",
"('[P] Immunoglobulin lambda variable 10-54') ... \n",
"('[P] Immunoglobulin lambda variable 7-46') ... \n",
"\n",
" ('120', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 84', 'Hospital_free_days_45: 41') \\\n",
"('[P] Alpha-1-antitrypsin') 37.730180 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.731627 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.977874 \n",
"('[P] Immunoglobulin lambda variable 10-54') 27.365780 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.276113 \n",
"\n",
" ('121', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 88', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 38.082377 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.621776 \n",
"('[P] Immunoglobulin lambda variable 8-61') 27.961132 \n",
"('[P] Immunoglobulin lambda variable 10-54') 26.182814 \n",
"('[P] Immunoglobulin lambda variable 7-46') 24.855716 \n",
"\n",
" ('122', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 66', 'Hospital_free_days_45: 29') \\\n",
"('[P] Alpha-1-antitrypsin') 37.661959 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.249976 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.085045 \n",
"('[P] Immunoglobulin lambda variable 10-54') 27.649039 \n",
"('[P] Immunoglobulin lambda variable 7-46') 24.183591 \n",
"\n",
" ('123', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 62', 'Hospital_free_days_45: 40') \\\n",
"('[P] Alpha-1-antitrypsin') 37.195828 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.403813 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.319069 \n",
"('[P] Immunoglobulin lambda variable 10-54') 25.888229 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.328755 \n",
"\n",
" ('124', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 71', 'Hospital_free_days_45: 36') \\\n",
"('[P] Alpha-1-antitrypsin') 37.365447 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.133006 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.418182 \n",
"('[P] Immunoglobulin lambda variable 10-54') 26.724164 \n",
"('[P] Immunoglobulin lambda variable 7-46') 24.374313 \n",
"\n",
" ('125', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 43') \\\n",
"('[P] Alpha-1-antitrypsin') 37.564109 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.793462 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.003245 \n",
"('[P] Immunoglobulin lambda variable 10-54') 26.577443 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.152285 \n",
"\n",
" ('126', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 42', 'Hospital_free_days_45: 40') \\\n",
"('[P] Alpha-1-antitrypsin') 37.683884 \n",
"('[P] Immunoglobulin lambda variable 4-69') 26.876193 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.002763 \n",
"('[P] Immunoglobulin lambda variable 10-54') 27.996742 \n",
"('[P] Immunoglobulin lambda variable 7-46') 24.717171 \n",
"\n",
" ('127', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 32', 'Hospital_free_days_45: 43') \\\n",
"('[P] Alpha-1-antitrypsin') 37.282698 \n",
"('[P] Immunoglobulin lambda variable 4-69') 27.715091 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.660543 \n",
"('[P] Immunoglobulin lambda variable 10-54') 21.713502 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.369895 \n",
"\n",
" ('128', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 44') \\\n",
"('[P] Alpha-1-antitrypsin') 37.655909 \n",
"('[P] Immunoglobulin lambda variable 4-69') 26.965212 \n",
"('[P] Immunoglobulin lambda variable 8-61') 28.665092 \n",
"('[P] Immunoglobulin lambda variable 10-54') 19.659693 \n",
"('[P] Immunoglobulin lambda variable 7-46') 25.790296 \n",
"\n",
" ('129', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 36', 'Hospital_free_days_45: 0') \n",
"('[P] Alpha-1-antitrypsin') 37.754401 \n",
"('[P] Immunoglobulin lambda variable 4-69') 28.376997 \n",
"('[P] Immunoglobulin lambda variable 8-61') 29.839541 \n",
"('[P] Immunoglobulin lambda variable 10-54') 20.145198 \n",
"('[P] Immunoglobulin lambda variable 7-46') 26.346110 \n",
"\n",
"[5 rows x 127 columns]"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"quant_df.T.head()"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>('1', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 39', 'Hospital_free_days_45: 0')</th>\n",
" <th>('2', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 39')</th>\n",
" <th>('3', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 33', 'Hospital_free_days_45: 18')</th>\n",
" <th>('4', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 39')</th>\n",
" <th>('5', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 27')</th>\n",
" <th>('6', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 45', 'Hospital_free_days_45: 36')</th>\n",
" <th>('7', 'COVID: True', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 38', 'Hospital_free_days_45: 42')</th>\n",
" <th>('8', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 78', 'Hospital_free_days_45: 0')</th>\n",
" <th>('9', 'COVID: True', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 64', 'Hospital_free_days_45: 0')</th>\n",
" <th>('10', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 0')</th>\n",
" <th>...</th>\n",
" <th>('120', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 84', 'Hospital_free_days_45: 41')</th>\n",
" <th>('121', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 88', 'Hospital_free_days_45: 0')</th>\n",
" <th>('122', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 66', 'Hospital_free_days_45: 29')</th>\n",
" <th>('123', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 62', 'Hospital_free_days_45: 40')</th>\n",
" <th>('124', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 71', 'Hospital_free_days_45: 36')</th>\n",
" <th>('125', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 43')</th>\n",
" <th>('126', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 42', 'Hospital_free_days_45: 40')</th>\n",
" <th>('127', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 32', 'Hospital_free_days_45: 43')</th>\n",
" <th>('128', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 44')</th>\n",
" <th>('129', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 36', 'Hospital_free_days_45: 0')</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>('[P] Alpha-1-antitrypsin')</th>\n",
" <td>1.08</td>\n",
" <td>-0.71</td>\n",
" <td>-0.22</td>\n",
" <td>0.18</td>\n",
" <td>1.04</td>\n",
" <td>-0.33</td>\n",
" <td>0.02</td>\n",
" <td>-0.21</td>\n",
" <td>-0.23</td>\n",
" <td>0.66</td>\n",
" <td>...</td>\n",
" <td>0.34</td>\n",
" <td>1.32</td>\n",
" <td>0.15</td>\n",
" <td>-1.15</td>\n",
" <td>-0.68</td>\n",
" <td>-0.12</td>\n",
" <td>0.21</td>\n",
" <td>-0.91</td>\n",
" <td>0.13</td>\n",
" <td>0.41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 4-69')</th>\n",
" <td>-1.04</td>\n",
" <td>1.20</td>\n",
" <td>-1.05</td>\n",
" <td>0.66</td>\n",
" <td>-0.96</td>\n",
" <td>1.31</td>\n",
" <td>-0.56</td>\n",
" <td>2.10</td>\n",
" <td>-0.03</td>\n",
" <td>1.03</td>\n",
" <td>...</td>\n",
" <td>-0.49</td>\n",
" <td>-0.60</td>\n",
" <td>-0.97</td>\n",
" <td>-0.82</td>\n",
" <td>-1.09</td>\n",
" <td>-0.43</td>\n",
" <td>-1.34</td>\n",
" <td>-0.51</td>\n",
" <td>-1.25</td>\n",
" <td>0.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 8-61')</th>\n",
" <td>-0.46</td>\n",
" <td>0.20</td>\n",
" <td>0.11</td>\n",
" <td>0.61</td>\n",
" <td>-0.19</td>\n",
" <td>0.37</td>\n",
" <td>-1.64</td>\n",
" <td>0.95</td>\n",
" <td>0.11</td>\n",
" <td>0.25</td>\n",
" <td>...</td>\n",
" <td>0.13</td>\n",
" <td>-0.75</td>\n",
" <td>0.23</td>\n",
" <td>-0.44</td>\n",
" <td>0.52</td>\n",
" <td>0.16</td>\n",
" <td>-0.71</td>\n",
" <td>-0.14</td>\n",
" <td>-0.14</td>\n",
" <td>0.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 10-54')</th>\n",
" <td>0.22</td>\n",
" <td>1.19</td>\n",
" <td>0.51</td>\n",
" <td>-0.08</td>\n",
" <td>-1.97</td>\n",
" <td>0.31</td>\n",
" <td>-1.64</td>\n",
" <td>-0.72</td>\n",
" <td>1.21</td>\n",
" <td>1.05</td>\n",
" <td>...</td>\n",
" <td>1.12</td>\n",
" <td>0.71</td>\n",
" <td>1.21</td>\n",
" <td>0.61</td>\n",
" <td>0.90</td>\n",
" <td>0.84</td>\n",
" <td>1.33</td>\n",
" <td>-0.84</td>\n",
" <td>-1.55</td>\n",
" <td>-1.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>('[P] Immunoglobulin lambda variable 7-46')</th>\n",
" <td>-0.54</td>\n",
" <td>-0.10</td>\n",
" <td>-0.19</td>\n",
" <td>0.61</td>\n",
" <td>-2.34</td>\n",
" <td>1.02</td>\n",
" <td>0.15</td>\n",
" <td>0.50</td>\n",
" <td>0.71</td>\n",
" <td>0.77</td>\n",
" <td>...</td>\n",
" <td>0.25</td>\n",
" <td>0.00</td>\n",
" <td>-0.39</td>\n",
" <td>0.28</td>\n",
" <td>-0.28</td>\n",
" <td>0.18</td>\n",
" <td>-0.08</td>\n",
" <td>0.31</td>\n",
" <td>0.56</td>\n",
" <td>0.89</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 127 columns</p>\n",
"</div>"
],
"text/plain": [
" ('1', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 39', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 1.08 \n",
"('[P] Immunoglobulin lambda variable 4-69') -1.04 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.46 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.22 \n",
"('[P] Immunoglobulin lambda variable 7-46') -0.54 \n",
"\n",
" ('2', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 39') \\\n",
"('[P] Alpha-1-antitrypsin') -0.71 \n",
"('[P] Immunoglobulin lambda variable 4-69') 1.20 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.20 \n",
"('[P] Immunoglobulin lambda variable 10-54') 1.19 \n",
"('[P] Immunoglobulin lambda variable 7-46') -0.10 \n",
"\n",
" ('3', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 33', 'Hospital_free_days_45: 18') \\\n",
"('[P] Alpha-1-antitrypsin') -0.22 \n",
"('[P] Immunoglobulin lambda variable 4-69') -1.05 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.11 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.51 \n",
"('[P] Immunoglobulin lambda variable 7-46') -0.19 \n",
"\n",
" ('4', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 39') \\\n",
"('[P] Alpha-1-antitrypsin') 0.18 \n",
"('[P] Immunoglobulin lambda variable 4-69') 0.66 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.61 \n",
"('[P] Immunoglobulin lambda variable 10-54') -0.08 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.61 \n",
"\n",
" ('5', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 27') \\\n",
"('[P] Alpha-1-antitrypsin') 1.04 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.96 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.19 \n",
"('[P] Immunoglobulin lambda variable 10-54') -1.97 \n",
"('[P] Immunoglobulin lambda variable 7-46') -2.34 \n",
"\n",
" ('6', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 45', 'Hospital_free_days_45: 36') \\\n",
"('[P] Alpha-1-antitrypsin') -0.33 \n",
"('[P] Immunoglobulin lambda variable 4-69') 1.31 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.37 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.31 \n",
"('[P] Immunoglobulin lambda variable 7-46') 1.02 \n",
"\n",
" ('7', 'COVID: True', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 38', 'Hospital_free_days_45: 42') \\\n",
"('[P] Alpha-1-antitrypsin') 0.02 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.56 \n",
"('[P] Immunoglobulin lambda variable 8-61') -1.64 \n",
"('[P] Immunoglobulin lambda variable 10-54') -1.64 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.15 \n",
"\n",
" ('8', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 78', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') -0.21 \n",
"('[P] Immunoglobulin lambda variable 4-69') 2.10 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.95 \n",
"('[P] Immunoglobulin lambda variable 10-54') -0.72 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.50 \n",
"\n",
" ('9', 'COVID: True', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 64', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') -0.23 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.03 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.11 \n",
"('[P] Immunoglobulin lambda variable 10-54') 1.21 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.71 \n",
"\n",
" ('10', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 0.66 \n",
"('[P] Immunoglobulin lambda variable 4-69') 1.03 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.25 \n",
"('[P] Immunoglobulin lambda variable 10-54') 1.05 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.77 \n",
"\n",
" ... \\\n",
"('[P] Alpha-1-antitrypsin') ... \n",
"('[P] Immunoglobulin lambda variable 4-69') ... \n",
"('[P] Immunoglobulin lambda variable 8-61') ... \n",
"('[P] Immunoglobulin lambda variable 10-54') ... \n",
"('[P] Immunoglobulin lambda variable 7-46') ... \n",
"\n",
" ('120', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 84', 'Hospital_free_days_45: 41') \\\n",
"('[P] Alpha-1-antitrypsin') 0.34 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.49 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.13 \n",
"('[P] Immunoglobulin lambda variable 10-54') 1.12 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.25 \n",
"\n",
" ('121', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 88', 'Hospital_free_days_45: 0') \\\n",
"('[P] Alpha-1-antitrypsin') 1.32 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.60 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.75 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.71 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.00 \n",
"\n",
" ('122', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 66', 'Hospital_free_days_45: 29') \\\n",
"('[P] Alpha-1-antitrypsin') 0.15 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.97 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.23 \n",
"('[P] Immunoglobulin lambda variable 10-54') 1.21 \n",
"('[P] Immunoglobulin lambda variable 7-46') -0.39 \n",
"\n",
" ('123', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 62', 'Hospital_free_days_45: 40') \\\n",
"('[P] Alpha-1-antitrypsin') -1.15 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.82 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.44 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.61 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.28 \n",
"\n",
" ('124', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 71', 'Hospital_free_days_45: 36') \\\n",
"('[P] Alpha-1-antitrypsin') -0.68 \n",
"('[P] Immunoglobulin lambda variable 4-69') -1.09 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.52 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.90 \n",
"('[P] Immunoglobulin lambda variable 7-46') -0.28 \n",
"\n",
" ('125', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 43') \\\n",
"('[P] Alpha-1-antitrypsin') -0.12 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.43 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.16 \n",
"('[P] Immunoglobulin lambda variable 10-54') 0.84 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.18 \n",
"\n",
" ('126', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 42', 'Hospital_free_days_45: 40') \\\n",
"('[P] Alpha-1-antitrypsin') 0.21 \n",
"('[P] Immunoglobulin lambda variable 4-69') -1.34 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.71 \n",
"('[P] Immunoglobulin lambda variable 10-54') 1.33 \n",
"('[P] Immunoglobulin lambda variable 7-46') -0.08 \n",
"\n",
" ('127', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 32', 'Hospital_free_days_45: 43') \\\n",
"('[P] Alpha-1-antitrypsin') -0.91 \n",
"('[P] Immunoglobulin lambda variable 4-69') -0.51 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.14 \n",
"('[P] Immunoglobulin lambda variable 10-54') -0.84 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.31 \n",
"\n",
" ('128', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 44') \\\n",
"('[P] Alpha-1-antitrypsin') 0.13 \n",
"('[P] Immunoglobulin lambda variable 4-69') -1.25 \n",
"('[P] Immunoglobulin lambda variable 8-61') -0.14 \n",
"('[P] Immunoglobulin lambda variable 10-54') -1.55 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.56 \n",
"\n",
" ('129', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 36', 'Hospital_free_days_45: 0') \n",
"('[P] Alpha-1-antitrypsin') 0.41 \n",
"('[P] Immunoglobulin lambda variable 4-69') 0.15 \n",
"('[P] Immunoglobulin lambda variable 8-61') 0.89 \n",
"('[P] Immunoglobulin lambda variable 10-54') -1.38 \n",
"('[P] Immunoglobulin lambda variable 7-46') 0.89 \n",
"\n",
"[5 rows x 127 columns]"
]
},
"execution_count": 195,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from scipy import stats\n",
"\n",
"clustergram_df = quant_df.apply(stats.zscore).T.round(2)\n",
"clustergram_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [],
"source": [
"clustergram_df.to_csv(\"../../data/clustergrammer/proteomics.txt\", sep='\\t')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notes on formatting:\n",
"\n",
"1. Apparently need to include sample number, otherwise get \"internal server error\"\n",
"2. Can't have missing values (at least in colnames/ rownames)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Now for lipidomics"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"dataset = \"transcriptomics\"\n",
"\n",
"# get omics dataframe of interest\n",
"omics_df = df_dict[dataset]\n",
"\n",
"# get biomolecule names for columns\n",
"biomolecule_names_dict = global_names_dict[dataset]\n",
"\n",
"# quant range\n",
"quant_value_range = quant_value_range_dict[dataset]\n",
"\n",
"col_names = [biomolecule_names_dict[col] \\\n",
" if col in biomolecule_names_dict else col \\\n",
" for col in omics_df.columns.values]"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"drop_cols = ['Sample_label', 'Albany_sampleID', 'DM']\n",
"keep_cols = ['COVID', 'ICU_1', 'Gender', 'Age_less_than_90', 'Hospital_free_days_45']\n",
"metadata_columns = omics_df.columns[quant_value_range:]\n",
"#proteomics_df[metadata_columns.sort_values()].drop(drop_cols, axis=1).dropna(axis=1)\n",
"metadata_df = omics_df[keep_cols]"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"# convert empty strings to NAs and drop missing values\n",
"import numpy as np\n",
"metadata_df = metadata_df.replace('', np.nan).dropna()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"# convert 0 and 1 to True and False (for appropriate coloring in Clustergrammer)\n",
"# covert Age to integer values\n",
"\n",
"COVID_list = []\n",
"ICU_list = []\n",
"age_list = []\n",
"int_bool_dict = {\n",
" 0:\"False\",\n",
" 1:\"True\"\n",
"}\n",
"for index, row in metadata_df.iterrows():\n",
" COVID = int_bool_dict[row['COVID']]\n",
" COVID_list.append(COVID)\n",
" ICU = int_bool_dict[row['ICU_1']]\n",
" ICU_list.append(ICU)\n",
" \n",
" age_list.append(int(row['Age_less_than_90']))\n",
" \n",
"metadata_df['COVID'] = COVID_list\n",
"metadata_df['ICU_1'] = ICU_list\n",
"metadata_df['Age_less_than_90'] = age_list"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>8338</th>\n",
" <th>8340</th>\n",
" <th>8345</th>\n",
" <th>8346</th>\n",
" <th>8352</th>\n",
" <th>8353</th>\n",
" <th>8354</th>\n",
" <th>8355</th>\n",
" <th>8356</th>\n",
" <th>8357</th>\n",
" <th>...</th>\n",
" <th>27790</th>\n",
" <th>27791</th>\n",
" <th>27792</th>\n",
" <th>27793</th>\n",
" <th>27794</th>\n",
" <th>27795</th>\n",
" <th>27796</th>\n",
" <th>27797</th>\n",
" <th>27798</th>\n",
" <th>27799</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>4.5</td>\n",
" <td>3.7</td>\n",
" <td>8.7</td>\n",
" <td>7.4</td>\n",
" <td>9.8</td>\n",
" <td>9.8</td>\n",
" <td>6.5</td>\n",
" <td>10.3</td>\n",
" <td>3.0</td>\n",
" <td>9.3</td>\n",
" <td>...</td>\n",
" <td>6.9</td>\n",
" <td>6.4</td>\n",
" <td>6.0</td>\n",
" <td>6.9</td>\n",
" <td>9.6</td>\n",
" <td>1.9</td>\n",
" <td>9.8</td>\n",
" <td>13.3</td>\n",
" <td>11.5</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>3.8</td>\n",
" <td>3.2</td>\n",
" <td>8.8</td>\n",
" <td>7.1</td>\n",
" <td>9.9</td>\n",
" <td>9.5</td>\n",
" <td>6.2</td>\n",
" <td>10.3</td>\n",
" <td>0.0</td>\n",
" <td>9.3</td>\n",
" <td>...</td>\n",
" <td>7.4</td>\n",
" <td>8.1</td>\n",
" <td>5.5</td>\n",
" <td>6.5</td>\n",
" <td>9.7</td>\n",
" <td>3.1</td>\n",
" <td>9.9</td>\n",
" <td>13.5</td>\n",
" <td>11.3</td>\n",
" <td>8.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>4.3</td>\n",
" <td>1.8</td>\n",
" <td>9.0</td>\n",
" <td>6.9</td>\n",
" <td>10.2</td>\n",
" <td>9.4</td>\n",
" <td>6.8</td>\n",
" <td>9.8</td>\n",
" <td>4.7</td>\n",
" <td>8.8</td>\n",
" <td>...</td>\n",
" <td>6.5</td>\n",
" <td>6.7</td>\n",
" <td>4.4</td>\n",
" <td>5.3</td>\n",
" <td>10.4</td>\n",
" <td>4.5</td>\n",
" <td>9.3</td>\n",
" <td>14.2</td>\n",
" <td>11.8</td>\n",
" <td>7.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>4.2</td>\n",
" <td>2.5</td>\n",
" <td>8.7</td>\n",
" <td>7.3</td>\n",
" <td>9.9</td>\n",
" <td>9.7</td>\n",
" <td>6.3</td>\n",
" <td>10.3</td>\n",
" <td>0.8</td>\n",
" <td>9.1</td>\n",
" <td>...</td>\n",
" <td>7.2</td>\n",
" <td>6.9</td>\n",
" <td>6.4</td>\n",
" <td>7.2</td>\n",
" <td>9.5</td>\n",
" <td>3.6</td>\n",
" <td>9.6</td>\n",
" <td>12.3</td>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>3.7</td>\n",
" <td>0.0</td>\n",
" <td>8.3</td>\n",
" <td>6.4</td>\n",
" <td>10.1</td>\n",
" <td>10.3</td>\n",
" <td>6.4</td>\n",
" <td>9.3</td>\n",
" <td>5.2</td>\n",
" <td>8.5</td>\n",
" <td>...</td>\n",
" <td>6.8</td>\n",
" <td>7.3</td>\n",
" <td>5.0</td>\n",
" <td>6.1</td>\n",
" <td>9.6</td>\n",
" <td>2.8</td>\n",
" <td>10.1</td>\n",
" <td>13.6</td>\n",
" <td>11.1</td>\n",
" <td>8.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 13263 columns</p>\n",
"</div>"
],
"text/plain": [
" 8338 8340 8345 8346 8352 8353 8354 8355 8356 8357 ... \\\n",
"sample_id ... \n",
"1 4.5 3.7 8.7 7.4 9.8 9.8 6.5 10.3 3.0 9.3 ... \n",
"2 3.8 3.2 8.8 7.1 9.9 9.5 6.2 10.3 0.0 9.3 ... \n",
"3 4.3 1.8 9.0 6.9 10.2 9.4 6.8 9.8 4.7 8.8 ... \n",
"4 4.2 2.5 8.7 7.3 9.9 9.7 6.3 10.3 0.8 9.1 ... \n",
"5 3.7 0.0 8.3 6.4 10.1 10.3 6.4 9.3 5.2 8.5 ... \n",
"\n",
" 27790 27791 27792 27793 27794 27795 27796 27797 27798 \\\n",
"sample_id \n",
"1 6.9 6.4 6.0 6.9 9.6 1.9 9.8 13.3 11.5 \n",
"2 7.4 8.1 5.5 6.5 9.7 3.1 9.9 13.5 11.3 \n",
"3 6.5 6.7 4.4 5.3 10.4 4.5 9.3 14.2 11.8 \n",
"4 7.2 6.9 6.4 7.2 9.5 3.6 9.6 12.3 11.0 \n",
"5 6.8 7.3 5.0 6.1 9.6 2.8 10.1 13.6 11.1 \n",
"\n",
" 27799 \n",
"sample_id \n",
"1 9.0 \n",
"2 8.5 \n",
"3 7.8 \n",
"4 9.0 \n",
"5 8.6 \n",
"\n",
"[5 rows x 13263 columns]"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# grab subset of samples with complete metadata, taking only quant value columns\n",
"quant_df = omics_df.loc[metadata_df.index][omics_df.columns[:quant_value_range]]\n",
"quant_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>('[T] A1BG')</th>\n",
" <th>('[T] A2M')</th>\n",
" <th>('[T] AAAS')</th>\n",
" <th>('[T] AACS')</th>\n",
" <th>('[T] AAGAB')</th>\n",
" <th>('[T] AAK1')</th>\n",
" <th>('[T] AAMDC')</th>\n",
" <th>('[T] AAMP')</th>\n",
" <th>('[T] AANAT')</th>\n",
" <th>('[T] AAR2')</th>\n",
" <th>...</th>\n",
" <th>('[T] ZWILCH')</th>\n",
" <th>('[T] ZWINT')</th>\n",
" <th>('[T] ZXDA')</th>\n",
" <th>('[T] ZXDB')</th>\n",
" <th>('[T] ZXDC')</th>\n",
" <th>('[T] ZYG11A')</th>\n",
" <th>('[T] ZYG11B')</th>\n",
" <th>('[T] ZYX')</th>\n",
" <th>('[T] ZZEF1')</th>\n",
" <th>('[T] ZZZ3')</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>4.5</td>\n",
" <td>3.7</td>\n",
" <td>8.7</td>\n",
" <td>7.4</td>\n",
" <td>9.8</td>\n",
" <td>9.8</td>\n",
" <td>6.5</td>\n",
" <td>10.3</td>\n",
" <td>3.0</td>\n",
" <td>9.3</td>\n",
" <td>...</td>\n",
" <td>6.9</td>\n",
" <td>6.4</td>\n",
" <td>6.0</td>\n",
" <td>6.9</td>\n",
" <td>9.6</td>\n",
" <td>1.9</td>\n",
" <td>9.8</td>\n",
" <td>13.3</td>\n",
" <td>11.5</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>3.8</td>\n",
" <td>3.2</td>\n",
" <td>8.8</td>\n",
" <td>7.1</td>\n",
" <td>9.9</td>\n",
" <td>9.5</td>\n",
" <td>6.2</td>\n",
" <td>10.3</td>\n",
" <td>0.0</td>\n",
" <td>9.3</td>\n",
" <td>...</td>\n",
" <td>7.4</td>\n",
" <td>8.1</td>\n",
" <td>5.5</td>\n",
" <td>6.5</td>\n",
" <td>9.7</td>\n",
" <td>3.1</td>\n",
" <td>9.9</td>\n",
" <td>13.5</td>\n",
" <td>11.3</td>\n",
" <td>8.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>4.3</td>\n",
" <td>1.8</td>\n",
" <td>9.0</td>\n",
" <td>6.9</td>\n",
" <td>10.2</td>\n",
" <td>9.4</td>\n",
" <td>6.8</td>\n",
" <td>9.8</td>\n",
" <td>4.7</td>\n",
" <td>8.8</td>\n",
" <td>...</td>\n",
" <td>6.5</td>\n",
" <td>6.7</td>\n",
" <td>4.4</td>\n",
" <td>5.3</td>\n",
" <td>10.4</td>\n",
" <td>4.5</td>\n",
" <td>9.3</td>\n",
" <td>14.2</td>\n",
" <td>11.8</td>\n",
" <td>7.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>4.2</td>\n",
" <td>2.5</td>\n",
" <td>8.7</td>\n",
" <td>7.3</td>\n",
" <td>9.9</td>\n",
" <td>9.7</td>\n",
" <td>6.3</td>\n",
" <td>10.3</td>\n",
" <td>0.8</td>\n",
" <td>9.1</td>\n",
" <td>...</td>\n",
" <td>7.2</td>\n",
" <td>6.9</td>\n",
" <td>6.4</td>\n",
" <td>7.2</td>\n",
" <td>9.5</td>\n",
" <td>3.6</td>\n",
" <td>9.6</td>\n",
" <td>12.3</td>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>3.7</td>\n",
" <td>0.0</td>\n",
" <td>8.3</td>\n",
" <td>6.4</td>\n",
" <td>10.1</td>\n",
" <td>10.3</td>\n",
" <td>6.4</td>\n",
" <td>9.3</td>\n",
" <td>5.2</td>\n",
" <td>8.5</td>\n",
" <td>...</td>\n",
" <td>6.8</td>\n",
" <td>7.3</td>\n",
" <td>5.0</td>\n",
" <td>6.1</td>\n",
" <td>9.6</td>\n",
" <td>2.8</td>\n",
" <td>10.1</td>\n",
" <td>13.6</td>\n",
" <td>11.1</td>\n",
" <td>8.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 13263 columns</p>\n",
"</div>"
],
"text/plain": [
" ('[T] A1BG') ('[T] A2M') ('[T] AAAS') ('[T] AACS') \\\n",
"sample_id \n",
"1 4.5 3.7 8.7 7.4 \n",
"2 3.8 3.2 8.8 7.1 \n",
"3 4.3 1.8 9.0 6.9 \n",
"4 4.2 2.5 8.7 7.3 \n",
"5 3.7 0.0 8.3 6.4 \n",
"\n",
" ('[T] AAGAB') ('[T] AAK1') ('[T] AAMDC') ('[T] AAMP') \\\n",
"sample_id \n",
"1 9.8 9.8 6.5 10.3 \n",
"2 9.9 9.5 6.2 10.3 \n",
"3 10.2 9.4 6.8 9.8 \n",
"4 9.9 9.7 6.3 10.3 \n",
"5 10.1 10.3 6.4 9.3 \n",
"\n",
" ('[T] AANAT') ('[T] AAR2') ... ('[T] ZWILCH') ('[T] ZWINT') \\\n",
"sample_id ... \n",
"1 3.0 9.3 ... 6.9 6.4 \n",
"2 0.0 9.3 ... 7.4 8.1 \n",
"3 4.7 8.8 ... 6.5 6.7 \n",
"4 0.8 9.1 ... 7.2 6.9 \n",
"5 5.2 8.5 ... 6.8 7.3 \n",
"\n",
" ('[T] ZXDA') ('[T] ZXDB') ('[T] ZXDC') ('[T] ZYG11A') \\\n",
"sample_id \n",
"1 6.0 6.9 9.6 1.9 \n",
"2 5.5 6.5 9.7 3.1 \n",
"3 4.4 5.3 10.4 4.5 \n",
"4 6.4 7.2 9.5 3.6 \n",
"5 5.0 6.1 9.6 2.8 \n",
"\n",
" ('[T] ZYG11B') ('[T] ZYX') ('[T] ZZEF1') ('[T] ZZZ3') \n",
"sample_id \n",
"1 9.8 13.3 11.5 9.0 \n",
"2 9.9 13.5 11.3 8.5 \n",
"3 9.3 14.2 11.8 7.8 \n",
"4 9.6 12.3 11.0 9.0 \n",
"5 10.1 13.6 11.1 8.6 \n",
"\n",
"[5 rows x 13263 columns]"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Relabel each column with its entry from biomolecule_names_dict,\n",
"# wrapped as a tuple-like string label: ('<name>')\n",
"col_names = []\n",
"for biomolecule_key in quant_df.columns:\n",
"    display_name = biomolecule_names_dict[biomolecule_key]\n",
"    col_names.append(\"('\" + display_name + \"')\")\n",
"quant_df.columns = col_names\n",
"quant_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# Build one tuple-like string label per sample: the integer sample id first,\n",
"# followed by a 'column: value' entry for every column in keep_cols,\n",
"# e.g. ('1', 'COVID: True', 'ICU_1: False', ...)\n",
"row_names = []\n",
"for sample_id, sample_meta in metadata_df.iterrows():\n",
"    categories = [\"'{}: {}'\".format(field, sample_meta[field]) for field in keep_cols]\n",
"    row_label = \"('\" + str(int(sample_id)) + \"', \" + \", \".join(categories) + \")\"\n",
"    row_names.append(row_label)\n",
"\n",
"# labels are assigned positionally, one per row of metadata_df\n",
"quant_df.index = row_names"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"# Keep the n most variable features, ranked by per-column standard deviation\n",
"# (same ordering as ranking by variance) - adapted from src/dash/plot.py\n",
"n = 500\n",
"feature_spread = quant_df.std(axis=0)\n",
"ranked_spread = feature_spread.sort_values(ascending=False)\n",
"keep_list = ranked_spread.head(n).index.tolist()\n",
"quant_df = quant_df.loc[:, keep_list]"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(124, 500)"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sanity check: dimensions of the filtered matrix as (rows, columns)\n",
"quant_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>('1', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 39', 'Hospital_free_days_45: 0')</th>\n",
" <th>('2', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 39')</th>\n",
" <th>('3', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 33', 'Hospital_free_days_45: 18')</th>\n",
" <th>('4', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 39')</th>\n",
" <th>('5', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 27')</th>\n",
" <th>('6', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 45', 'Hospital_free_days_45: 36')</th>\n",
" <th>('7', 'COVID: True', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 38', 'Hospital_free_days_45: 42')</th>\n",
" <th>('8', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 78', 'Hospital_free_days_45: 0')</th>\n",
" <th>('9', 'COVID: True', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 64', 'Hospital_free_days_45: 0')</th>\n",
" <th>('10', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 0')</th>\n",
" <th>...</th>\n",
" <th>('119', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 40', 'Hospital_free_days_45: 29')</th>\n",
" <th>('120', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 84', 'Hospital_free_days_45: 41')</th>\n",
" <th>('121', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 88', 'Hospital_free_days_45: 0')</th>\n",
" <th>('122', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 66', 'Hospital_free_days_45: 29')</th>\n",
" <th>('123', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 62', 'Hospital_free_days_45: 40')</th>\n",
" <th>('124', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 71', 'Hospital_free_days_45: 36')</th>\n",
" <th>('125', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 43')</th>\n",
" <th>('126', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 42', 'Hospital_free_days_45: 40')</th>\n",
" <th>('127', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 32', 'Hospital_free_days_45: 43')</th>\n",
" <th>('128', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 44')</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>('[T] RPS4Y1')</td>\n",
" <td>0.70</td>\n",
" <td>0.77</td>\n",
" <td>0.68</td>\n",
" <td>0.87</td>\n",
" <td>0.79</td>\n",
" <td>0.85</td>\n",
" <td>-1.24</td>\n",
" <td>0.70</td>\n",
" <td>-1.24</td>\n",
" <td>0.67</td>\n",
" <td>...</td>\n",
" <td>-0.88</td>\n",
" <td>-1.24</td>\n",
" <td>0.89</td>\n",
" <td>-1.07</td>\n",
" <td>-1.24</td>\n",
" <td>1.09</td>\n",
" <td>1.14</td>\n",
" <td>-1.24</td>\n",
" <td>-1.24</td>\n",
" <td>1.25</td>\n",
" </tr>\n",
" <tr>\n",
" <td>('[T] DDX3Y')</td>\n",
" <td>0.83</td>\n",
" <td>0.83</td>\n",
" <td>0.83</td>\n",
" <td>0.94</td>\n",
" <td>1.11</td>\n",
" <td>0.81</td>\n",
" <td>-1.24</td>\n",
" <td>0.96</td>\n",
" <td>-1.07</td>\n",
" <td>0.77</td>\n",
" <td>...</td>\n",
" <td>-1.24</td>\n",
" <td>-1.24</td>\n",
" <td>0.75</td>\n",
" <td>-1.24</td>\n",
" <td>-1.24</td>\n",
" <td>0.66</td>\n",
" <td>0.62</td>\n",
" <td>-1.24</td>\n",
" <td>-1.24</td>\n",
" <td>0.51</td>\n",
" </tr>\n",
" <tr>\n",
" <td>('[T] EIF1AY')</td>\n",
" <td>0.63</td>\n",
" <td>0.78</td>\n",
" <td>0.88</td>\n",
" <td>0.70</td>\n",
" <td>0.78</td>\n",
" <td>0.80</td>\n",
" <td>-1.23</td>\n",
" <td>0.78</td>\n",
" <td>-1.06</td>\n",
" <td>0.70</td>\n",
" <td>...</td>\n",
" <td>-1.23</td>\n",
" <td>-1.23</td>\n",
" <td>0.78</td>\n",
" <td>-1.23</td>\n",
" <td>-1.23</td>\n",
" <td>0.97</td>\n",
" <td>0.84</td>\n",
" <td>-1.23</td>\n",
" <td>-1.23</td>\n",
" <td>0.91</td>\n",
" </tr>\n",
" <tr>\n",
" <td>('[T] KDM5D')</td>\n",
" <td>0.89</td>\n",
" <td>0.96</td>\n",
" <td>0.72</td>\n",
" <td>1.06</td>\n",
" <td>0.87</td>\n",
" <td>0.93</td>\n",
" <td>-1.27</td>\n",
" <td>0.93</td>\n",
" <td>-1.08</td>\n",
" <td>0.91</td>\n",
" <td>...</td>\n",
" <td>-0.88</td>\n",
" <td>-1.27</td>\n",
" <td>0.76</td>\n",
" <td>-1.27</td>\n",
" <td>-1.27</td>\n",
" <td>0.72</td>\n",
" <td>0.65</td>\n",
" <td>-1.27</td>\n",
" <td>-1.27</td>\n",
" <td>0.57</td>\n",
" </tr>\n",
" <tr>\n",
" <td>('[T] BTNL3')</td>\n",
" <td>-0.81</td>\n",
" <td>1.21</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>1.03</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>0.83</td>\n",
" <td>-0.81</td>\n",
" <td>...</td>\n",
" <td>1.17</td>\n",
" <td>1.55</td>\n",
" <td>1.55</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>-0.81</td>\n",
" <td>1.30</td>\n",
" <td>1.42</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 124 columns</p>\n",
"</div>"
],
"text/plain": [
" ('1', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 39', 'Hospital_free_days_45: 0') \\\n",
"('[T] RPS4Y1') 0.70 \n",
"('[T] DDX3Y') 0.83 \n",
"('[T] EIF1AY') 0.63 \n",
"('[T] KDM5D') 0.89 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('2', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 39') \\\n",
"('[T] RPS4Y1') 0.77 \n",
"('[T] DDX3Y') 0.83 \n",
"('[T] EIF1AY') 0.78 \n",
"('[T] KDM5D') 0.96 \n",
"('[T] BTNL3') 1.21 \n",
"\n",
" ('3', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 33', 'Hospital_free_days_45: 18') \\\n",
"('[T] RPS4Y1') 0.68 \n",
"('[T] DDX3Y') 0.83 \n",
"('[T] EIF1AY') 0.88 \n",
"('[T] KDM5D') 0.72 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('4', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 39') \\\n",
"('[T] RPS4Y1') 0.87 \n",
"('[T] DDX3Y') 0.94 \n",
"('[T] EIF1AY') 0.70 \n",
"('[T] KDM5D') 1.06 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('5', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 49', 'Hospital_free_days_45: 27') \\\n",
"('[T] RPS4Y1') 0.79 \n",
"('[T] DDX3Y') 1.11 \n",
"('[T] EIF1AY') 0.78 \n",
"('[T] KDM5D') 0.87 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('6', 'COVID: True', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 45', 'Hospital_free_days_45: 36') \\\n",
"('[T] RPS4Y1') 0.85 \n",
"('[T] DDX3Y') 0.81 \n",
"('[T] EIF1AY') 0.80 \n",
"('[T] KDM5D') 0.93 \n",
"('[T] BTNL3') 1.03 \n",
"\n",
" ('7', 'COVID: True', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 38', 'Hospital_free_days_45: 42') \\\n",
"('[T] RPS4Y1') -1.24 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -1.27 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('8', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 78', 'Hospital_free_days_45: 0') \\\n",
"('[T] RPS4Y1') 0.70 \n",
"('[T] DDX3Y') 0.96 \n",
"('[T] EIF1AY') 0.78 \n",
"('[T] KDM5D') 0.93 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('9', 'COVID: True', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 64', 'Hospital_free_days_45: 0') \\\n",
"('[T] RPS4Y1') -1.24 \n",
"('[T] DDX3Y') -1.07 \n",
"('[T] EIF1AY') -1.06 \n",
"('[T] KDM5D') -1.08 \n",
"('[T] BTNL3') 0.83 \n",
"\n",
" ('10', 'COVID: True', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 0') \\\n",
"('[T] RPS4Y1') 0.67 \n",
"('[T] DDX3Y') 0.77 \n",
"('[T] EIF1AY') 0.70 \n",
"('[T] KDM5D') 0.91 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ... \\\n",
"('[T] RPS4Y1') ... \n",
"('[T] DDX3Y') ... \n",
"('[T] EIF1AY') ... \n",
"('[T] KDM5D') ... \n",
"('[T] BTNL3') ... \n",
"\n",
" ('119', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 40', 'Hospital_free_days_45: 29') \\\n",
"('[T] RPS4Y1') -0.88 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -0.88 \n",
"('[T] BTNL3') 1.17 \n",
"\n",
" ('120', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 84', 'Hospital_free_days_45: 41') \\\n",
"('[T] RPS4Y1') -1.24 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -1.27 \n",
"('[T] BTNL3') 1.55 \n",
"\n",
" ('121', 'COVID: False', 'ICU_1: True', 'Gender: M', 'Age_less_than_90: 88', 'Hospital_free_days_45: 0') \\\n",
"('[T] RPS4Y1') 0.89 \n",
"('[T] DDX3Y') 0.75 \n",
"('[T] EIF1AY') 0.78 \n",
"('[T] KDM5D') 0.76 \n",
"('[T] BTNL3') 1.55 \n",
"\n",
" ('122', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 66', 'Hospital_free_days_45: 29') \\\n",
"('[T] RPS4Y1') -1.07 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -1.27 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('123', 'COVID: False', 'ICU_1: True', 'Gender: F', 'Age_less_than_90: 62', 'Hospital_free_days_45: 40') \\\n",
"('[T] RPS4Y1') -1.24 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -1.27 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('124', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 71', 'Hospital_free_days_45: 36') \\\n",
"('[T] RPS4Y1') 1.09 \n",
"('[T] DDX3Y') 0.66 \n",
"('[T] EIF1AY') 0.97 \n",
"('[T] KDM5D') 0.72 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('125', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 63', 'Hospital_free_days_45: 43') \\\n",
"('[T] RPS4Y1') 1.14 \n",
"('[T] DDX3Y') 0.62 \n",
"('[T] EIF1AY') 0.84 \n",
"('[T] KDM5D') 0.65 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('126', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 42', 'Hospital_free_days_45: 40') \\\n",
"('[T] RPS4Y1') -1.24 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -1.27 \n",
"('[T] BTNL3') -0.81 \n",
"\n",
" ('127', 'COVID: False', 'ICU_1: False', 'Gender: F', 'Age_less_than_90: 32', 'Hospital_free_days_45: 43') \\\n",
"('[T] RPS4Y1') -1.24 \n",
"('[T] DDX3Y') -1.24 \n",
"('[T] EIF1AY') -1.23 \n",
"('[T] KDM5D') -1.27 \n",
"('[T] BTNL3') 1.30 \n",
"\n",
" ('128', 'COVID: False', 'ICU_1: False', 'Gender: M', 'Age_less_than_90: 62', 'Hospital_free_days_45: 44') \n",
"('[T] RPS4Y1') 1.25 \n",
"('[T] DDX3Y') 0.51 \n",
"('[T] EIF1AY') 0.91 \n",
"('[T] KDM5D') 0.57 \n",
"('[T] BTNL3') 1.42 \n",
"\n",
"[5 rows x 124 columns]"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Standardize every column of quant_df to z-scores, then transpose the result\n",
"# (columns become rows) and round to 2 decimals\n",
"from scipy import stats\n",
"\n",
"zscored_df = quant_df.apply(stats.zscore)\n",
"clustergram_df = zscored_df.transpose().round(2)\n",
"clustergram_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"# Write the clustergram matrix as a tab-separated text file named after the dataset\n",
"outpath = \"../../data/clustergrammer/{}.txt\".format(dataset)\n",
"# create the target folder when it is missing so the write does not fail on a fresh checkout\n",
"os.makedirs(os.path.dirname(outpath), exist_ok=True)\n",
"# NOTE: only the first 100 rows of clustergram_df are exported, not the whole frame\n",
"clustergram_df.head(n=100).to_csv(outpath, sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}