{ "cells": [ { "cell_type": "markdown", "id": "3a93d4c0", "metadata": {}, "source": [ "# Compare Associations\n", "* This notebook compares the final GWAS p-values for the full synthetic genome/phenome datasets to those in the original genome/phenome datasets and computes the precision, recall and F1 values." ] }, { "cell_type": "code", "execution_count": 1, "id": "97ac91d7", "metadata": {}, "outputs": [], "source": [ "import os\n", "import pathlib\n", "import pandas as pd\n", "\n", "# Root directory that contains 'mice_data_set': the current working directory with every\n", "# path component named exactly 'synthetics' removed (presumably the folder this notebook\n", "# is run from -- TODO confirm against the repository layout).\n", "# Whole components are compared instead of calling str.replace('/synthetics', '') on the\n", "# path string, which would also mangle names such as 'synthetics_v2' and never matches\n", "# Windows path separators.\n", "cwd_parts = pathlib.Path(os.getcwd()).parts\n", "base_path = pathlib.Path(*(part for part in cwd_parts if part != 'synthetics'))\n", "\n", "# Data directory and the GWAS output directories for the real and synthetic runs.\n", "data_path = base_path / 'mice_data_set' / 'data'\n", "real_gwas_path = base_path / 'mice_data_set' / 'out'\n", "synthetic_gwas_path = base_path / 'mice_data_set' / 'out_synth'\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "0198fdaf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | index | \n", "snp | \n", "p | \n", "interest | \n", "
---|---|---|---|---|
0 | \n", "1 | \n", "rs29477109 | \n", "5.052317e-14 | \n", "True | \n", "
1 | \n", "2 | \n", "rs27071351 | \n", "7.074181e-14 | \n", "True | \n", "
2 | \n", "3 | \n", "rs27024162 | \n", "7.170582e-14 | \n", "True | \n", "
3 | \n", "4 | \n", "rs49423067 | \n", "7.198661e-14 | \n", "True | \n", "
4 | \n", "5 | \n", "rs29470802 | \n", "8.049849e-14 | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
79640 | \n", "79641 | \n", "rs3162358 | \n", "9.998911e-01 | \n", "False | \n", "
79641 | \n", "79642 | \n", "rs50509099 | \n", "9.999012e-01 | \n", "False | \n", "
79642 | \n", "79643 | \n", "rs47505090 | \n", "9.999041e-01 | \n", "False | \n", "
79643 | \n", "79644 | \n", "rs232293770 | \n", "9.999351e-01 | \n", "False | \n", "
79644 | \n", "79645 | \n", "rs247449322 | \n", "9.999861e-01 | \n", "False | \n", "
79645 rows × 4 columns
\n", "\n", " | index | \n", "snp | \n", "p | \n", "interest | \n", "
---|---|---|---|---|
0 | \n", "1 | \n", "rs36353660 | \n", "0.000000e+00 | \n", "True | \n", "
1 | \n", "2 | \n", "rs29220747 | \n", "1.398388e-86 | \n", "True | \n", "
2 | \n", "3 | \n", "rs29470086 | \n", "5.929727e-86 | \n", "True | \n", "
3 | \n", "4 | \n", "rs33102275 | \n", "2.838852e-85 | \n", "True | \n", "
4 | \n", "5 | \n", "rs252502314 | \n", "9.043721e-85 | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
71310 | \n", "71311 | \n", "cfw-17-49864534 | \n", "9.999609e-01 | \n", "False | \n", "
71311 | \n", "71312 | \n", "rs30856414 | \n", "9.999711e-01 | \n", "False | \n", "
71312 | \n", "71313 | \n", "rs108433568 | \n", "9.999735e-01 | \n", "False | \n", "
71313 | \n", "71314 | \n", "rs237834328 | \n", "9.999895e-01 | \n", "False | \n", "
71314 | \n", "71315 | \n", "rs52090420 | \n", "9.999899e-01 | \n", "False | \n", "
71315 rows × 4 columns
\n", "\n", " | index_synthetic | \n", "snp | \n", "p_synthetic | \n", "interest_synthetic | \n", "index_real | \n", "p_real | \n", "interest_real | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "rs36353660 | \n", "0.000000e+00 | \n", "True | \n", "77608 | \n", "9.734443e-01 | \n", "False | \n", "
1 | \n", "2 | \n", "rs29220747 | \n", "1.398388e-86 | \n", "True | \n", "75217 | \n", "9.426825e-01 | \n", "False | \n", "
2 | \n", "3 | \n", "rs29470086 | \n", "5.929727e-86 | \n", "True | \n", "77 | \n", "1.346918e-12 | \n", "True | \n", "
3 | \n", "4 | \n", "rs33102275 | \n", "2.838852e-85 | \n", "True | \n", "70949 | \n", "8.872558e-01 | \n", "False | \n", "
4 | \n", "5 | \n", "rs252502314 | \n", "9.043721e-85 | \n", "True | \n", "74884 | \n", "9.383166e-01 | \n", "False | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
71310 | \n", "71311 | \n", "cfw-17-49864534 | \n", "9.999609e-01 | \n", "False | \n", "58306 | \n", "7.228062e-01 | \n", "False | \n", "
71311 | \n", "71312 | \n", "rs30856414 | \n", "9.999711e-01 | \n", "False | \n", "31335 | \n", "3.776636e-01 | \n", "False | \n", "
71312 | \n", "71313 | \n", "rs108433568 | \n", "9.999735e-01 | \n", "False | \n", "21151 | \n", "2.508090e-01 | \n", "False | \n", "
71313 | \n", "71314 | \n", "rs237834328 | \n", "9.999895e-01 | \n", "False | \n", "13645 | \n", "1.552650e-01 | \n", "False | \n", "
71314 | \n", "71315 | \n", "rs52090420 | \n", "9.999899e-01 | \n", "False | \n", "66119 | \n", "8.233664e-01 | \n", "False | \n", "
71315 rows × 7 columns
\n", "