{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Reproducing Simulation Results\n", "### Null simulation\n", "We share the pre-computed null simulation results for GWAS/FINDOR/KGWAS in the data folder. We first load them via:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import multiprocessing\n", "from tqdm import tqdm\n", "import numpy as np\n", "import sys\n", "sys.path.append('../')\n", "from kgwas.eval_utils import get_clumps_gold_label, get_meta_clumps, get_mega_clump_query, get_curve\n", "\n", "data_path = '/dfs/project/datasets/20220524-ukbiobank/data/kgwas_data/'\n", "snp_info = pd.read_csv(data_path + 'misc_data/snp_qc_info.csv')\n", "\n", "df_gwas = pd.read_csv(data_path + 'model_pred/simulation/null_simulation_gwas.csv')\n", "df_kgwas = pd.read_csv(data_path + 'model_pred/simulation/null_simulation_kgwas.csv')\n", "df_findor = pd.read_csv(data_path + 'model_pred/simulation/null_simulation_findor.csv')\n", "\n", "df_gwas = snp_info.merge(df_gwas, left_on = 'SNP', right_on = 'ID')\n", "df_kgwas = snp_info.merge(df_kgwas, left_on = 'SNP', right_on = 'ID')\n", "df_findor = snp_info.merge(df_findor)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This dataframe stores the p-values for all 500 seeds (columns `P_seed1` to `P_seed500`):" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | CHR | \n", "SNP | \n", "POS | \n", "A1 | \n", "A2 | \n", "N | \n", "AF1 | \n", "P_seed1 | \n", "P_seed2 | \n", "P_seed3 | \n", "... | \n", "P_seed491 | \n", "P_seed492 | \n", "P_seed493 | \n", "P_seed494 | \n", "P_seed495 | \n", "P_seed496 | \n", "P_seed497 | \n", "P_seed498 | \n", "P_seed499 | \n", "P_seed500 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "rs3131962 | \n", "756604 | \n", "A | \n", "G | \n", "155603 | \n", "0.129731 | \n", "1.039910 | \n", "0.966660 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "0.110102 | \n", "0.370526 | \n", "0.991077 | \n", "1.064326 | \n", "0.202375 | \n", "1.015493 | \n", "0.435631 | \n", "1.039910 | \n", "0.673215 | \n", "
1 | \n", "1 | \n", "rs12562034 | \n", "768448 | \n", "A | \n", "G | \n", "155612 | \n", "0.105188 | \n", "0.478095 | \n", "0.069428 | \n", "1.015493 | \n", "... | \n", "0.232310 | \n", "0.008637 | \n", "1.039910 | \n", "0.735306 | \n", "0.432725 | \n", "1.039910 | \n", "1.015493 | \n", "0.769550 | \n", "1.039910 | \n", "0.076054 | \n", "
2 | \n", "1 | \n", "rs4040617 | \n", "779322 | \n", "G | \n", "A | \n", "155423 | \n", "0.127632 | \n", "1.039910 | \n", "0.966660 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "0.385465 | \n", "0.051355 | \n", "0.991077 | \n", "1.064326 | \n", "1.039910 | \n", "1.015493 | \n", "0.991077 | \n", "0.094461 | \n", "1.015493 | \n", "
3 | \n", "1 | \n", "rs79373928 | \n", "801536 | \n", "G | \n", "T | \n", "155775 | \n", "0.014890 | \n", "1.039910 | \n", "0.966660 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "1.015493 | \n", "1.039910 | \n", "0.991077 | \n", "1.064326 | \n", "0.102502 | \n", "1.015493 | \n", "0.230932 | \n", "1.039910 | \n", "0.128050 | \n", "
4 | \n", "1 | \n", "rs11240779 | \n", "808631 | \n", "G | \n", "A | \n", "154651 | \n", "0.225226 | \n", "0.649663 | \n", "0.966660 | \n", "0.323705 | \n", "... | \n", "1.039910 | \n", "1.015493 | \n", "0.025025 | \n", "0.991077 | \n", "1.064326 | \n", "0.782647 | \n", "0.794389 | \n", "0.991077 | \n", "0.154599 | \n", "0.256710 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
524827 | \n", "22 | \n", "rs73174435 | \n", "51174939 | \n", "T | \n", "C | \n", "155675 | \n", "0.053374 | \n", "0.007886 | \n", "0.037670 | \n", "1.015493 | \n", "... | \n", "0.178205 | \n", "0.854147 | \n", "0.203594 | \n", "0.991077 | \n", "1.064326 | \n", "0.573911 | \n", "1.015493 | \n", "0.146576 | \n", "0.722465 | \n", "1.015493 | \n", "
524828 | \n", "22 | \n", "rs3810648 | \n", "51175626 | \n", "G | \n", "A | \n", "154836 | \n", "0.060984 | \n", "1.039910 | \n", "0.966660 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "0.224030 | \n", "1.039910 | \n", "0.991077 | \n", "0.002261 | \n", "1.039910 | \n", "0.056953 | \n", "0.389743 | \n", "1.039910 | \n", "0.930820 | \n", "
524829 | \n", "22 | \n", "rs5771002 | \n", "51183255 | \n", "A | \n", "G | \n", "153451 | \n", "0.334621 | \n", "1.039910 | \n", "0.966660 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "1.015493 | \n", "0.043223 | \n", "0.991077 | \n", "1.064326 | \n", "1.039910 | \n", "1.015493 | \n", "0.991077 | \n", "1.039910 | \n", "0.395818 | \n", "
524830 | \n", "22 | \n", "rs3865764 | \n", "51185848 | \n", "G | \n", "A | \n", "155442 | \n", "0.050797 | \n", "0.299359 | \n", "0.966660 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "1.015493 | \n", "1.039910 | \n", "0.991077 | \n", "1.064326 | \n", "0.280792 | \n", "0.584575 | \n", "0.844995 | \n", "1.039910 | \n", "0.281122 | \n", "
524831 | \n", "22 | \n", "rs142680588 | \n", "51193629 | \n", "G | \n", "A | \n", "155653 | \n", "0.075331 | \n", "0.028412 | \n", "0.513042 | \n", "1.015493 | \n", "... | \n", "1.039910 | \n", "0.699220 | \n", "1.039910 | \n", "0.361852 | \n", "1.064326 | \n", "0.288267 | \n", "0.549154 | \n", "0.991077 | \n", "0.287155 | \n", "1.015493 | \n", "
524832 rows × 507 columns
\n", "