{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Reproducing the subsampling analysis" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "All required data files are present.\n" ] } ], "source": [ "import os\n", "import sys\n", "\n", "# Add the parent directory to the import path; presumably this is where the\n", "# `kgwas` package lives when the notebook is run from its own folder.\n", "sys.path.append('../')\n", "\n", "from kgwas import KGWAS_Data\n", "from kgwas.eval_utils import get_clumps_gold_label, get_meta_clumps, get_mega_clump_query, get_curve\n", "from tqdm import tqdm\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# Root of the KGWAS data directory. Set the KGWAS_DATA_PATH environment variable\n", "# to use a different copy; when it is unset the original cluster path is used.\n", "data_path = os.environ.get(\n", "    'KGWAS_DATA_PATH',\n", "    '/dfs/project/datasets/20220524-ukbiobank/data/kgwas_data/',\n", ")\n", "data = KGWAS_Data(data_path = data_path)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['body_BALDING1',\n", " 'disease_ALLERGY_ECZEMA_DIAGNOSED',\n", " 'disease_HYPOTHYROIDISM_SELF_REP',\n", " 'pigment_SUNBURN',\n", " '21001',\n", " '50',\n", " '30080',\n", " '30070',\n", " '30010',\n", " '30000',\n", " 'biochemistry_AlkalinePhosphatase',\n", " 'biochemistry_AspartateAminotransferase',\n", " 'biochemistry_Cholesterol',\n", " 'biochemistry_Creatinine',\n", " 'biochemistry_IGF1',\n", " 'biochemistry_Phosphate',\n", " 'biochemistry_Testosterone_Male',\n", " 'biochemistry_TotalBilirubin',\n", " 'biochemistry_TotalProtein',\n", " 'biochemistry_VitaminD',\n", " 'bmd_HEEL_TSCOREz']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# List the phenotype identifiers stored under the '21_indep_traits' key.\n", "data.get_pheno_list()['21_indep_traits']" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ID | \n", "P_seed1 | \n", "P_seed2 | \n", "P_seed3 | \n", "P_seed4 | \n", "P_seed5 | \n", "
---|---|---|---|---|---|---|
0 | \n", "rs3131962 | \n", "0.769574 | \n", "0.317521 | \n", "0.179855 | \n", "0.524405 | \n", "0.449508 | \n", "
1 | \n", "rs12562034 | \n", "0.096443 | \n", "0.662786 | \n", "0.863633 | \n", "0.711095 | \n", "0.101351 | \n", "
2 | \n", "rs4040617 | \n", "0.700440 | \n", "0.289914 | \n", "0.230769 | \n", "0.455915 | \n", "0.471616 | \n", "
3 | \n", "rs79373928 | \n", "0.552305 | \n", "0.477996 | \n", "0.006953 | \n", "0.169693 | \n", "0.749829 | \n", "
4 | \n", "rs11240779 | \n", "0.810017 | \n", "0.556670 | \n", "0.291907 | \n", "0.833291 | \n", "0.223999 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
536541 | \n", "rs73174435 | \n", "0.312853 | \n", "0.730114 | \n", "0.870168 | \n", "0.409637 | \n", "0.345601 | \n", "
536542 | \n", "rs3810648 | \n", "0.623566 | \n", "0.984208 | \n", "0.461422 | \n", "0.711653 | \n", "0.931965 | \n", "
536543 | \n", "rs5771002 | \n", "0.131033 | \n", "0.890363 | \n", "0.700539 | \n", "0.513670 | \n", "0.429219 | \n", "
536544 | \n", "rs3865764 | \n", "0.831915 | \n", "0.794813 | \n", "0.914937 | \n", "0.659038 | \n", "0.814384 | \n", "
536545 | \n", "rs142680588 | \n", "0.397430 | \n", "0.324668 | \n", "0.326413 | \n", "0.435458 | \n", "0.370817 | \n", "
536546 rows × 6 columns
\n", "\n", " | CHR | \n", "SNP | \n", "POS | \n", "A1 | \n", "A2 | \n", "N | \n", "AF1 | \n", "BETA | \n", "SE | \n", "P | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "rs3131962 | \n", "756604 | \n", "A | \n", "G | \n", "407023 | \n", "0.129655 | \n", "0.000286 | \n", "0.001048 | \n", "0.784760 | \n", "
1 | \n", "1 | \n", "rs12562034 | \n", "768448 | \n", "A | \n", "G | \n", "407057 | \n", "0.104966 | \n", "-0.001491 | \n", "0.001147 | \n", "0.193592 | \n", "
2 | \n", "1 | \n", "rs4040617 | \n", "779322 | \n", "G | \n", "A | \n", "406623 | \n", "0.127520 | \n", "0.000108 | \n", "0.001056 | \n", "0.918404 | \n", "
3 | \n", "1 | \n", "rs79373928 | \n", "801536 | \n", "G | \n", "T | \n", "407517 | \n", "0.014884 | \n", "0.004382 | \n", "0.002904 | \n", "0.131349 | \n", "
4 | \n", "1 | \n", "rs11240779 | \n", "808631 | \n", "G | \n", "A | \n", "404493 | \n", "0.224886 | \n", "-0.001155 | \n", "0.000846 | \n", "0.172345 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
542753 | \n", "22 | \n", "rs73174435 | \n", "51174939 | \n", "T | \n", "C | \n", "407201 | \n", "0.053846 | \n", "-0.001980 | \n", "0.001559 | \n", "0.203959 | \n", "
542754 | \n", "22 | \n", "rs3810648 | \n", "51175626 | \n", "G | \n", "A | \n", "404901 | \n", "0.060979 | \n", "0.001922 | \n", "0.001474 | \n", "0.192116 | \n", "
542755 | \n", "22 | \n", "rs5771002 | \n", "51183255 | \n", "A | \n", "G | \n", "401398 | \n", "0.333603 | \n", "-0.000165 | \n", "0.000751 | \n", "0.826494 | \n", "
542756 | \n", "22 | \n", "rs3865764 | \n", "51185848 | \n", "G | \n", "A | \n", "406611 | \n", "0.050601 | \n", "-0.001311 | \n", "0.001605 | \n", "0.413994 | \n", "
542757 | \n", "22 | \n", "rs142680588 | \n", "51193629 | \n", "G | \n", "A | \n", "407108 | \n", "0.075912 | \n", "-0.002861 | \n", "0.001329 | \n", "0.031362 | \n", "
542758 rows × 10 columns
\n", "\n", " | Method | \n", "Threshold | \n", "seed | \n", "# of hits | \n", "sample size | \n", "
---|---|---|---|---|---|
0 | \n", "FINDOR | \n", "5e-08 | \n", "1 | \n", "2 | \n", "1000 | \n", "
1 | \n", "FINDOR | \n", "5e-08 | \n", "2 | \n", "1 | \n", "1000 | \n", "
2 | \n", "FINDOR | \n", "5e-08 | \n", "3 | \n", "3 | \n", "1000 | \n", "
3 | \n", "FINDOR | \n", "5e-08 | \n", "4 | \n", "4 | \n", "1000 | \n", "
4 | \n", "FINDOR | \n", "5e-08 | \n", "5 | \n", "2 | \n", "1000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
10 | \n", "KGWAS | \n", "5e-08 | \n", "1 | \n", "73 | \n", "10000 | \n", "
11 | \n", "KGWAS | \n", "5e-08 | \n", "2 | \n", "78 | \n", "10000 | \n", "
12 | \n", "KGWAS | \n", "5e-08 | \n", "3 | \n", "70 | \n", "10000 | \n", "
13 | \n", "KGWAS | \n", "5e-08 | \n", "4 | \n", "73 | \n", "10000 | \n", "
14 | \n", "KGWAS | \n", "5e-08 | \n", "5 | \n", "84 | \n", "10000 | \n", "
75 rows × 5 columns
\n", "Method | \n", "index | \n", "Relative Improvement (%) | \n", "Relative Improvement (%) FINDOR | \n", "Relative Improvement (%) GWAS | \n", "
---|---|---|---|---|
Sampling Ratio | \n", "\n", " | \n", " | \n", " | \n", " |
1000 | \n", "0.0 | \n", "75.035979 | \n", "20.851836 | \n", "0.0 | \n", "
10000 | \n", "1.0 | \n", "39.707777 | \n", "23.516556 | \n", "0.0 | \n", "
2500 | \n", "2.0 | \n", "79.700424 | \n", "37.739636 | \n", "0.0 | \n", "
5000 | \n", "3.0 | \n", "48.805510 | \n", "22.503181 | \n", "0.0 | \n", "
7500 | \n", "4.0 | \n", "41.634301 | \n", "23.718663 | \n", "0.0 | \n", "